PDForensic (version 0.2.0) | index PDForensic.py |
This tool analyses PDF files for Forensic Investigations
~# cat blank.pdf | python3.11 PDForensic.py - *.pdf ../*.pdf https://www.pdfscripting.com/public/FreeStuff/PDFSamples/TheFlyv3_EN4Rdr.pdf
...
~# python3.11 PDForensic.py blank.pdf
0 pdf_tag b'%PDF-1.6\r'
1 type ObjStm
2 type ObjStm
3 type XRef
4 type Outlines
5 subtype Type1
5 type Font
7 type Pages
9 subtype XML
9 type Metadata
10 date b"D:20060216150351-08'00'"
10 date b"D:20080816125100-07'00'"
15 date b"D:20080816125100-07'00'"
15 type TransformParams
15 type SigRef
15 type Sig
15 acroform b'/AcroForm 21 0 R/'
15 type Catalog
17 scripts b'/JavaScript'
19 scripts b'/JavaScript/JS'
21 type OCG
22 type Page
23 subtype Widget
23 type Annot
24 subtype Form
24 type XObject
25 subtype Form
25 type XObject
27 subtype Image
27 type XObject
28 subtype Image
28 type XObject
29 subtype Widget
29 type Annot
30 subtype Form
30 type XObject
31 subtype Form
31 type XObject
32 subtype Form
32 type XObject
33 subtype Form
33 type XObject
34 subtype Widget
34 type Annot
35 subtype Form
35 type XObject
36 subtype Form
36 type XObject
37 subtype Form
37 type XObject
38 subtype Form
38 type XObject
39 subtype Widget
39 type Annot
40 subtype Form
40 type XObject
41 subtype Form
41 type XObject
42 subtype Form
42 type XObject
43 subtype Form
43 type XObject
44 subtype Widget
44 type Annot
45 subtype Form
45 type XObject
46 subtype Type1
46 type Font
47 type Encoding
48 subtype Link
48 type Border
48 type Annot
49 subtype Widget
49 type Annot
50 subtype Form
50 type XObject
51 subtype Type1
51 type Font
52 subtype Widget
52 type Annot
53 subtype Form
53 type XObject
54 subtype Form
54 type XObject
55 subtype Widget
55 type Annot
56 subtype Form
56 type XObject
57 subtype Form
57 type XObject
58 subtype Widget
58 type Annot
59 subtype Form
59 type XObject
60 subtype Widget
60 type Annot
61 subtype Form
61 type XObject
62 subtype Form
62 type XObject
63 subtype Image
63 type XObject
64 subtype Widget
64 type Annot
65 subtype Form
65 type XObject
66 subtype Form
66 type XObject
67 subtype Form
67 type XObject
68 subtype Popup
68 type Annot
69 subtype Widget
69 type Annot
70 subtype Form
70 type XObject
71 subtype Type1
71 type Font
72 type Encoding
74 subtype TrueType
74 type Font
75 subtype TrueType
75 type Font
76 subtype TrueType
76 type Font
77 subtype TrueType
77 type Font
78 URI b'/URI(http://www.pdfscripting.com)/S/URI>>'
79 scripts b'/JavaScript/JS(\\nif\\(this.bouncing\\)\\r\\n{\\r\\n\tthis.bouncing = false;\\r\\n\tapp.clearInterval\\(this.bounceTime\\);\\r\\n\tthis.bounceTime = null;\\r\\n}\\r\\n\\r\\n//app.clearInterval\\(timer\\); // stop timer\\r\\n//app.clearTimeOut\\(timeout\\); // stop timer\\r\\n\\r\\n\\r\\n\\r)>>'
81 scripts b'/JavaScript/JS'
83 scripts b'/JavaScript/JS(\\nXinc = 5;\\r\\nYinc = 5;\\r\\n\\r\\n\\r)>>'
84 scripts b'/JavaScript/JS(\\nXinc = 3;\\r\\nYinc = 3;\\r\\n\\r\\n\\r)>>'
85 scripts b'/JavaScript/JS(\\nXinc = 1;\\r\\nYinc = 1;\\r\\n\\r)>>'
86 scripts b'/JavaScript/JS'
89 subtype Image
89 type XObject
91 type FontDescriptor
93 type FontDescriptor
94 type FontDescriptor
95 type ExtGState
96 type ExtGState
97 URI b'/URI(http://www.windjack.com)/S/URI>>'
98 xref
99 xref
100 startxref
101 eof_tag b'%%EOF\r'
{
"tool": "PDForensic",
"version": "0.0.1",
"file": "<http.client.HTTPResponse object at 0x7f4cbbc6fd60>",
"date": "2022-12-27T18:53:59.873367",
"malicious": {
"score": "26%",
"types": [
"acroform",
"scripts",
"URI"
]
},
"objects": {
"found": 102,
"processed": 146,
"counter": {
"type - XObject": 31,
"subtype - Form": 27,
"type - Annot": 14,
"subtype - Widget": 12,
"type - Font": 8,
"subtype - Type1": 4,
"subtype - Image": 4,
"subtype - TrueType": 4,
"type - FontDescriptor": 3,
"type - ObjStm": 2,
"type - Encoding": 2,
"type - ExtGState": 2,
"type - XRef": 1,
"type - Outlines": 1,
"type - Pages": 1,
"subtype - XML": 1,
"type - Metadata": 1,
"type - TransformParams": 1,
"type - SigRef": 1,
"type - Sig": 1,
"type - Catalog": 1,
"type - OCG": 1,
"type - Page": 1,
"subtype - Link": 1,
"type - Border": 1,
"subtype - Popup": 1
}
},
"filters": {
"ids": [],
"types": [],
"regex": [],
"strings": [],
"raw data - hexadecimal": []
}
}
~# python3.11 PDForensic.py objstm.pdf --data --types objstm --no-csv --no-json
0 pdf_tag b'%PDF-1.5\n'
1 object b'1 0 obj\n<< /Type /ObjStm /Length 236 /N 4 /First 20 >>\nstream\n2 0 3 34 4 78 5 143\n<< /Pages 3 0 R /Type /Catalog >>\n<< /Count 1 /Kids [ 4 0 R ] /Type /Pages >>\n<< /Contents 6 0 R /Parent 3 0 R /Resources 5 0 R /Type /Page >>\n<< /Font << /F1 << /BaseFont /Arial /Subtype /Type1 /Type /Font >> >> >>\nendstream\nendobj'
4 startxref
5 eof_tag b'%%EOF\n'
{
"tool": "PDForensic",
"version": "0.0.1",
"file": "objstm.pdf",
"date": "2022-12-27T19:42:13.226314",
"malicious": {
"score": "0%",
"types": []
},
"objects": {
"found": 6,
"processed": 4,
"counter": {
"type - ObjStm ": 1,
"type - XRef ": 1
}
},
"filters": {
"ids": [],
"types": [
"objstm"
],
"regex": [],
"strings": [],
"raw data - hexadecimal": []
}
}
~# python3.11 PDForensic.py https://www.pdfscripting.com/public/FreeStuff/PDFSamples/TheFlyv3_EN4Rdr.pdf --data --ids 79 83 --ids 84 --strings URI --no-csv --no-json
0 pdf_tag b'%PDF-1.6\r'
78 object b'87 0 obj\r<</URI(http://www.pdfscripting.com)/S/URI>>\rendobj'
79 object b'89 0 obj\r<</S/JavaScript/JS(\\nif\\(this.bouncing\\)\\r\\n{\\r\\n\tthis.bouncing = false;\\r\\n\tapp.clearInterval\\(this.bounceTime\\);\\r\\n\tthis.bounceTime = null;\\r\\n}\\r\\n\\r\\n//app.clearInterval\\(timer\\); // stop timer\\r\\n//app.clearTimeOut\\(timeout\\); // stop timer\\r\\n\\r\\n\\r\\n\\r)>>\rendobj'
83 object b'94 0 obj\r<</S/JavaScript/JS(\\nXinc = 5;\\r\\nYinc = 5;\\r\\n\\r\\n\\r)>>\rendobj'
84 object b'95 0 obj\r<</S/JavaScript/JS(\\nXinc = 3;\\r\\nYinc = 3;\\r\\n\\r\\n\\r)>>\rendobj'
97 object b'108 0 obj\r<</URI(http://www.windjack.com)/S/URI>>\rendobj'
98 xref
99 xref
100 startxref
101 eof_tag b'%%EOF\r'
{
"tool": "PDForensic",
"version": "0.0.1",
"file": "<http.client.HTTPResponse object at 0x7fd5329a4760>",
"date": "2022-12-27T19:44:38.964000",
"malicious": {
"score": "26%",
"types": [
"acroform",
"scripts",
"URI"
]
},
"objects": {
"found": 102,
"processed": 10,
"counter": {
"type - XObject": 31,
"subtype - Form": 27,
"type - Annot": 14,
"subtype - Widget": 12,
"type - Font": 8,
"subtype - Type1": 4,
"subtype - Image": 4,
"subtype - TrueType": 4,
"type - FontDescriptor": 3,
"type - ObjStm": 2,
"type - Encoding": 2,
"type - ExtGState": 2,
"type - XRef": 1,
"type - Outlines": 1,
"type - Pages": 1,
"subtype - XML": 1,
"type - Metadata": 1,
"type - TransformParams": 1,
"type - SigRef": 1,
"type - Sig": 1,
"type - Catalog": 1,
"type - OCG": 1,
"type - Page": 1,
"subtype - Link": 1,
"type - Border": 1,
"subtype - Popup": 1
}
},
"filters": {
"ids": [
83,
84,
79
],
"types": [],
"regex": [],
"strings": [
"URI"
],
"raw data - hexadecimal": []
}
}
~# python3.11 PDForensic.py objstm.pdf --data --logs 20 --regex '[0-9a-f]{32}' --no-csv --no-json
0 pdf_tag b'%PDF-1.5\n'
[2022-12-27 19:54:14] INFO (20) {PDForensic - PDForensic.py:634} Object 3 match the 'regex' filter.
3 object b'7 0 obj\n<< /Type /XRef /Length 32 /W [ 1 2 1 ] /Root 2 0 R /Size 8 /ID [<98e68406a8333cc2a3429ac0e8aa1fed><05fa7af561f775eeb73f00cd09fe19e7>] >>\nstream\n\x00\x00\x00\x00\x01\x00\x0f\x00\x02\x00\x01\x00\x02\x00\x01\x01\x02\x00\x01\x02\x02\x00\x01\x03\x01\x01J\x00\x01\x01\xb4\x00\nendstream\nendobj'
4 startxref
5 eof_tag b'%%EOF\n'
{
"tool": "PDForensic",
"version": "0.0.1",
"file": "objstm.pdf",
"date": "2022-12-27T19:54:14.196113",
"malicious": {
"score": "0%",
"types": []
},
"objects": {
"found": 6,
"processed": 4,
"counter": {
"type - ObjStm ": 1,
"type - XRef ": 1
}
},
"filters": {
"ids": [],
"types": [],
"strings": [],
"regex": [
"[0-9a-f]{32}"
],
"raw data - hexadecimal": []
}
}
~# python3.11 PDForensic.py objstm.pdf --data --hexa 000102
0 pdf_tag b'%PDF-1.5\n'
3 object b'7 0 obj\n<< /Type /XRef /Length 32 /W [ 1 2 1 ] /Root 2 0 R /Size 8 /ID [<98e68406a8333cc2a3429ac0e8aa1fed><05fa7af561f775eeb73f00cd09fe19e7>] >>\nstream\n\x00\x00\x00\x00\x01\x00\x0f\x00\x02\x00\x01\x00\x02\x00\x01\x01\x02\x00\x01\x02\x02\x00\x01\x03\x01\x01J\x00\x01\x01\xb4\x00\nendstream\nendobj'
4 startxref
5 eof_tag b'%%EOF\n'
{
"tool": "PDForensic",
"version": "0.0.1",
"file": "objstm.pdf",
"date": "2022-12-27T20:05:19.538251",
"malicious": {
"score": "0%",
"types": []
},
"objects": {
"found": 6,
"processed": 4,
"counter": {
"type - ObjStm ": 1,
"type - XRef ": 1
}
},
"filters": {
"ids": [],
"types": [],
"strings": [],
"regex": [],
"raw data - hexadecimal": [
"000102"
]
}
}
~#
>>> from PDForensic import PDForensic
>>> class MyPDFparser(PDForensic):
...     def __init__(self):
...         super().__init__("objstm.pdf")
...     def handle(self, type_: str, data: bytes, typename: str = "") -> None:
...         print(type_, data, typename)
>>> parser = MyPDFparser()
>>> parser.parse()
pdf_tag b'%PDF-1.5\n'
stream_object b'< /Type /ObjStm /'
type b'/Type /XRef ' XRef
startxref b'startxref\n436\n'
eof_tag b'%%EOF\n'
>>> print(parser.report())
{'tool': 'PDForensic', 'version': '0.0.1', 'file': 'objstm.pdf', 'date': '2022-12-27T20:26:27.425086', 'malicious': {'score': '10%', 'types': ['stream_object']}, 'objects': {'found': 6, 'processed': 5, 'counter': {'type - XRef ': 1}}, 'filters': {'ids': [], 'types': [], 'strings': [], 'regex': [], 'raw data - hexadecimal': []}}
>>> class MyPDFparser(PDForensic):
...     def __init__(self):
...         super().__init__("objstm.pdf", process_data = True, process_tags = False, filter_ = True, strings = ["/Pages"], hexa = ["000102"], regexs = ['[0-9a-f]{32}'], types = ["xref"], ids = [2])
...     def handle(self, type_: str, data: bytes, typename: str = "") -> None:
...         print(type_, data, typename)
>>> parser = MyPDFparser()
>>> parser.parse()
pdf_tag b'%PDF-1.5\n'
object b'1 0 obj\n<< /Type /ObjStm /Length 236 /N 4 /First 20 >>\nstream\n2 0 3 34 4 78 5 143\n<< /Pages 3 0 R /Type /Catalog >>\n<< /Count 1 /Kids [ 4 0 R ] /Type /Pages >>\n<< /Contents 6 0 R /Parent 3 0 R /Resources 5 0 R /Type /Page >>\n<< /Font << /F1 << /BaseFont /Arial /Subtype /Type1 /Type /Font >> >> >>\nendstream\nendobj'
object b"6 0 obj\n<< /Length 57 >>\nstream\nq\nBT\n/F1 55 Tf\n10 400 Td\n(http://www.corkami.com) '\nET\nQ\nendstream\nendobj"
object b'7 0 obj\n<< /Type /XRef /Length 32 /W [ 1 2 1 ] /Root 2 0 R /Size 8 /ID [<98e68406a8333cc2a3429ac0e8aa1fed><05fa7af561f775eeb73f00cd09fe19e7>] >>\nstream\n\x00\x00\x00\x00\x01\x00\x0f\x00\x02\x00\x01\x00\x02\x00\x01\x01\x02\x00\x01\x02\x02\x00\x01\x03\x01\x01J\x00\x01\x01\xb4\x00\nendstream\nendobj'
startxref b'startxref\n436\n'
eof_tag b'%%EOF\n'
>>> print(parser.report())
{'tool': 'PDForensic', 'version': '0.0.1', 'file': 'objstm.pdf', 'date': '2022-12-27T20:38:38.078297', 'malicious': {'score': '10%', 'types': ['stream_object']}, 'objects': {'found': 6, 'processed': 6, 'counter': {'type - XRef ': 1}}, 'filters': {'ids': [2], 'types': ['xref'], 'strings': ['/Pages'], 'regex': ['[0-9a-f]{32}'], 'raw data - hexadecimal': ['000102']}}
>>>
Classes
    PDForensic
Data
    __all__ = ['PDForensic']
    __annotations__ = {'logger': <class 'logging.Logger'>, 'logger_critical': <class 'collections.abc.Callable'>, 'logger_debug': <class 'collections.abc.Callable'>, 'logger_error': <class 'collections.abc.Callable'>, 'logger_info': <class 'collections.abc.Callable'>, 'logger_log': <class 'collections.abc.Callable'>, 'logger_warning': <class 'collections.abc.Callable'>, 'pdf_filters': <class 're.Pattern'>, 'pdf_parser': <class 're.Pattern'>, 'pdf_streams': <class 're.Pattern'>, ...}
    __author_email__ = 'mauricelambert434@gmail.com'
    __copyright__ = '\nPDForensic Copyright (C) 2022, 2023 Maurice L...ome to redistribute it\nunder certain conditions.\n'
    __description__ = 'This tool analyses PDF files for Forensic Investigations'
    __license__ = 'GPL-3.0 License'
    __maintainer__ = 'Maurice Lambert'
    __maintainer_email__ = 'mauricelambert434@gmail.com'
    __url__ = 'https://github.com/mauricelambert/PDForensic'
Author
    Maurice Lambert