Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions JSAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,14 +176,13 @@ def isJavascript(content):
@param content: A string
@return: A boolean, True if it seems to contain Javascript code or False in the other case
'''
jsStrings = ['var ', ';', ')', '(', 'function ', '=', '{', '}', 'if ', 'else', 'return', 'while ', 'for ',
jsStrings = ['var ', ';', ')', '(', 'function ', '=', '{', '}', 'if(', 'if (', 'else{', 'else {','else if', 'return', 'while(', 'while (', 'for(', 'for (',
',', 'eval']
keyStrings = [';', '(', ')']
reVarInit = 'var [\w0-9]+\s*?='
reFunctionCall = '[\w0-9]+\s*?\(.*?\)\s*?;'
stringsFound = []
limit = 15
minDistinctStringsFound = 4
#JS should at least contain ';', ')', '(', 'var', '='
minDistinctStringsFound = 5
minRatio = 10
results = 0
length = len(content)
Expand Down Expand Up @@ -279,4 +278,4 @@ def unescape(escapedBytes, unicode = True):
unescapedBytes = escapedBytes
except:
return (-1, 'Error while unescaping the bytes')
return (0, unescapedBytes)
return (0, unescapedBytes)
35 changes: 32 additions & 3 deletions PDFConsole.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,20 @@
'rl': '/RunLengthDecode', 'ccittfax': '/CCITTFaxDecode', 'ccf': '/CCITTFaxDecode',
'jbig2': '/JBIG2Decode', 'dct': '/DCTDecode', 'jpx': '/JPXDecode'}


# Tokens that printTreeNode looks up (via ``monitoring``) to decide whether a
# tree node is printed in the warning colour.
# NOTE(review): the three groups presumably mirror the same-named lists in
# PDFCore.py -- confirm they are kept in sync.
monitorizedEvents = [
    '/OpenAction',
    '/AA',
    '/Names',
    '/AcroForm',
    '/XFA',
]
monitorizedActions = [
    '/JS',
    '/JavaScript',
    '/Launch',
    '/SubmitForm',
    '/ImportData',
]
monitorizedElements = [
    '/EmbeddedFiles', '/EmbeddedFile',
    '/JBIG2Decode',
    'getPageNthWord', 'arguments.callee',
    '/U3D', '/PRC', '/RichMedia', '/Flash',
    '.rawValue', 'keep.previous',
]

# Single flat list used for membership tests; order is actions, then
# elements, then events. Built as a new list so the groups stay untouched.
monitoring = list(monitorizedActions)
monitoring.extend(monitorizedElements)
monitoring.extend(monitorizedEvents)
class PDFConsole(cmd.Cmd):
'''
Class of the peepdf interactive console. To see details about commands: http://code.google.com/p/peepdf/wiki/Commands
Expand Down Expand Up @@ -3588,7 +3601,7 @@ def do_tree(self, argv):
root = tree[i][0]
objectsInfo = tree[i][1]
if i != 0:
treeOutput += newLine + 'Version ' + str(i) + ':' + newLine * 2
treeOutput += newLine + self.staticColor + 'Version ' + str(i) + self.resetColor + ':' + newLine * 2
if root is not None:
nodesPrinted, nodeOutput = self.printTreeNode(root, objectsInfo, nodesPrinted)
treeOutput += nodeOutput
Expand Down Expand Up @@ -4641,7 +4654,23 @@ def printTreeNode(self, node, nodesInfo, expandedNodes=[], depth=0, recursive=Tr
output = ''
if nodesInfo.has_key(node):
if node not in expandedNodes or (node in expandedNodes and depth > 0):
output += '\t' * depth + nodesInfo[node][0] + ' (' + str(node) + ')' + newLine
isMonitored = False
#Check whether type of an object is in the monitoring list
nodesInformation=nodesInfo[node][0]
types=nodesInformation.split(" ")
for t in types:
if t in monitoring:
isMonitored = True
if self.pdfFile.getObject(node).containsJS():
nodesInformation += " containsJS"
isMonitored = True

if isMonitored:
output += '\t' * depth + self.warningColor + nodesInformation + self.resetColor + ' (' + str(node) + ')' + newLine
else:
output += '\t' * depth + nodesInformation + ' (' + str(node) + ')' + newLine


if node not in expandedNodes:
expandedNodes.append(node)
children = nodesInfo[node][1]
Expand Down
33 changes: 25 additions & 8 deletions PDFCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
isManualAnalysis = False
spacesChars = ['\x00','\x09','\x0a','\x0c','\x0d','\x20']
delimiterChars = ['<<','(','<','[','{','/','%']
monitorizedEvents = ['/OpenAction ','/AA ','/Names ','/AcroForm ', '/XFA ']
monitorizedActions = ['/JS ','/JavaScript','/Launch','/SubmitForm','/ImportData']
monitorizedElements = ['/EmbeddedFiles ',
monitorizedEvents = ['/OpenAction','/AA','/Names','/AcroForm', '/XFA']
monitorizedActions = ['/JS','/JavaScript','/Launch','/SubmitForm','/ImportData']
monitorizedElements = ['/EmbeddedFiles',
'/EmbeddedFile',
'/JBIG2Decode',
'getPageNthWord',
Expand Down Expand Up @@ -83,6 +83,7 @@
'keep.previous':('Adobe Reader XFA oneOfChild Un-initialized memory vulnerability',['CVE-2013-0640']), # https://labs.portcullis.co.uk/blog/cve-2013-0640-adobe-reader-xfa-oneofchild-un-initialized-memory-vulnerability-part-1/
bmpVuln:(bmpVuln,['CVE-2013-2729']),
'app.removeToolButton':('app.removeToolButton',['CVE-2013-3346'])}
monitoring=monitorizedActions + monitorizedElements + monitorizedEvents
jsContexts = {'global':None}

class PDFObject :
Expand Down Expand Up @@ -4448,7 +4449,6 @@ def updateStats(self, id, pdfObject, delete=False):
return (0,'')



class PDFTrailer :
def __init__(self, dict, lastCrossRefSection = '0', streamPresent = False):
self.errors = []
Expand Down Expand Up @@ -6210,6 +6210,7 @@ def getStats (self):
containingURIs = self.body[version].getContainingURIs()
if len(containingURIs) > 0:
statsVersion['URIs'] = [str(len(containingURIs)), containingURIs]
statsVersion['URIDisplay'] = set(self.getURIs(version=version)[0]) #only get unique URIs
else:
statsVersion['URIs'] = None
containingJS = self.body[version].getContainingJS()
Expand Down Expand Up @@ -6310,10 +6311,16 @@ def getTree (self, version = None) :
else:
dictType = object.getDictType()
if dictType != '':
type = dictType
else:
if type == 'dictionary' and len(elements) == 1:
type = elements.keys()[0]
type += " " + dictType
# add monitorized actions, events and elements
for element in elements.keys():
if element == "/Type":
subType = elements[element].getValue()
if subType in monitoring:
type += " " + subType
if element in monitoring:
type += " " + element

references = self.getReferencesIn(id, version)
for i in range(len(references)):
referencesIds.append(int(references[i].split()[0]))
Expand Down Expand Up @@ -6840,6 +6847,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
@param fileName The name of the file to be parsed
@param forceMode Boolean to specify if ignore errors or not. Default value: False.
@param looseMode Boolean to set the loose mode when parsing objects. Default value: False.
@param manualAnalysis Boolean to specify whether JS analysis is performed. Default value: False.
@return A PDFFile instance
'''
global isForceMode, pdfFile, isManualAnalysis
Expand Down Expand Up @@ -6928,6 +6936,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis

# Getting the number of updates in the file
while fileContent.find('%%EOF') != -1:

self.readUntilSymbol(fileContent, '%%EOF')
self.readUntilEndOfLine(fileContent)
self.fileParts.append(fileContent[:self.charCounter])
Expand Down Expand Up @@ -6995,12 +7004,15 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis

# Converting the body content in PDFObjects
body = PDFBody()
# search for objects e.g. 10 0 obj
rawIndirectObjects = self.getIndirectObjects(bodyContent, looseMode)
if rawIndirectObjects != []:
for j in range(len(rawIndirectObjects)):
relativeOffset = 0
auxContent = str(bodyContent)
#raw content of object
rawObject = rawIndirectObjects[j][0]
#object header e.g. 10 0 obj
objectHeader = rawIndirectObjects[j][1]
while True:
index = auxContent.find(objectHeader)
Expand All @@ -7014,6 +7026,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
else:
auxContent = auxContent[index+len(objectHeader):]
relativeOffset += len(objectHeader)
#find object in rawObject
ret = self.createPDFIndirectObject(rawObject, forceMode, looseMode)
if ret[0] != -1:
pdfIndirectObject = ret[1]
Expand Down Expand Up @@ -7345,6 +7358,7 @@ def createPDFStream(self, dict, stream):
elements = {}
rawNames = {}
ret = self.readObject(dict[self.charCounter:], 'name')

if ret[0] == -1:
if ret[1] != 'Empty content reading object':
if isForceMode:
Expand All @@ -7356,6 +7370,7 @@ def createPDFStream(self, dict, stream):
name = None
else:
name = ret[1]

while name != None:
key = name.getValue()
rawNames[key] = name
Expand Down Expand Up @@ -7384,6 +7399,7 @@ def createPDFStream(self, dict, stream):
name = None
else:
name = ret[1]

if elements.has_key('/Type') and elements['/Type'].getValue() == '/ObjStm':
try:
pdfStream = PDFObjectStream(dict, stream, elements, rawNames, {})
Expand All @@ -7400,6 +7416,7 @@ def createPDFStream(self, dict, stream):
if e.message != '':
errorMessage += ': '+e.message
return (-1, errorMessage)

self.charCounter = realCounter
return (0,pdfStream)

Expand Down
5 changes: 5 additions & 0 deletions peepdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,11 @@ def getPeepJSON(statsDict, version, revision):
if statsVersion['URIs'] is not None:
stats += newLine + beforeStaticLabel + '\tObjects with URIs (' + \
statsVersion['URIs'][0] + '): ' + resetColor + str(statsVersion['URIs'][1])
stats += newLine + beforeStaticLabel + '\tFound URIs : ' + resetColor
for display in statsVersion['URIDisplay']:
display=str(display)
if "http" in display.lower():
stats += newLine + beforeStaticLabel + '\t\t' + resetColor + display
if COLORIZED_OUTPUT and not options.avoidColors:
beforeStaticLabel = warningColor
if statsVersion['Objects with JS code'] != None:
Expand Down