jesparza · Tholep · Oct 21, 2018 · Oct 21, 2018 · Oct 22, 2018 · Oct 27, 2018
diff --git a/JSAnalysis.py b/JSAnalysis.py
@@ -176,14 +176,13 @@ def isJavascript(content):
         @param content: A string
         @return: A boolean, True if it seems to contain Javascript code or False in the other case
     '''
-    jsStrings = ['var ', ';', ')', '(', 'function ', '=', '{', '}', 'if ', 'else', 'return', 'while ', 'for ',
+    jsStrings = ['var ', ';', ')', '(', 'function ', '=', '{', '}', 'if(', 'if (', 'else{', 'else {','else if', 'return', 'while(', 'while (', 'for(', 'for (',
                  ',', 'eval']
     keyStrings = [';', '(', ')']
-    reVarInit = 'var [\w0-9]+\s*?='
-    reFunctionCall = '[\w0-9]+\s*?\(.*?\)\s*?;'
     stringsFound = []
     limit = 15
-    minDistinctStringsFound = 4
+    # JS code should contain at least ';', ')', '(', 'var ' and '='
+    minDistinctStringsFound = 5
     minRatio = 10
     results = 0
     length = len(content)
@@ -279,4 +278,4 @@ def unescape(escapedBytes, unicode = True):
             unescapedBytes = escapedBytes
     except:
         return (-1, 'Error while unescaping the bytes')
-    return (0, unescapedBytes)
+    return (0, unescapedBytes)
diff --git a/PDFConsole.py b/PDFConsole.py
@@ -79,7 +79,20 @@
                          'rl': '/RunLengthDecode', 'ccittfax': '/CCITTFaxDecode', 'ccf': '/CCITTFaxDecode',
                          'jbig2': '/JBIG2Decode', 'dct': '/DCTDecode', 'jpx': '/JPXDecode'}
 
-
+monitorizedEvents = ['/OpenAction','/AA','/Names','/AcroForm', '/XFA']
+monitorizedActions = ['/JS','/JavaScript','/Launch','/SubmitForm','/ImportData']
+monitorizedElements = ['/EmbeddedFiles',
+                       '/EmbeddedFile',
+                       '/JBIG2Decode',
+                       'getPageNthWord',
+                       'arguments.callee',
+                       '/U3D',
+                       '/PRC',
+                       '/RichMedia',
+                       '/Flash',
+                       '.rawValue',
+                       'keep.previous']
+monitoring=monitorizedActions + monitorizedElements + monitorizedEvents
 class PDFConsole(cmd.Cmd):
     '''
         Class of the peepdf interactive console. To see details about commands: http://code.google.com/p/peepdf/wiki/Commands
@@ -3588,7 +3601,7 @@ def do_tree(self, argv):
             root = tree[i][0]
             objectsInfo = tree[i][1]
             if i != 0:
-                treeOutput += newLine + 'Version ' + str(i) + ':' + newLine * 2
+                treeOutput += newLine + self.staticColor + 'Version ' + str(i) + self.resetColor + ':' + newLine * 2
             if root is not None:
                 nodesPrinted, nodeOutput = self.printTreeNode(root, objectsInfo, nodesPrinted)
                 treeOutput += nodeOutput
@@ -4641,7 +4654,23 @@ def printTreeNode(self, node, nodesInfo, expandedNodes=[], depth=0, recursive=Tr
         output = ''
         if nodesInfo.has_key(node):
             if node not in expandedNodes or (node in expandedNodes and depth > 0):
-                output += '\t' * depth + nodesInfo[node][0] + ' (' + str(node) + ')' + newLine
+                isMonitored = False
+                # Check whether any of the object's types is in the monitoring list
+                nodesInformation=nodesInfo[node][0]
+                types=nodesInformation.split(" ")
+                for t in types:
+                    if t in monitoring:
+                        isMonitored = True
+                if  self.pdfFile.getObject(node).containsJS():
+                    nodesInformation += " containsJS" 
+                    isMonitored = True
+
+                if isMonitored:
+                    output += '\t' * depth + self.warningColor + nodesInformation + self.resetColor + ' (' + str(node) + ')' + newLine
+                else:
+                    output += '\t' * depth  + nodesInformation + ' (' + str(node) + ')' + newLine
+
+
             if node not in expandedNodes:
                 expandedNodes.append(node)
                 children = nodesInfo[node][1]

diff --git a/PDFCore.py b/PDFCore.py
@@ -43,9 +43,9 @@
 isManualAnalysis = False
 spacesChars = ['\x00','\x09','\x0a','\x0c','\x0d','\x20']
 delimiterChars = ['<<','(','<','[','{','/','%']
-monitorizedEvents = ['/OpenAction ','/AA ','/Names ','/AcroForm ', '/XFA ']
-monitorizedActions = ['/JS ','/JavaScript','/Launch','/SubmitForm','/ImportData']
-monitorizedElements = ['/EmbeddedFiles ',
+monitorizedEvents = ['/OpenAction','/AA','/Names','/AcroForm', '/XFA']
+monitorizedActions = ['/JS','/JavaScript','/Launch','/SubmitForm','/ImportData']
+monitorizedElements = ['/EmbeddedFiles',
                        '/EmbeddedFile',
                        '/JBIG2Decode',
                        'getPageNthWord',
@@ -83,6 +83,7 @@
              'keep.previous':('Adobe Reader XFA oneOfChild Un-initialized memory vulnerability',['CVE-2013-0640']), # https://labs.portcullis.co.uk/blog/cve-2013-0640-adobe-reader-xfa-oneofchild-un-initialized-memory-vulnerability-part-1/
              bmpVuln:(bmpVuln,['CVE-2013-2729']),
              'app.removeToolButton':('app.removeToolButton',['CVE-2013-3346'])}
+monitoring=monitorizedActions + monitorizedElements + monitorizedEvents
 jsContexts = {'global':None}
 
 class PDFObject :
@@ -4448,7 +4449,6 @@ def updateStats(self, id, pdfObject, delete=False):
         return (0,'')                        
 
 
-
 class PDFTrailer :
     def __init__(self, dict, lastCrossRefSection = '0', streamPresent = False):
         self.errors = []
@@ -6210,6 +6210,7 @@ def getStats (self):
             containingURIs = self.body[version].getContainingURIs()
             if len(containingURIs) > 0:
                 statsVersion['URIs'] = [str(len(containingURIs)), containingURIs]
+                statsVersion['URIDisplay'] = set(self.getURIs(version=version)[0])  # keep only the unique URIs
             else:
                 statsVersion['URIs'] = None
             containingJS = self.body[version].getContainingJS()
@@ -6310,10 +6311,16 @@ def getTree (self, version = None) :
                         else:
                             dictType = object.getDictType()
                             if dictType != '':
-                                type = dictType
-                            else:
-                                if type == 'dictionary' and len(elements) == 1:
-                                    type = elements.keys()[0]
+                                type += " " + dictType
+                            # Append any monitored actions, events and elements found among the object's elements
+                            for element in elements.keys():
+                                if element == "/Type":
+                                    subType = elements[element].getValue()
+                                    if subType in monitoring:
+                                        type += " " + subType
+                                if element in monitoring:
+                                    type += " " + element
+
                     references = self.getReferencesIn(id, version)
                     for i in range(len(references)):
                         referencesIds.append(int(references[i].split()[0]))
@@ -6840,6 +6847,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
             @param fileName The name of the file to be parsed
             @param forceMode Boolean to specify if ignore errors or not. Default value: False.
             @param looseMode Boolean to set the loose mode when parsing objects. Default value: False.
+            @param manualAnalysis Boolean to specify whether JS analysis is performed. Default value: False.
             @return A PDFFile instance
         '''
         global isForceMode, pdfFile, isManualAnalysis
@@ -6928,6 +6936,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
 
         # Getting the number of updates in the file
         while fileContent.find('%%EOF') != -1:
+
             self.readUntilSymbol(fileContent, '%%EOF')
             self.readUntilEndOfLine(fileContent)
             self.fileParts.append(fileContent[:self.charCounter])
@@ -6995,12 +7004,15 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
 
             # Converting the body content in PDFObjects
             body = PDFBody()
+            # Search the body content for indirect objects, e.g. "10 0 obj"
             rawIndirectObjects = self.getIndirectObjects(bodyContent, looseMode)
             if rawIndirectObjects != []:
                 for j in range(len(rawIndirectObjects)):
                     relativeOffset = 0
                     auxContent = str(bodyContent)
+                    # Raw content of the object
                     rawObject = rawIndirectObjects[j][0]
+                    # Object header, e.g. "10 0 obj"
                     objectHeader = rawIndirectObjects[j][1]
                     while True:
                         index = auxContent.find(objectHeader)
@@ -7014,6 +7026,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
                         else:
                             auxContent = auxContent[index+len(objectHeader):]
                             relativeOffset += len(objectHeader)
+                    # Create the indirect object from its raw content
                     ret = self.createPDFIndirectObject(rawObject, forceMode, looseMode)
                     if ret[0] != -1:
                         pdfIndirectObject = ret[1]
@@ -7345,6 +7358,7 @@ def createPDFStream(self, dict, stream):
         elements = {}
         rawNames = {}
         ret = self.readObject(dict[self.charCounter:], 'name')
+
         if ret[0] == -1:
             if ret[1] != 'Empty content reading object':
                 if isForceMode:
@@ -7356,6 +7370,7 @@ def createPDFStream(self, dict, stream):
                 name = None
         else:
             name = ret[1]    
+
         while name != None:
             key = name.getValue()
             rawNames[key] = name
@@ -7384,6 +7399,7 @@ def createPDFStream(self, dict, stream):
                     name = None
             else:
                 name = ret[1]
+
         if elements.has_key('/Type') and elements['/Type'].getValue() == '/ObjStm':
             try:
                 pdfStream = PDFObjectStream(dict, stream, elements, rawNames, {})
@@ -7400,6 +7416,7 @@ def createPDFStream(self, dict, stream):
                 if e.message != '':
                     errorMessage += ': '+e.message
                 return (-1, errorMessage)
+
         self.charCounter = realCounter
         return (0,pdfStream)
 

diff --git a/peepdf.py b/peepdf.py
@@ -664,6 +664,11 @@ def getPeepJSON(statsDict, version, revision):
                         if statsVersion['URIs'] is not None:
                             stats += newLine + beforeStaticLabel + '\tObjects with URIs (' + \
                                      statsVersion['URIs'][0] + '): ' + resetColor + str(statsVersion['URIs'][1])
+                            stats += newLine + beforeStaticLabel + '\tFound URIs : ' + resetColor
+                            for display in statsVersion['URIDisplay']:
+                                display=str(display)
+                                if "http" in display.lower():
+                                    stats += newLine + beforeStaticLabel + '\t\t' + resetColor + display
                         if COLORIZED_OUTPUT and not options.avoidColors:
                             beforeStaticLabel = warningColor
                         if statsVersion['Objects with JS code'] != None: