jesparza · sylvainpelissier · Jun 13, 2017
diff --git a/JSAnalysis.py b/JSAnalysis.py
@@ -251,8 +251,8 @@ def unescape(escapedBytes, unicode = True):
     else:
         unicodePadding = ''
     try:
-        if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\u') != -1 or escapedBytes.find('%') != -1:
-            if escapedBytes.lower().find('\u') != -1:
+        if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find(r'\u') != -1 or escapedBytes.find('%') != -1:
+            if escapedBytes.lower().find(r'\u') != -1:
                 splitBytes = escapedBytes.split('\\')
             else:
                 splitBytes = escapedBytes.split('%')
@@ -279,4 +279,4 @@ def unescape(escapedBytes, unicode = True):
             unescapedBytes = escapedBytes
     except:
         return (-1, 'Error while unescaping the bytes')
-    return (0, unescapedBytes)
+    return (0, unescapedBytes)
diff --git a/PDFConsole.py b/PDFConsole.py
diff --git a/PDFCore.py b/PDFCore.py
@@ -6860,11 +6860,11 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
         file = open(fileName,'rb')
         for line in file:
             if versionLine == '':
-                pdfHeaderIndex = line.find('%PDF-')
-                psHeaderIndex = line.find('%!PS-Adobe-')
+                pdfHeaderIndex = line.find(b'%PDF-')
+                psHeaderIndex = line.find(b'%!PS-Adobe-')
                 if pdfHeaderIndex != -1 or psHeaderIndex != -1:
-                    index = line.find('\r')
-                    if index != -1 and index+1 < len(line) and line[index+1] != '\n':
+                    index = line.find(b'\r')
+                    if index != -1 and index+1 < len(line) and line[index+1:index+2] != b'\n':  # slice, not index: bytes[i] is an int on Python 3
                         index += 1
                         versionLine = line[:index]
                         binaryLine = line[index:]
@@ -6885,9 +6885,9 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
         file.close()
 
         # Getting the specification version
-        versionLine = versionLine.replace('\r','')
-        versionLine = versionLine.replace('\n','')
-        matchVersion = re.findall('%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})',versionLine)
+        versionLine = versionLine.replace(b'\r',b'')
+        versionLine = versionLine.replace(b'\n',b'')
+        matchVersion = re.findall(br'%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})',versionLine)  # raw bytes literal: \d, \. and \s are regex escapes, not string escapes
         if matchVersion == []:
             if forceMode:
                 pdfFile.setVersion(versionLine)
@@ -6902,15 +6902,15 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
 
         # Getting the end of line
         if len(binaryLine) > 3:
-            if binaryLine[-2:] == '\r\n':
-                pdfFile.setEndLine('\r\n')
+            if binaryLine[-2:] == b'\r\n':
+                pdfFile.setEndLine(b'\r\n')
             else:
-                if binaryLine[-1] == '\r':
-                    pdfFile.setEndLine('\r')
-                elif binaryLine[-1] == '\n':
-                    pdfFile.setEndLine('\n')
+                if binaryLine[-1:] == b'\r':  # slice keeps a bytes object; binaryLine[-1] is an int on Python 3
+                    pdfFile.setEndLine(b'\r')
+                elif binaryLine[-1:] == b'\n':
+                    pdfFile.setEndLine(b'\n')
                 else:
-                    pdfFile.setEndLine('\n')
+                    pdfFile.setEndLine(b'\n')
 
             # Does it contain binary characters??
-            if binaryLine[0] == '%' and ord(binaryLine[1]) >= 128 and ord(binaryLine[2]) >= 128 and ord(binaryLine[3]) >= 128 and ord(binaryLine[4]) >= 128:
+            if binaryLine[0:1] == b'%' and len(binaryLine) > 4 and all(byte >= 128 for byte in bytearray(binaryLine[1:5])):  # bytearray yields ints on Python 2 and 3; length guard covers index 4
@@ -6927,15 +6927,15 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
         pdfFile.setSHA256(hashlib.sha256(fileContent).hexdigest())
 
         # Getting the number of updates in the file
-        while fileContent.find('%%EOF') != -1:
-            self.readUntilSymbol(fileContent, '%%EOF')
+        while fileContent.find(b'%%EOF') != -1:
+            self.readUntilSymbol(fileContent, b'%%EOF')
             self.readUntilEndOfLine(fileContent)
             self.fileParts.append(fileContent[:self.charCounter])
             fileContent = fileContent[self.charCounter:]
             self.charCounter = 0
         else:
             if self.fileParts == []:
                 errorMessage = '%%EOF not found'
                 if forceMode:
                     pdfFile.addError(errorMessage)
                     self.fileParts.append(fileContent)
@@ -6974,17 +6974,17 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis
             if xrefContent != None:    
                 xrefOffset = bodyOffset + len(bodyContent)
                 trailerOffset = xrefOffset + len(xrefContent)
-                bodyContent = bodyContent.strip('\r\n')
-                xrefContent = xrefContent.strip('\r\n')
-                trailerContent = trailerContent.strip('\r\n')
+                bodyContent = bodyContent.strip(b'\r\n')
+                xrefContent = xrefContent.strip(b'\r\n')
+                trailerContent = trailerContent.strip(b'\r\n')
                 trailerFound = True
                 xrefFound = True
             else:
                 if trailerContent != None:
                     xrefOffset = -1
                     trailerOffset = bodyOffset + len(bodyContent)
-                    bodyContent = bodyContent.strip('\r\n')
-                    trailerContent = trailerContent.strip('\r\n')    
+                    bodyContent = bodyContent.strip(b'\r\n')
+                    trailerContent = trailerContent.strip(b'\r\n')    
                 else:
                     errorMessage = 'PDF sections not found'
                     if forceMode:
@@ -7150,16 +7150,16 @@ def parsePDFSections(self, content, forceMode = False, looseMode = False):
         trailerContent = None
 
         global pdfFile
-        indexTrailer = content.find('trailer')
+        indexTrailer = content.find(b'trailer')
         if indexTrailer != -1:
             restContent = content[:indexTrailer]
             auxTrailer = content[indexTrailer:]
-            indexEOF = auxTrailer.find('%%EOF')
+            indexEOF = auxTrailer.find(b'%%EOF')
             if indexEOF == -1:
                 trailerContent = auxTrailer
             else:
                 trailerContent = auxTrailer[:indexEOF+5]
-            indexXref = restContent.find('xref')
+            indexXref = restContent.find(b'xref')
             if indexXref != -1:
                 bodyContent = restContent[:indexXref]
                 xrefContent = restContent[indexXref:]
@@ -7169,11 +7169,11 @@ def parsePDFSections(self, content, forceMode = False, looseMode = False):
                     pdfFile.addError('Xref section not found')
             return [bodyContent,xrefContent,trailerContent]                
 
-        indexTrailer = content.find('startxref')
+        indexTrailer = content.find(b'startxref')
         if indexTrailer != -1:
             restContent = content[:indexTrailer]
             auxTrailer = content[indexTrailer:]
-            indexEOF = auxTrailer.find('%%EOF')
+            indexEOF = auxTrailer.find(b'%%EOF')
             if indexEOF == -1:
                 trailerContent = auxTrailer
             else:
@@ -8053,13 +8053,13 @@ def readUntilEndOfLine(self, content):
             @return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1
         '''
         global pdfFile
-        if not isinstance(content,str):
+        if not isinstance(content, bytes):
             return (-1,'Bad string')
         errorMessage = []
         oldCharCounter = self.charCounter
         tmpContent = content[self.charCounter:]
         for char in tmpContent:
-            if char == '\r' or char == '\n':
+            if char in b'\r\n':  # membership works for Python 3 ints and Python 2 one-character strings alike
                 return (0,content[oldCharCounter:self.charCounter])
             self.charCounter += 1
         else:

diff --git a/PDFCrypto.py b/PDFCrypto.py
@@ -26,7 +26,13 @@
 '''    
 
 import hashlib,struct,random,warnings,aes,sys
-from itertools import cycle, izip
+from itertools import cycle
+
+try:
+    from itertools import izip
+except ImportError:
+    izip = zip
+
 warnings.filterwarnings("ignore")
 
 paddingString = '\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A'
@@ -332,4 +338,4 @@ def xor(bytes, key):
         @return: The xored bytes
     '''
     key = cycle(key)
-    return ''.join(chr(ord(x) ^ ord(y)) for (x,y) in izip(bytes, key))
+    return ''.join(chr(ord(x) ^ ord(y)) for (x,y) in izip(bytes, key))
diff --git a/PDFUtils.py b/PDFUtils.py
@@ -25,7 +25,14 @@
     Module with some misc functions
 '''
 
-import os, re, htmlentitydefs, json, urllib, urllib2
+import os, re, json, urllib
+
+try:
+    from html.entities import name2codepoint
+    from urllib.request import urlopen, Request
+except ImportError:
+    from htmlentitydefs import name2codepoint
+    from urllib2 import urlopen, Request
 
 def clearScreen():
 	'''
@@ -325,7 +332,7 @@ def numToHex(num, numBytes):
 	        hexString += chr(int(hexNumber[i]+hexNumber[i+1],16))
 	    hexString = '\0'*(numBytes-len(hexString))+hexString
     except:
-		return (-1,'Error in hexadecimal conversion')
+        return (-1,'Error in hexadecimal conversion')
     return (0,hexString)
 
 def numToString(num, numDigits):
@@ -369,7 +376,7 @@ def fixup(m):
         else:
             # named entity
             try:
-                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+                text = unichr(name2codepoint[text[1:-1]])
             except KeyError:
                 pass
         return text # leave as is
@@ -427,13 +434,17 @@ def vtcheck(md5, vtKey):
     parameters = {'resource':md5,'apikey':vtKey}
     try:
-        data = urllib.urlencode(parameters)
-        req = urllib2.Request(vtUrl, data)
-        response = urllib2.urlopen(req)
+        try:
+            from urllib.parse import urlencode
+        except ImportError:
+            from urllib import urlencode
+        data = urlencode(parameters).encode('ascii')  # Python 3 requires bytes POST data
+        req = Request(vtUrl, data)
+        response = urlopen(req)
         jsonResponse = response.read()
     except:
         return (-1, 'The request to VirusTotal has not been successful')
     try:
         jsonDict = json.loads(jsonResponse)
     except:
         return (-1, 'An error has occurred while parsing the JSON response from VirusTotal')
-    return (0, jsonDict)
+    return (0, jsonDict)
diff --git a/ccitt.py b/ccitt.py
@@ -36,7 +36,7 @@ def write(self, data, length):
         """
         """
         if not ( length >= 0 and (1 << length) > data ):
-            raise BitWriterException, "Invalid data length"
+            raise BitWriterException("Invalid data length")
 
         if length == 8 and not self._last_byte and self._bit_ptr == 0:
             self._data += chr(data)
@@ -108,7 +108,7 @@ def pos(self, bits):
         """
         """
         if bits > self.size:
-            raise BitReaderException, "Pointer position out of data"
+            raise BitReaderException("Pointer position out of data")
 
         pbyte = bits >> 3
         pbit = bits - (pbyte <<3)
@@ -118,9 +118,9 @@ def peek(self, length):
         """
         """
         if length <= 0:
-            raise BitReaderException, "Invalid read length"
+            raise BitReaderException("Invalid read length")
         elif ( self.pos + length ) > self.size:
-            raise BitReaderException, "Insufficient data"
+            raise BitReaderException("Insufficient data")
 
         n = 0
         byte_ptr, bit_ptr = self._byte_ptr, self._bit_ptr
@@ -228,7 +228,7 @@ class CCITTFax(object):
         63  : codeword('00110100')
         }
 
-    WHITE_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in WHITE_TERMINAL_ENCODE_TABLE.iteritems() )
+    WHITE_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in WHITE_TERMINAL_ENCODE_TABLE.items() )
 
     BLACK_TERMINAL_ENCODE_TABLE = {
         0   : codeword('0000110111'),
@@ -297,7 +297,7 @@ class CCITTFax(object):
         63  : codeword('000001100111')
         }
 
-    BLACK_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in BLACK_TERMINAL_ENCODE_TABLE.iteritems() )
+    BLACK_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in BLACK_TERMINAL_ENCODE_TABLE.items() )
 
     WHITE_CONFIGURATION_ENCODE_TABLE = {
         64    : codeword('11011'),
@@ -343,7 +343,7 @@ class CCITTFax(object):
         2560  : codeword('000000011111')
         }
 
-    WHITE_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in WHITE_CONFIGURATION_ENCODE_TABLE.iteritems() )
+    WHITE_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in WHITE_CONFIGURATION_ENCODE_TABLE.items() )
 
     BLACK_CONFIGURATION_ENCODE_TABLE = {
         64    : codeword('0000001111'),
@@ -389,7 +389,7 @@ class CCITTFax(object):
         2560  : codeword('000000011111')
         }
 
-    BLACK_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in BLACK_CONFIGURATION_ENCODE_TABLE.iteritems() )
+    BLACK_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in BLACK_CONFIGURATION_ENCODE_TABLE.items() )
 
     def __init__(self, ):
         """
@@ -422,7 +422,7 @@ def decode(self, stream, k = 0, eol = False, byteAlign = False, columns = 1728,
 
             if bitr.peek(self.EOL[1]) != self.EOL[0]:
                 if eol:
-                    raise Exception, "No end-of-line pattern found (at bit pos %d/%d)" % (bitr.pos, bitr.size)
+                    raise Exception("No end-of-line pattern found (at bit pos %d/%d)" % (bitr.pos, bitr.size))
             else:
                 bitr.pos += self.EOL[1]
 
@@ -433,11 +433,11 @@ def decode(self, stream, k = 0, eol = False, byteAlign = False, columns = 1728,
                 else:
                     bit_length = self.get_black_bits(bitr)
                 if bit_length == None:
-                    raise Exception, "Unfinished line (at bit pos %d/%d), %s" % (bitr.pos, bitr.size, bitw.data)
+                    raise Exception("Unfinished line (at bit pos %d/%d), %s" % (bitr.pos, bitr.size, bitw.data))
 
                 line_length += bit_length
                 if line_length > columns:
-                    raise Exception, "Line is too long (at bit pos %d/%d)" % (bitr.pos, bitr.size)
+                    raise Exception("Line is too long (at bit pos %d/%d)" % (bitr.pos, bitr.size))
 
                 bitw.write( (current_color << bit_length) - current_color, bit_length )
 
@@ -486,4 +486,4 @@ def get_color_bits(self, bitr, config_words, term_words):
 
                     return bits
 
-        return None
+        return None
diff --git a/lzw.py b/lzw.py
@@ -878,7 +878,7 @@ def bitstobytes(bits):
 try:
     from cStringIO import StringIO
 except ImportError:
-    from StringIO import StringIO
+    from io import StringIO
 
 
 ##  LZWDecoder
@@ -969,4 +969,4 @@ def lzwdecode(data):
     '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42'
     """
     fp = StringIO(data)
-    return ''.join(LZWDecoder(fp).run())
+    return ''.join(LZWDecoder(fp).run())