diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..87d7b69 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +.cache +.idea +peepdf.egg-info/ diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..fcb1ca1 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,35 @@ +language: python + +matrix: + fast_finish: true + include: + - python: 2.7 + - python: 3.5 + - python: 3.6 + - os: osx + language: generic + +before_install: + - | + if [[ $TRAVIS_OS_NAME == "osx" ]]; then + # The following wasn't required in the past and therefore may become + # obsolete once again in the future. Let's wait and see. + wget https://bootstrap.pypa.io/get-pip.py + sudo python get-pip.py + sudo pip install virtualenv + virtualenv $HOME + source $HOME/bin/activate + fi + - '[[ $TRAVIS_OS_NAME == "linux" ]] && sudo apt-get update -qq || true' + - '[[ $TRAVIS_OS_NAME == "linux" ]] && sudo apt-get install python-dev libffi-dev libxml2-dev libxslt1-dev libjpeg-dev || true' + +install: + - pip install -e . + - pip install pytest pytest-cov mock coveralls + +script: + - py.test --cov=peepdf + +after_success: + - codecov + - coveralls diff --git a/PDFUtils.py b/PDFUtils.py deleted file mode 100644 index 6423831..0000000 --- a/PDFUtils.py +++ /dev/null @@ -1,439 +0,0 @@ -# -# peepdf is a tool to analyse and modify PDF files -# http://peepdf.eternal-todo.com -# By Jose Miguel Esparza -# -# Copyright (C) 2011-2017 Jose Miguel Esparza -# -# This file is part of peepdf. -# -# peepdf is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# peepdf is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with peepdf. If not, see . -# - -''' - Module with some misc functions -''' - -import os, re, htmlentitydefs, json, urllib, urllib2 - -def clearScreen(): - ''' - Simple method to clear the screen depending on the OS - ''' - if os.name == 'nt': - os.system('cls') - elif os.name == 'posix': - os.system('reset') - elif os.name == 'mac': - os.system('clear') - -def countArrayElements(array): - ''' - Simple method to count the repetitions of elements in an array - - @param array: An array of elements - @return: A tuple (elements,counters), where elements is a list with the distinct elements and counters is the list with the number of times they appear in the array - ''' - elements = [] - counters = [] - for element in array: - if element in elements: - indx = elements.index(element) - counters[indx] += 1 - else: - elements.append(element) - counters.append(1) - return elements,counters - -def countNonPrintableChars(string): - ''' - Simple method to return the non printable characters found in an string - - @param string: A string - @return: Number of non printable characters in the string - ''' - counter = 0 - for i in range(len(string)): - if ord(string[i]) <= 31 or ord(string[i]) > 127: - counter += 1 - return counter - -def decodeName(name): - ''' - Decode the given PDF name - - @param name: A PDFName string to decode - @return: A tuple (status,statusContent), where statusContent is the decoded PDF name in case status = 0 or an error in case status = -1 - ''' - decodedName = name - hexNumbers = re.findall('#([0-9a-f]{2})', name, re.DOTALL | re.IGNORECASE) - for hexNumber in hexNumbers: - try: - decodedName = decodedName.replace('#'+hexNumber,chr(int(hexNumber,16))) - except: - return (-1,'Error decoding name') - return (0,decodedName) - -def decodeString(string): - ''' - Decode the given PDF string - - @param string: A PDFString to decode - @return A tuple (status,statusContent), where 
statusContent is the decoded PDF string in case status = 0 or an error in case status = -1 - ''' - decodedString = string - octalNumbers = re.findall('\\\\([0-7]{1-3})', decodedString, re.DOTALL) - for octal in octalNumbers: - try: - decodedString = decodedString.replace('\\\\'+octal,chr(int(octal,8))) - except: - return (-1,'Error decoding string') - return (0,decodedString) - -def encodeName(name): - ''' - Encode the given PDF name - - @param name: A PDFName string to encode - @return: A tuple (status,statusContent), where statusContent is the encoded PDF name in case status = 0 or an error in case status = -1 - ''' - encodedName = '' - if name[0] == '/': - name = name[1:] - for char in name: - if char == '\0': - encodedName += char - else: - try: - hex = '%x' % ord(char) - encodedName += '#'+hex - except: - return (-1,'Error encoding name') - return (0,'/'+encodedName) - -def encodeString(string): - ''' - Encode the given PDF string - - @param string: A PDFString to encode - @return: A tuple (status,statusContent), where statusContent is the encoded PDF string in case status = 0 or an error in case status = -1 - ''' - encodedString = '' - try: - for char in string: - octal = '%o' % ord(char) - encodedString += '\\'+(3-len(octal))*'0'+octal - except: - return (-1,'Error encoding string') - return (0,encodedString) - -def escapeRegExpString(string): - ''' - Escape the given string to include it as a regular expression - - @param string: A regular expression to be escaped - @return: Escaped string - ''' - toEscapeChars = ['\\','(',')','.','|','^','$','*','+','?','[',']'] - escapedValue = '' - for i in range(len(string)): - if string[i] in toEscapeChars: - escapedValue += '\\'+string[i] - else: - escapedValue += string[i] - return escapedValue - -def escapeString(string): - ''' - Escape the given string - - @param string: A string to be escaped - @return: Escaped string - ''' - toEscapeChars = ['\\','(',')'] - escapedValue = '' - for i in range(len(string)): - if 
string[i] in toEscapeChars and (i == 0 or string[i-1] != '\\'): - if string[i] == '\\': - if len(string) > i+1 and re.match('[0-7]',string[i+1]): - escapedValue += string[i] - else: - escapedValue += '\\'+string[i] - else: - escapedValue += '\\'+string[i] - elif string[i] == '\r': - escapedValue += '\\r' - elif string[i] == '\n': - escapedValue += '\\n' - elif string[i] == '\t': - escapedValue += '\\t' - elif string[i] == '\b': - escapedValue += '\\b' - elif string[i] == '\f': - escapedValue += '\\f' - else: - escapedValue += string[i] - return escapedValue - -def getBitsFromNum(num, bitsPerComponent = 8): - ''' - Makes the conversion between number and bits - - @param num: Number to be converted - @param bitsPerComponent: Number of bits needed to represent a component - @return: A tuple (status,statusContent), where statusContent is the string containing the resulting bits in case status = 0 or an error in case status = -1 - ''' - if not isinstance(num,int): - return (-1,'num must be an integer') - if not isinstance(bitsPerComponent,int): - return (-1,'bitsPerComponent must be an integer') - try: - bitsRepresentation = bin(num) - bitsRepresentation = bitsRepresentation.replace('0b','') - mod = len(bitsRepresentation) % 8 - if mod != 0: - bitsRepresentation = '0'*(8-mod) + bitsRepresentation - bitsRepresentation = bitsRepresentation[-1*bitsPerComponent:] - except: - return (-1,'Error in conversion from number to bits') - return (0,bitsRepresentation) - - -def getNumsFromBytes(bytes, bitsPerComponent = 8): - ''' - Makes the conversion between bytes and numbers, depending on the number of bits used per component. 
- - @param bytes: String representing the bytes to be converted - @param bitsPerComponent: Number of bits needed to represent a component - @return: A tuple (status,statusContent), where statusContent is a list of numbers in case status = 0 or an error in case status = -1 - ''' - if not isinstance(bytes,str): - return (-1,'bytes must be a string') - if not isinstance(bitsPerComponent,int): - return (-1,'bitsPerComponent must be an integer') - outputComponents = [] - bitsStream = '' - for byte in bytes: - try: - bitsRepresentation = bin(ord(byte)) - bitsRepresentation = bitsRepresentation.replace('0b','') - bitsRepresentation = '0'*(8-len(bitsRepresentation)) + bitsRepresentation - bitsStream += bitsRepresentation - except: - return (-1,'Error in conversion from bytes to bits') - - try: - for i in range(0,len(bitsStream),bitsPerComponent): - bytes = '' - bits = bitsStream[i:i+bitsPerComponent] - num = int(bits,2) - outputComponents.append(num) - except: - return (-1,'Error in conversion from bits to bytes') - return (0,outputComponents) - -def getBytesFromBits(bitsStream): - ''' - Makes the conversion between bits and bytes. 
- - @param bitsStream: String representing a chain of bits - @return: A tuple (status,statusContent), where statusContent is the string containing the resulting bytes in case status = 0 or an error in case status = -1 - ''' - if not isinstance(bitsStream,str): - return (-1,'The bitsStream must be a string') - bytes = '' - if re.match('[01]*$',bitsStream): - try: - for i in range(0,len(bitsStream),8): - bits = bitsStream[i:i+8] - byte = chr(int(bits,2)) - bytes += byte - except: - return (-1,'Error in conversion from bits to bytes') - return (0,bytes) - else: - return (-1,'The format of the bit stream is not correct') - -def getBytesFromFile(filename, offset, numBytes): - ''' - Returns the number of bytes specified from a file, starting from the offset specified - - @param filename: Name of the file - @param offset: Bytes offset - @param numBytes: Number of bytes to retrieve - @return: A tuple (status,statusContent), where statusContent is the bytes read in case status = 0 or an error in case status = -1 - ''' - if not isinstance(offset,int) or not isinstance(numBytes,int): - return (-1,'The offset and the number of bytes must be integers') - if os.path.exists(filename): - fileSize = os.path.getsize(filename) - bytesFile = open(filename,'rb') - bytesFile.seek(offset) - if offset+numBytes > fileSize: - bytes = bytesFile.read() - else: - bytes = bytesFile.read(numBytes) - bytesFile.close() - return (0,bytes) - else: - return (-1,'File does not exist') - -def hexToString(hexString): - ''' - Simple method to convert an hexadecimal string to ascii string - - @param hexString: A string in hexadecimal format - @return: A tuple (status,statusContent), where statusContent is an ascii string in case status = 0 or an error in case status = -1 - ''' - string = '' - if len(hexString) % 2 != 0: - hexString = '0'+hexString - try: - for i in range(0,len(hexString),2): - string += chr(int(hexString[i]+hexString[i+1],16)) - except: - return (-1,'Error in hexadecimal conversion') - 
return (0,string) - -def numToHex(num, numBytes): - ''' - Given a number returns its hexadecimal format with the specified length, adding '\0' if necessary - - @param num: A number (int) - @param numBytes: Length of the output (int) - @return: A tuple (status,statusContent), where statusContent is a number in hexadecimal format in case status = 0 or an error in case status = -1 - ''' - hexString = '' - if not isinstance(num,int): - return (-1,'Bad number') - try: - hexNumber = hex(num)[2:] - if len(hexNumber) % 2 != 0: - hexNumber = '0'+hexNumber - for i in range(0,len(hexNumber)-1,2): - hexString += chr(int(hexNumber[i]+hexNumber[i+1],16)) - hexString = '\0'*(numBytes-len(hexString))+hexString - except: - return (-1,'Error in hexadecimal conversion') - return (0,hexString) - -def numToString(num, numDigits): - ''' - Given a number returns its string format with the specified length, adding '0' if necessary - - @param num: A number (int) - @param numDigits: Length of the output string (int) - @return: A tuple (status,statusContent), where statusContent is a number in string format in case status = 0 or an error in case status = -1 - ''' - if not isinstance(num,int): - return (-1,'Bad number') - strNum = str(num) - if numDigits < len(strNum): - return (-1,'Bad digit number') - for i in range(numDigits-len(strNum)): - strNum = '0' + strNum - return (0,strNum) - -def unescapeHTMLEntities(text): - ''' - Removes HTML or XML character references and entities from a text string. - - @param text The HTML (or XML) source text. - @return The plain text, as a Unicode string, if necessary. 
- - Author: Fredrik Lundh - Source: http://effbot.org/zone/re-sub.htm#unescape-html - ''' - def fixup(m): - text = m.group(0) - if text[:2] == "&#": - # character reference - try: - if text[:3] == "&#x": - return unichr(int(text[3:-1], 16)) - else: - return unichr(int(text[2:-1])) - except ValueError: - pass - else: - # named entity - try: - text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) - except KeyError: - pass - return text # leave as is - return re.sub("&#?\w+;", fixup, text) - -def unescapeString(string): - ''' - Unescape the given string - - @param string: An escaped string - @return: Unescaped string - ''' - toUnescapeChars = ['\\','(',')'] - unescapedValue = '' - i = 0 - while i < len(string): - if string[i] == '\\' and i != len(string)-1: - if string[i+1] in toUnescapeChars: - if string[i+1] == '\\': - unescapedValue += '\\' - i += 1 - else: - pass - elif string[i+1] == 'r': - i += 1 - unescapedValue += '\r' - elif string[i+1] == 'n': - i += 1 - unescapedValue += '\n' - elif string[i+1] == 't': - i += 1 - unescapedValue += '\t' - elif string[i+1] == 'b': - i += 1 - unescapedValue += '\b' - elif string[i+1] == 'f': - i += 1 - unescapedValue += '\f' - else: - unescapedValue += string[i] - else: - unescapedValue += string[i] - i += 1 - return unescapedValue - -def vtcheck(md5, vtKey): - ''' - Function to check a hash on VirusTotal and get the report summary - - @param md5: The MD5 to check (hexdigest) - @param vtKey: The VirusTotal API key needed to perform the request - @return: A dictionary with the result of the request - ''' - vtUrl = 'https://www.virustotal.com/vtapi/v2/file/report' - parameters = {'resource':md5,'apikey':vtKey} - try: - data = urllib.urlencode(parameters) - req = urllib2.Request(vtUrl, data) - response = urllib2.urlopen(req) - jsonResponse = response.read() - except: - return (-1, 'The request to VirusTotal has not been successful') - try: - jsonDict = json.loads(jsonResponse) - except: - return (-1, 'An error has occurred 
while parsing the JSON response from VirusTotal') - return (0, jsonDict) \ No newline at end of file diff --git a/README b/README index bd882f6..3ad6a23 100644 --- a/README +++ b/README @@ -6,10 +6,16 @@ http://twitter.com/peepdf ** Dependencies ** -- In order to analyse Javascript code "PyV8" is needed: +Here's what I did to make the extra libraries work - http://code.google.com/p/pyv8/ +`git clone https://github.com/harakan/peepdf` +`cd peepdf && python3 setup.py install` +`git clone git@github.com:area1/stpyv8.git` +`sudo bash install-ubuntu.sh` +`sudo python3 setup.py install` +`pip3 install pylibemu` +... Hopefully that works - The "sctest" command is a wrapper of "sctest" (libemu). Besides libemu pylibemu is used and must be installed: @@ -17,19 +23,16 @@ http://twitter.com/peepdf https://github.com/buffer/pylibemu -- To support XML output "lxml" is needed: - - http://lxml.de/installation.html - - -- Included modules: lzw, colorama, jsbeautifier, ccitt, pythonaes (Thanks to all the developers!!) - +- Included modules: lzw, ccitt (Thanks to all the developers!!) ** Installation ** -No installation is needed apart of the commented dependencies, just execute it! +Run, in peepdf directory + + easy_install . +The setup script handles the installation of jsbeautifier, colorama, pythonaes and lxml ** Execution ** @@ -44,33 +47,35 @@ There are two important options when peepdf is executed: Shows the statistics of the file after being decoded/decrypted and analysed: - python peepdf.py [options] pdf_file + peepdf.py [options] pdf_file * Interactive console Executes the interactive console to let play with the PDF file: - python peepdf.py -i [options] pdf_file + peepdf.py -i [options] pdf_file If no PDF file is specified it's possible to use the decode/encode/js*/sctest commands and create a new PDF file: - python peepdf.py -i + peepdf.py -i * Batch execution It's possible to use a commands file to specify the commands to be executed in the batch mode. 
This type of execution is good to automatise analysis of several files: - python peepdf.py [options] -s commands_file pdf_file + peepdf.py [options] -s commands_file pdf_file ** Updating ** -Just type this and you will be updated to the latest version from the repository: +The update option (-u) has been deactivated as it is not working for now. +To update, cd to the peepdf directory and type: - python peepdf.py -u + git pull origin master + easy_install . diff --git a/README.md b/README.md index 25dec3d..5d6d6d2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,57 @@ -peepdf is a **Python tool to explore PDF files** in order to find out if the file can be harmful or not. The aim of this tool is to provide all the necessary components that +peepdf is a **Python 3 tool to explore PDF files** in order to find out if the file can be harmful or not. The aim of this tool is to provide all the necessary components that a security researcher could need in a PDF analysis without using 3 or 4 tools to make all the tasks. With peepdf it's possible to see all the objects in the document showing the suspicious elements, supports all the most used filters and encodings, it can parse different versions of a file, object streams and encrypted files. With the installation -of [PyV8](https://code.google.com/p/pyv8) and [Pylibemu](https://github.com/buffer/pylibemu) it provides **Javascript and shellcode analysis** wrappers too. Apart of this it's able to create new PDF files and to modify/obfuscate existent ones. - -The main functionalities of peepdf are the following: +of the Python 3 version of Google's V8 library, [stPyV8](https://github.com/area1/stpyv8), and [Pylibemu](https://github.com/buffer/pylibemu) it provides **Javascript and shellcode analysis** wrappers too. Apart from this it's able to create new PDF files and to modify/obfuscate existing ones. + +**Maintenance and new features** + - This is a half-finished Python 3 port with a few outstanding issues. 
I pulled a few broken pieces together and wrote the install instructions in the hope that they help a wider audience. Pull requests and testing are appreciated. + +**Installation:** Here's what I did to make the extra libraries work + - Note: This installs peepdf as a user, no sudo needed. + + * This repo: +`git clone https://github.com/harakan/peepdf` +`cd peepdf && python3 setup.py install --user` + * Required Python libraries: +`pip3 install -r requirements` + * (OPTIONAL) The infamous PyV8 library for executing JavaScript. This uses the new stpyv8 fork and installs system-wide for now: +`git clone git@github.com:area1/stpyv8.git` +`sudo bash install-ubuntu.sh` +`sudo python3 setup.py install` + * (OPTIONAL) Install the libemu bindings (pylibemu): +`pip3 install pylibemu --user` + +... and hopefully that works! Here are a few extra things to try if it doesn't: + * pip3 doesn't work with the python3.5 version shipped in Windows 10's Linux subsystem. Here's a fix: + 1st: `sudo apt install python3-pip`; 2nd, if the 1st doesn't work: `curl -fsSL https://bootstrap.pypa.io/pip/3.5/get-pip.py | python3.5` + +**Hints to get you started:** + + * Basic usage which works for most PDFs: `peepdf -lf myPDF.pdf` + * Interactive Console: `peepdf -lfi myPDF.pdf` + * `peepdf -h`: +``` +Options: + -h, --help show this help message and exit + -i, --interactive Sets console mode. + -s SCRIPTFILE, --load-script=SCRIPTFILE + Loads the commands stored in the specified file and + execute them. + -c, --check-vt Checks the hash of the PDF file on VirusTotal. + -f, --force-mode Sets force parsing mode to ignore errors. + -l, --loose-mode Sets loose parsing mode to catch malformed objects. + -m, --manual-analysis + Avoids automatic Javascript analysis. Useful with + eternal loops like heap spraying. + -g, --grinch-mode Avoids colorized output in the interactive console. + -v, --version Shows program's version number. + -x, --xml Shows the document information in XML format. + -j, --json Shows the document information in JSON format. 
+ -C COMMANDS, --command=COMMANDS + Specifies a command from the interactive console to be + executed. +``` **Analysis:** @@ -48,8 +95,41 @@ The main functionalities of peepdf are the following: * Embedded PDFs analysis * Improving automatic Javascript analysis - * GUI - + * Some broken features including decoding errors that happened during the port + +**Example Output:** +``` +File: myPDF.pdf +MD5: b51d433e5f675ca46bfb816512f9afe3 +SHA1: c5012522518ec46e989187cc2a4b7bce2a384ab5 +SHA256: f6aceeb1399f059cb48692526a599c54fec31ee5a8c8016848bee4a831b40d2a +Size: 964220 bytes +Version: 1.4 +Binary: True +Linearized: True +Encrypted: False +Updates: 1 +Objects: 72 +Streams: 30 +URIs: 0 +Comments: 0 +Errors: 0 + +Version 0: + Catalog: 34 + Info: 32 + Objects (1): [33] + Streams (0): [] + +Version 1: + Catalog: No + Info: No + Objects (71): [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72] + Errors (9): [36, 37, 15, 16, 21, 22, 23, 28, 29] + Streams (30): [72, 36, 37, 43, 44, 49, 50, 53, 57, 61, 65, 69, 71, 2, 4, 6, 8, 10, 12, 14, 15, 16, 19, 21, 22, 23, 26, 28, 29, 31] + Encoded (30): [72, 36, 37, 43, 44, 49, 50, 53, 57, 61, 65, 69, 71, 2, 4, 6, 8, 10, 12, 14, 15, 16, 19, 21, 22, 23, 26, 28, 29, 31] + Decoding errors (9): [36, 37, 15, 16, 21, 22, 23, 28, 29] +``` **Related articles:** @@ -64,12 +144,10 @@ The main functionalities of peepdf are the following: * [Analyzing Suspicious PDF Files With Peepdf](http://blog.zeltser.com/post/6780160077/peepdf-malicious-pdf-analysis) -**Included in:** +**Python2 version previously included in:** * [REMnux](http://zeltser.com/remnux/) * [BackTrack 5](https://www.backtrack-linux.com/forensics-auditor/) * [Kali Linux](http://www.kali.org/) -**You are free to contribute with feedback, bugs, patches, etc. 
Any help is welcome. Also, if you really enjoy using peepdf, you think it is worth it and you feel really generous today you can donate some bucks to the project ;) Thanks!** - -[![](https://www.paypal.com/en_US/i/btn/btn_donateCC_LG.gif)](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=X5RRGLX5DTNKS) \ No newline at end of file +**You are free to contribute with feedback, bugs, patches, etc. Any help is welcome** diff --git a/aespython/__init__.py b/aespython/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/aespython/aes_cipher.py b/aespython/aes_cipher.py deleted file mode 100644 index fdf4c5d..0000000 --- a/aespython/aes_cipher.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env python -""" -AES Block Cipher. - -Performs single block cipher decipher operations on a 16 element list of integers. -These integers represent 8 bit bytes in a 128 bit block. -The result of cipher or decipher operations is the transformed 16 element list of integers. - -Running this file as __main__ will result in a self-test of the algorithm. - -Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf - -Copyright (c) 2010, Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" - -#Normally use relative import. In test mode use local import. 
-try:from .aes_tables import sbox,i_sbox,galI,galNI -except ValueError:from aes_tables import sbox,i_sbox,galI,galNI -ups=",".join("s%x"%x for x in range(16)) -upr=ups.replace("s","r") -mix=",".join(",".join(("g{0}[s%x]^g{1}[s%x]^g{2}[s%x]^g{3}[s%x]^r%x"%(i+(i[0]+(0,3,2,1)[j],))).format(j&3,j+1&3,j+2&3,j+3&3) for j in (0,3,2,1)) for i in ((0,1,2,3),(4,5,6,7),(8,9,10,11),(12,13,14,15))).replace("g2","g").replace("g3","g") -i=mix.find("g[") -while i!=-1: - mix=mix[:i]+mix[i+2:i+4]+mix[i+5:] - i=mix.find("g[",i) -imix=",".join(",".join(("g{0}[s%x]^g{1}[s%x]^g{2}[s%x]^g{3}[s%x]"%i).format(j&3,j+1&3,j+2&3,j+3&3) for j in (0,3,2,1)) for i in ((0,1,2,3),(4,5,6,7),(8,9,10,11),(12,13,14,15))) -csl=["s%x"%(x*5&15) for x in range(16)] -csr=["s%x"%(x*-3&15) for x in range(16)] -box=",".join("s[%s]"%i for i in csl) -ibox=",".join("s[%s]^r%x"%i for i in zip(csr,range(16))) -xor=",".join("s[%s]^r%x"%i for i in zip(csl,range(16))) -xori=";".join("s%x^=r%x"%(i,i) for i in range(16)) -ciph="""def decipher_block(f,s): - g0,g1,g2,g3=galNI;ek=f._expanded_key;S=s+[0]*(16-len(s));s=sbox;R=ek[:16];X - for f in range(!16):R=ek[f:f+16];S=B;S=M - R=ek[f+16:] - return """.replace("S",ups).replace("R",upr).replace("X",xori) -class AESCipher: - def __init__(self,expanded_key): - self._expanded_key=expanded_key - self._Nr=len(expanded_key)-16 - exec(ciph.replace("g2,g3","").replace("dec","c").replace("!","16,f._Nr,").replace("B",box).replace("M",mix)+xor) - exec(ciph.replace("NI","I").replace(":16","f._Nr:").replace("f+16:",":16").replace("!","f._Nr-16,0,-").replace("sbox","i_sbox").replace("B",ibox).replace("M",imix)+ibox) -import unittest -class TestCipher(unittest.TestCase): - def test_cipher(self): - """Test AES cipher with all key lengths""" - import test_keys - import key_expander - test_data = test_keys.TestKeys() - for key_size in 128, 192, 256: - test_key_expander = key_expander.KeyExpander(key_size) - test_expanded_key = test_key_expander.expand(test_data.test_key[key_size]) - 
test_cipher = AESCipher(test_expanded_key) - test_result_ciphertext = test_cipher.cipher_block(test_data.test_block_plaintext) - self.assertEquals(len([i for i, j in zip(test_result_ciphertext, test_data.test_block_ciphertext_validated[key_size]) if i == j]), - 16,msg='Test %d bit cipher'%key_size) - test_result_plaintext = test_cipher.decipher_block(test_data.test_block_ciphertext_validated[key_size]) - self.assertEquals(len([i for i, j in zip(test_result_plaintext, test_data.test_block_plaintext) if i == j]), - 16,msg='Test %d bit decipher'%key_size) -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/aespython/aes_tables.py b/aespython/aes_tables.py deleted file mode 100644 index bf6eb55..0000000 --- a/aespython/aes_tables.py +++ /dev/null @@ -1,156 +0,0 @@ -""" -Instantiate AES tables for rcon,sbox,i_sbox,and galois_lookup. - -Copyright (c) 2010,Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" -rcon=( -0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a, -0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39, -0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a, -0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8, -0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef, -0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc, -0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b, -0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,0xb3, -0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f,0x25,0x4a,0x94, -0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04,0x08,0x10,0x20, -0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63,0xc6,0x97,0x35, 
-0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd,0x61,0xc2,0x9f, -0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb,0x8d,0x01,0x02,0x04, -0x08,0x10,0x20,0x40,0x80,0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,0x5e,0xbc,0x63, -0xc6,0x97,0x35,0x6a,0xd4,0xb3,0x7d,0xfa,0xef,0xc5,0x91,0x39,0x72,0xe4,0xd3,0xbd, -0x61,0xc2,0x9f,0x25,0x4a,0x94,0x33,0x66,0xcc,0x83,0x1d,0x3a,0x74,0xe8,0xcb) -sbox=( -0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, -0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, -0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, -0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, -0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, -0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, -0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, -0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, -0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, -0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, -0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, -0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, -0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, -0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, -0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, -0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16) -i_sbox=( -0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb, -0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb, -0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e, 
-0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25, -0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92, -0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84, -0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06, -0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b, -0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73, -0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e, -0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b, -0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4, -0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f, -0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef, -0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61, -0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d) -galNI=(( -0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e, -0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e, -0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e, -0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e, -0x80,0x82,0x84,0x86,0x88,0x8a,0x8c,0x8e,0x90,0x92,0x94,0x96,0x98,0x9a,0x9c,0x9e, -0xa0,0xa2,0xa4,0xa6,0xa8,0xaa,0xac,0xae,0xb0,0xb2,0xb4,0xb6,0xb8,0xba,0xbc,0xbe, -0xc0,0xc2,0xc4,0xc6,0xc8,0xca,0xcc,0xce,0xd0,0xd2,0xd4,0xd6,0xd8,0xda,0xdc,0xde, -0xe0,0xe2,0xe4,0xe6,0xe8,0xea,0xec,0xee,0xf0,0xf2,0xf4,0xf6,0xf8,0xfa,0xfc,0xfe, -0x1b,0x19,0x1f,0x1d,0x13,0x11,0x17,0x15,0x0b,0x09,0x0f,0x0d,0x03,0x01,0x07,0x05, -0x3b,0x39,0x3f,0x3d,0x33,0x31,0x37,0x35,0x2b,0x29,0x2f,0x2d,0x23,0x21,0x27,0x25, -0x5b,0x59,0x5f,0x5d,0x53,0x51,0x57,0x55,0x4b,0x49,0x4f,0x4d,0x43,0x41,0x47,0x45, 
-0x7b,0x79,0x7f,0x7d,0x73,0x71,0x77,0x75,0x6b,0x69,0x6f,0x6d,0x63,0x61,0x67,0x65, -0x9b,0x99,0x9f,0x9d,0x93,0x91,0x97,0x95,0x8b,0x89,0x8f,0x8d,0x83,0x81,0x87,0x85, -0xbb,0xb9,0xbf,0xbd,0xb3,0xb1,0xb7,0xb5,0xab,0xa9,0xaf,0xad,0xa3,0xa1,0xa7,0xa5, -0xdb,0xd9,0xdf,0xdd,0xd3,0xd1,0xd7,0xd5,0xcb,0xc9,0xcf,0xcd,0xc3,0xc1,0xc7,0xc5, -0xfb,0xf9,0xff,0xfd,0xf3,0xf1,0xf7,0xf5,0xeb,0xe9,0xef,0xed,0xe3,0xe1,0xe7,0xe5), -(0x00,0x03,0x06,0x05,0x0c,0x0f,0x0a,0x09,0x18,0x1b,0x1e,0x1d,0x14,0x17,0x12,0x11, -0x30,0x33,0x36,0x35,0x3c,0x3f,0x3a,0x39,0x28,0x2b,0x2e,0x2d,0x24,0x27,0x22,0x21, -0x60,0x63,0x66,0x65,0x6c,0x6f,0x6a,0x69,0x78,0x7b,0x7e,0x7d,0x74,0x77,0x72,0x71, -0x50,0x53,0x56,0x55,0x5c,0x5f,0x5a,0x59,0x48,0x4b,0x4e,0x4d,0x44,0x47,0x42,0x41, -0xc0,0xc3,0xc6,0xc5,0xcc,0xcf,0xca,0xc9,0xd8,0xdb,0xde,0xdd,0xd4,0xd7,0xd2,0xd1, -0xf0,0xf3,0xf6,0xf5,0xfc,0xff,0xfa,0xf9,0xe8,0xeb,0xee,0xed,0xe4,0xe7,0xe2,0xe1, -0xa0,0xa3,0xa6,0xa5,0xac,0xaf,0xaa,0xa9,0xb8,0xbb,0xbe,0xbd,0xb4,0xb7,0xb2,0xb1, -0x90,0x93,0x96,0x95,0x9c,0x9f,0x9a,0x99,0x88,0x8b,0x8e,0x8d,0x84,0x87,0x82,0x81, -0x9b,0x98,0x9d,0x9e,0x97,0x94,0x91,0x92,0x83,0x80,0x85,0x86,0x8f,0x8c,0x89,0x8a, -0xab,0xa8,0xad,0xae,0xa7,0xa4,0xa1,0xa2,0xb3,0xb0,0xb5,0xb6,0xbf,0xbc,0xb9,0xba, -0xfb,0xf8,0xfd,0xfe,0xf7,0xf4,0xf1,0xf2,0xe3,0xe0,0xe5,0xe6,0xef,0xec,0xe9,0xea, -0xcb,0xc8,0xcd,0xce,0xc7,0xc4,0xc1,0xc2,0xd3,0xd0,0xd5,0xd6,0xdf,0xdc,0xd9,0xda, -0x5b,0x58,0x5d,0x5e,0x57,0x54,0x51,0x52,0x43,0x40,0x45,0x46,0x4f,0x4c,0x49,0x4a, -0x6b,0x68,0x6d,0x6e,0x67,0x64,0x61,0x62,0x73,0x70,0x75,0x76,0x7f,0x7c,0x79,0x7a, -0x3b,0x38,0x3d,0x3e,0x37,0x34,0x31,0x32,0x23,0x20,0x25,0x26,0x2f,0x2c,0x29,0x2a, -0x0b,0x08,0x0d,0x0e,0x07,0x04,0x01,0x02,0x13,0x10,0x15,0x16,0x1f,0x1c,0x19,0x1a)) -galI=( -(0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a,0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a, -0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca,0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba, -0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1,0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81, 
-0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11,0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61, -0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87,0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7, -0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67,0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17, -0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c,0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c, -0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc,0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc, -0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b,0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b, -0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b,0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb, -0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0,0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0, -0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50,0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20, -0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6,0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6, -0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26,0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56, -0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d,0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d, -0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd,0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d), -(0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31,0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69, -0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81,0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9, -0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a,0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12, -0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa,0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2, -0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7,0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f, -0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77,0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f, -0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc,0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4, -0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c,0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54, -0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6,0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e, -0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76,0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e, -0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd,0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5, 
-0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d,0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55, -0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30,0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68, -0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80,0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8, -0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b,0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13, -0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb,0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3), -(0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23,0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b, -0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3,0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b, -0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98,0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0, -0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48,0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20, -0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e,0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26, -0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e,0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6, -0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5,0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d, -0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25,0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d, -0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9,0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91, -0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29,0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41, -0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42,0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a, -0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92,0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa, -0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94,0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc, -0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44,0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c, -0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f,0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47, -0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff,0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97), -(0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f,0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77, -0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf,0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7, -0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04,0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c, 
-0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94,0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc, -0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49,0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01, -0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9,0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91, -0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72,0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a, -0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2,0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa, -0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3,0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b, -0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43,0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b, -0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8,0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0, -0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78,0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30, -0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5,0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed, -0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35,0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d, -0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e,0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6, -0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e,0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46)) \ No newline at end of file diff --git a/aespython/cbc_mode.py b/aespython/cbc_mode.py deleted file mode 100644 index f55e647..0000000 --- a/aespython/cbc_mode.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -""" -CBC Mode of operation - -Running this file as __main__ will result in a self-test of the algorithm. 
- -Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf - -Copyright (c) 2010, Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" - -class CBCMode: - """Perform CBC operation on a block and retain IV information for next operation""" - def __init__(self, block_cipher, block_size): - self._block_cipher = block_cipher - self._block_size = block_size - self._iv = [0] * block_size - - def set_iv(self, iv): - if len(iv) == self._block_size: - self._iv = iv - - def encrypt_block(self, plaintext): - iv=self._iv=self._block_cipher.cipher_block([i ^ j for i,j in zip (plaintext, self._iv)]) - return iv - - def decrypt_block(self, ciphertext): - plaintext = list(self._block_cipher.decipher_block(ciphertext)) - for i,v in enumerate(self._iv):plaintext[i]^=v - self._iv = ciphertext - return plaintext - -import unittest -class TestEncryptionMode(unittest.TestCase): - def test_mode(self): - #Self test - import key_expander - import aes_cipher - import test_keys - - test_data = test_keys.TestKeys() - - test_expander = key_expander.KeyExpander(256) - test_expanded_key = test_expander.expand(test_data.test_mode_key) - - test_cipher = aes_cipher.AESCipher(test_expanded_key) - - test_cbc = CBCMode(test_cipher, 16) - - test_cbc.set_iv(test_data.test_mode_iv) - for k in range(4): - self.assertEquals(len([i for i, j in zip(test_data.test_cbc_ciphertext[k],test_cbc.encrypt_block(test_data.test_mode_plaintext[k])) if i == j]), - 16, - msg='CBC encrypt test block %d'%k) - - test_cbc.set_iv(test_data.test_mode_iv) - for k in range(4): - self.assertEquals(len([i for i, j in zip(test_data.test_mode_plaintext[k],test_cbc.decrypt_block(test_data.test_cbc_ciphertext[k])) if i == j]), - 16, - msg='CBC decrypt test block %d'%k) - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/aespython/cfb_mode.py 
b/aespython/cfb_mode.py deleted file mode 100644 index 76e1971..0000000 --- a/aespython/cfb_mode.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -""" -CFB Mode of operation - -Running this file as __main__ will result in a self-test of the algorithm. - -Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf - -Copyright (c) 2010, Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" - -class CFBMode: - """Perform CFB operation on a block and retain IV information for next operation""" - def __init__(self, block_cipher, block_size): - self._block_cipher = block_cipher - self._block_size = block_size - self._iv = [0] * block_size - - def set_iv(self, iv): - if len(iv) == self._block_size: - self._iv = iv - - def encrypt_block(self, plaintext): - cipher_iv = self._block_cipher.cipher_block(self._iv) - iv = self._iv = [i ^ j for i,j in zip (plaintext, cipher_iv)] - return iv - - def decrypt_block(self, ciphertext): - cipher_iv = self._block_cipher.cipher_block(self._iv) - self._iv = ciphertext - return [i ^ j for i,j in zip (cipher_iv, ciphertext)] - -import unittest -class TestEncryptionMode(unittest.TestCase): - def test_mode(self): - #Self test - import key_expander - import aes_cipher - import test_keys - - test_data = test_keys.TestKeys() - - test_expander = key_expander.KeyExpander(256) - test_expanded_key = test_expander.expand(test_data.test_mode_key) - - test_cipher = aes_cipher.AESCipher(test_expanded_key) - - test_cfb = CFBMode(test_cipher, 16) - - test_cfb.set_iv(test_data.test_mode_iv) - for k in range(4): - self.assertEquals(len([i for i, j in zip(test_data.test_cfb_ciphertext[k],test_cfb.encrypt_block(test_data.test_mode_plaintext[k])) if i == j]), - 16, - msg='CFB encrypt test block' + str(k)) - - test_cfb.set_iv(test_data.test_mode_iv) - for k in range(4): - self.assertEquals(len([i for i, j in 
zip(test_data.test_mode_plaintext[k],test_cfb.decrypt_block(test_data.test_cfb_ciphertext[k])) if i == j]), - 16, - msg='CFB decrypt test block' + str(k)) - -if __name__ == "__main__": - unittest.main() diff --git a/aespython/key_expander.py b/aespython/key_expander.py deleted file mode 100644 index 0a20188..0000000 --- a/aespython/key_expander.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python - -""" -AES Key Expansion. - -Expands 128, 192, or 256 bit key for use with AES - -Running this file as __main__ will result in a self-test of the algorithm. - -Algorithm per NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf - -Copyright (c) 2010, Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" - -#Normally use relative import. In test mode use local import. -try:from .aes_tables import sbox,rcon -except ValueError:from aes_tables import sbox,rcon -from operator import xor -class KeyExpander: - """Perform AES Key Expansion""" - - _expanded_key_length = {128 : 176, 192 : 208, 256 : 240} - - def __init__(self, key_length): - self._key_length = key_length - self._n = key_length>>3 - - if key_length in self._expanded_key_length: - self._b = self._expanded_key_length[key_length] - else: - raise LookupError('Invalid Key Size') - - def expand(self, new_key): - """ - Expand the encryption key per AES key schedule specifications - - http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_description - """ - #First n bytes are copied from key - len_new_key = len(new_key) - if len_new_key != self._n: - raise RuntimeError('expand(): key size is invalid') - rcon_iter = 1 - nex=new_key.extend - - #Grow the key until it is the correct length - while 1: - #Copy last 4 bytes of extended key, apply core, increment i(rcon_iter), - #core Append the list of elements 1-3 and list comprised of element 0 (circular rotate left) - #core For each element of this 
new list, put the result of sbox into output array. - #xor with 4 bytes n bytes from end of extended key - keyarr=[sbox[i] for i in new_key[-3:]+new_key[-4:-3]] - #First byte of output array is XORed with rcon(iter) - keyarr[0] ^= rcon[rcon_iter] - nex(map(xor,keyarr, new_key[-self._n:4-self._n])) - rcon_iter += 1 - len_new_key += 4 - - #Run three passes of 4 byte expansion using copy of 4 byte tail of extended key - #which is then xor'd with 4 bytes n bytes from end of extended key - for j in 0,1,2: - nex(map(xor,new_key[-4:], new_key[-self._n:4-self._n])) - len_new_key += 4 - if len_new_key >= self._b:return new_key - else: - #If key length is 256 and key is not complete, add 4 bytes tail of extended key - #run through sbox before xor with 4 bytes n bytes from end of extended key - if self._key_length == 256: - nex(map(xor,[sbox[x] for x in new_key[-4:]], new_key[-self._n:4-self._n])) - len_new_key += 4 - if len_new_key >= self._b:return new_key - - #If key length is 192 or 256 and key is not complete, run 2 or 3 passes respectively - #of 4 byte tail of extended key xor with 4 bytes n bytes from end of extended key - if self._key_length != 128: - for j in ((0,1) if self._key_length == 192 else (0,1,2)): - nex(map(xor,new_key[-4:], new_key[-self._n:4-self._n])) - len_new_key += 4 - if len_new_key >= self._b:return new_key - -import unittest -class TestKeyExpander(unittest.TestCase): - def test_keys(self): - """Test All Key Expansions""" - import test_keys - test_data = test_keys.TestKeys() - for key_size in 128, 192, 256: - test_expander = KeyExpander(key_size) - test_expanded_key = test_expander.expand(test_data.test_key[key_size]) - self.assertEqual (len([i for i, j in zip(test_expanded_key, test_data.test_expanded_key_validated[key_size]) if i == j]), - len(test_data.test_expanded_key_validated[key_size]), - msg='Key expansion ' + str(key_size) + ' bit') - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git 
a/aespython/ofb_mode.py b/aespython/ofb_mode.py deleted file mode 100644 index 84d6048..0000000 --- a/aespython/ofb_mode.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python -""" -OFB Mode of operation - -Running this file as __main__ will result in a self-test of the algorithm. - -Algorithm per NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf - -Copyright (c) 2010, Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" - -class OFBMode: - """Perform OFB operation on a block and retain IV information for next operation""" - def __init__(self, block_cipher, block_size): - self._block_cipher = block_cipher - self._block_size = block_size - self._iv = [0] * block_size - - def set_iv(self, iv): - if len(iv) == self._block_size: - self._iv = iv - - def encrypt_block(self, plaintext): - self._iv = cipher_iv = self._block_cipher.cipher_block(self._iv) - return [i ^ j for i,j in zip (plaintext, cipher_iv)] - - def decrypt_block(self, ciphertext): - self._iv = cipher_iv = self._block_cipher.cipher_block(self._iv) - return [i ^ j for i,j in zip (cipher_iv, ciphertext)] - -import unittest -class TestEncryptionMode(unittest.TestCase): - def test_mode(self): - #Self test - import key_expander - import aes_cipher - import test_keys - - test_data = test_keys.TestKeys() - - test_expander = key_expander.KeyExpander(256) - test_expanded_key = test_expander.expand(test_data.test_mode_key) - - test_cipher = aes_cipher.AESCipher(test_expanded_key) - - test_ofb = OFBMode(test_cipher, 16) - - test_ofb.set_iv(test_data.test_mode_iv) - for k in range(4): - self.assertEquals(len([i for i, j in zip(test_data.test_ofb_ciphertext[k],test_ofb.encrypt_block(test_data.test_mode_plaintext[k])) if i == j]), - 16, - msg='OFB encrypt test block' + str(k)) - - test_ofb.set_iv(test_data.test_mode_iv) - for k in range(4): - self.assertEquals(len([i for i, j in 
zip(test_data.test_mode_plaintext[k],test_ofb.decrypt_block(test_data.test_ofb_ciphertext[k])) if i == j]), - 16, - msg='OFB decrypt test block' + str(k)) - -if __name__ == "__main__": - unittest.main() \ No newline at end of file diff --git a/aespython/test_keys.py b/aespython/test_keys.py deleted file mode 100644 index fb28286..0000000 --- a/aespython/test_keys.py +++ /dev/null @@ -1,119 +0,0 @@ -""" -Test keys and data for self-test operations. - -Test data from: -NIST SP 800-38A http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf -NIST FIPS-197 http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf - -Copyright (c) 2010, Adam Newman http://www.caller9.com/ -Licensed under the MIT license http://www.opensource.org/licenses/mit-license.php -""" -__author__ = "Adam Newman" - -class TestKeys: - """Test data, keys, IVs, and output to use in self-tests""" - test_key = { - 128 : [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f] - , 192 : [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17] - , 256 : [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f] - } - - test_expanded_key_validated = { - 128 : [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0xd6, 0xaa, 0x74, 0xfd, 0xd2, 0xaf, 0x72, 0xfa, 0xda, 0xa6, 0x78, 0xf1, 0xd6, 0xab, 0x76, 0xfe, - 0xb6, 0x92, 0xcf, 0x0b, 0x64, 0x3d, 0xbd, 0xf1, 0xbe, 0x9b, 0xc5, 0x00, 0x68, 0x30, 0xb3, 0xfe, - 0xb6, 0xff, 0x74, 0x4e, 0xd2, 0xc2, 0xc9, 0xbf, 0x6c, 0x59, 0x0c, 0xbf, 0x04, 0x69, 0xbf, 0x41, - 0x47, 0xf7, 0xf7, 0xbc, 0x95, 0x35, 0x3e, 0x03, 0xf9, 0x6c, 0x32, 0xbc, 0xfd, 0x05, 0x8d, 0xfd, - 0x3c, 0xaa, 0xa3, 0xe8, 0xa9, 0x9f, 0x9d, 0xeb, 0x50, 0xf3, 0xaf, 0x57, 0xad, 
0xf6, 0x22, 0xaa, - 0x5e, 0x39, 0x0f, 0x7d, 0xf7, 0xa6, 0x92, 0x96, 0xa7, 0x55, 0x3d, 0xc1, 0x0a, 0xa3, 0x1f, 0x6b, - 0x14, 0xf9, 0x70, 0x1a, 0xe3, 0x5f, 0xe2, 0x8c, 0x44, 0x0a, 0xdf, 0x4d, 0x4e, 0xa9, 0xc0, 0x26, - 0x47, 0x43, 0x87, 0x35, 0xa4, 0x1c, 0x65, 0xb9, 0xe0, 0x16, 0xba, 0xf4, 0xae, 0xbf, 0x7a, 0xd2, - 0x54, 0x99, 0x32, 0xd1, 0xf0, 0x85, 0x57, 0x68, 0x10, 0x93, 0xed, 0x9c, 0xbe, 0x2c, 0x97, 0x4e, - 0x13, 0x11, 0x1d, 0x7f, 0xe3, 0x94, 0x4a, 0x17, 0xf3, 0x07, 0xa7, 0x8b, 0x4d, 0x2b, 0x30, 0xc5] - , 192 : [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x58, 0x46, 0xf2, 0xf9, 0x5c, 0x43, 0xf4, 0xfe, - 0x54, 0x4a, 0xfe, 0xf5, 0x58, 0x47, 0xf0, 0xfa, 0x48, 0x56, 0xe2, 0xe9, 0x5c, 0x43, 0xf4, 0xfe, - 0x40, 0xf9, 0x49, 0xb3, 0x1c, 0xba, 0xbd, 0x4d, 0x48, 0xf0, 0x43, 0xb8, 0x10, 0xb7, 0xb3, 0x42, - 0x58, 0xe1, 0x51, 0xab, 0x04, 0xa2, 0xa5, 0x55, 0x7e, 0xff, 0xb5, 0x41, 0x62, 0x45, 0x08, 0x0c, - 0x2a, 0xb5, 0x4b, 0xb4, 0x3a, 0x02, 0xf8, 0xf6, 0x62, 0xe3, 0xa9, 0x5d, 0x66, 0x41, 0x0c, 0x08, - 0xf5, 0x01, 0x85, 0x72, 0x97, 0x44, 0x8d, 0x7e, 0xbd, 0xf1, 0xc6, 0xca, 0x87, 0xf3, 0x3e, 0x3c, - 0xe5, 0x10, 0x97, 0x61, 0x83, 0x51, 0x9b, 0x69, 0x34, 0x15, 0x7c, 0x9e, 0xa3, 0x51, 0xf1, 0xe0, - 0x1e, 0xa0, 0x37, 0x2a, 0x99, 0x53, 0x09, 0x16, 0x7c, 0x43, 0x9e, 0x77, 0xff, 0x12, 0x05, 0x1e, - 0xdd, 0x7e, 0x0e, 0x88, 0x7e, 0x2f, 0xff, 0x68, 0x60, 0x8f, 0xc8, 0x42, 0xf9, 0xdc, 0xc1, 0x54, - 0x85, 0x9f, 0x5f, 0x23, 0x7a, 0x8d, 0x5a, 0x3d, 0xc0, 0xc0, 0x29, 0x52, 0xbe, 0xef, 0xd6, 0x3a, - 0xde, 0x60, 0x1e, 0x78, 0x27, 0xbc, 0xdf, 0x2c, 0xa2, 0x23, 0x80, 0x0f, 0xd8, 0xae, 0xda, 0x32, - 0xa4, 0x97, 0x0a, 0x33, 0x1a, 0x78, 0xdc, 0x09, 0xc4, 0x18, 0xc2, 0x71, 0xe3, 0xa4, 0x1d, 0x5d] - , 256 : [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 
0x1f, - 0xa5, 0x73, 0xc2, 0x9f, 0xa1, 0x76, 0xc4, 0x98, 0xa9, 0x7f, 0xce, 0x93, 0xa5, 0x72, 0xc0, 0x9c, - 0x16, 0x51, 0xa8, 0xcd, 0x02, 0x44, 0xbe, 0xda, 0x1a, 0x5d, 0xa4, 0xc1, 0x06, 0x40, 0xba, 0xde, - 0xae, 0x87, 0xdf, 0xf0, 0x0f, 0xf1, 0x1b, 0x68, 0xa6, 0x8e, 0xd5, 0xfb, 0x03, 0xfc, 0x15, 0x67, - 0x6d, 0xe1, 0xf1, 0x48, 0x6f, 0xa5, 0x4f, 0x92, 0x75, 0xf8, 0xeb, 0x53, 0x73, 0xb8, 0x51, 0x8d, - 0xc6, 0x56, 0x82, 0x7f, 0xc9, 0xa7, 0x99, 0x17, 0x6f, 0x29, 0x4c, 0xec, 0x6c, 0xd5, 0x59, 0x8b, - 0x3d, 0xe2, 0x3a, 0x75, 0x52, 0x47, 0x75, 0xe7, 0x27, 0xbf, 0x9e, 0xb4, 0x54, 0x07, 0xcf, 0x39, - 0x0b, 0xdc, 0x90, 0x5f, 0xc2, 0x7b, 0x09, 0x48, 0xad, 0x52, 0x45, 0xa4, 0xc1, 0x87, 0x1c, 0x2f, - 0x45, 0xf5, 0xa6, 0x60, 0x17, 0xb2, 0xd3, 0x87, 0x30, 0x0d, 0x4d, 0x33, 0x64, 0x0a, 0x82, 0x0a, - 0x7c, 0xcf, 0xf7, 0x1c, 0xbe, 0xb4, 0xfe, 0x54, 0x13, 0xe6, 0xbb, 0xf0, 0xd2, 0x61, 0xa7, 0xdf, - 0xf0, 0x1a, 0xfa, 0xfe, 0xe7, 0xa8, 0x29, 0x79, 0xd7, 0xa5, 0x64, 0x4a, 0xb3, 0xaf, 0xe6, 0x40, - 0x25, 0x41, 0xfe, 0x71, 0x9b, 0xf5, 0x00, 0x25, 0x88, 0x13, 0xbb, 0xd5, 0x5a, 0x72, 0x1c, 0x0a, - 0x4e, 0x5a, 0x66, 0x99, 0xa9, 0xf2, 0x4f, 0xe0, 0x7e, 0x57, 0x2b, 0xaa, 0xcd, 0xf8, 0xcd, 0xea, - 0x24, 0xfc, 0x79, 0xcc, 0xbf, 0x09, 0x79, 0xe9, 0x37, 0x1a, 0xc2, 0x3c, 0x6d, 0x68, 0xde, 0x36] - } - - test_block_ciphertext_validated = { - 128 : [ - 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a] - , 192 : [ - 0xdd, 0xa9, 0x7c, 0xa4, 0x86, 0x4c, 0xdf, 0xe0, 0x6e, 0xaf, 0x70, 0xa0, 0xec, 0x0d, 0x71, 0x91] - , 256 : [ - 0x8e, 0xa2, 0xb7, 0xca, 0x51, 0x67, 0x45, 0xbf, 0xea, 0xfc, 0x49, 0x90, 0x4b, 0x49, 0x60, 0x89] - } - - test_block_plaintext = [ - 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff] - - #After initial validation, these deviated from test in SP 800-38A to use same key, iv, and plaintext on tests. - #Still valid, just easier to test with. 
- test_mode_key= [ - 0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81, - 0x1f, 0x35, 0x2c, 0x07, 0x3b, 0x61, 0x08, 0xd7, 0x2d, 0x98, 0x10, 0xa3, 0x09, 0x14, 0xdf, 0xf4] - test_mode_iv = [ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f] - test_mode_plaintext = [ - [0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a], - [0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51], - [0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef], - [0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10]] - test_cbc_ciphertext = [ - [0xf5, 0x8c, 0x4c, 0x04, 0xd6, 0xe5, 0xf1, 0xba, 0x77, 0x9e, 0xab, 0xfb, 0x5f, 0x7b, 0xfb, 0xd6], - [0x9c, 0xfc, 0x4e, 0x96, 0x7e, 0xdb, 0x80, 0x8d, 0x67, 0x9f, 0x77, 0x7b, 0xc6, 0x70, 0x2c, 0x7d], - [0x39, 0xf2, 0x33, 0x69, 0xa9, 0xd9, 0xba, 0xcf, 0xa5, 0x30, 0xe2, 0x63, 0x04, 0x23, 0x14, 0x61], - [0xb2, 0xeb, 0x05, 0xe2, 0xc3, 0x9b, 0xe9, 0xfc, 0xda, 0x6c, 0x19, 0x07, 0x8c, 0x6a, 0x9d, 0x1b]] - test_cfb_ciphertext = [ - [0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60], - [0x39, 0xff, 0xed, 0x14, 0x3b, 0x28, 0xb1, 0xc8, 0x32, 0x11, 0x3c, 0x63, 0x31, 0xe5, 0x40, 0x7b], - [0xdf, 0x10, 0x13, 0x24, 0x15, 0xe5, 0x4b, 0x92, 0xa1, 0x3e, 0xd0, 0xa8, 0x26, 0x7a, 0xe2, 0xf9], - [0x75, 0xa3, 0x85, 0x74, 0x1a, 0xb9, 0xce, 0xf8, 0x20, 0x31, 0x62, 0x3d, 0x55, 0xb1, 0xe4, 0x71]] - test_ofb_ciphertext = [ - [0xdc, 0x7e, 0x84, 0xbf, 0xda, 0x79, 0x16, 0x4b, 0x7e, 0xcd, 0x84, 0x86, 0x98, 0x5d, 0x38, 0x60], - [0x4f, 0xeb, 0xdc, 0x67, 0x40, 0xd2, 0x0b, 0x3a, 0xc8, 0x8f, 0x6a, 0xd8, 0x2a, 0x4f, 0xb0, 0x8d], - [0x71, 0xab, 0x47, 0xa0, 0x86, 0xe8, 0x6e, 0xed, 0xf3, 0x9d, 0x1c, 0x5b, 0xba, 0x97, 0xc4, 0x08], - [0x01, 0x26, 0x14, 0x1d, 0x67, 0xf3, 0x7b, 0xe8, 0x53, 0x8f, 
0x5a, 0x8b, 0xe7, 0x40, 0xe4, 0x84]] - - def hex_output(self, list): - #Debugging output helper - result = '[' - for i in list[:-1]: - result += hex(i) + ',' - return result + hex(list[-1]) + ']' - - - \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..4c7996c --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,14 @@ +environment: + matrix: + - PYTHON: "C:/Python27" + +install: + - "set PATH=%PYTHON%;%PYTHON%/Scripts;%PATH%" + + - "python.exe setup.py develop" + - "pip.exe install pytest pytest-cov mock" + +build: false + +test_script: + - "pytest.exe --cov=peepdf" diff --git a/colorama/PKG-INFO b/colorama/PKG-INFO deleted file mode 100644 index 25ea42d..0000000 --- a/colorama/PKG-INFO +++ /dev/null @@ -1,330 +0,0 @@ -Metadata-Version: 1.0 -Name: colorama -Version: 0.1.18 -Summary: Cross-platform colored terminal text. -Home-page: http://code.google.com/p/colorama/ -Author: Jonathan Hartley -Author-email: tartley@tartley.com -License: BSD -Description: Download and docs: - http://pypi.python.org/pypi/colorama - Development: - http://code.google.com/p/colorama - - Description - =========== - - Makes ANSI escape character sequences for producing colored terminal text work - under MS Windows. - - ANSI escape character sequences have long been used to produce colored terminal - text on Unix and Macs. Colorama makes this work on Windows, too. It also - provides some shortcuts to help generate ANSI sequences, and works fine in - conjunction with any other ANSI sequence generation library, such as Termcolor - (http://pypi.python.org/pypi/termcolor.) - - This has the upshot of providing a simple cross-platform API for printing - colored terminal text from Python, and has the happy side-effect that existing - applications or libraries which use ANSI sequences to produce colored output on - Linux or Macs can now also work on Windows, simply by calling - ``colorama.init()``. 
- - A demo script in the source code repository prints some colored text using - ANSI sequences. Compare its output under Gnome-terminal's built in ANSI - handling, versus on Windows Command-Prompt using Colorama: - - .. image:: http://colorama.googlecode.com/hg/screenshots/ubuntu-demo.png - :width: 661 - :height: 357 - :alt: ANSI sequences on Ubuntu under gnome-terminal. - - .. image:: http://colorama.googlecode.com/hg/screenshots/windows-demo.png - :width: 668 - :height: 325 - :alt: Same ANSI sequences on Windows, using Colorama. - - These screengrabs make it clear that Colorama on Windows does not support - ANSI 'dim text': it looks the same as 'normal text'. - - - Dependencies - ============ - - None, other than Python. Tested on Python 2.5.5, 2.6.5, 2.7 & 3.1.2. - - - Usage - ===== - - Initialisation - -------------- - - Applications should initialise Colorama using:: - - from colorama import init - init() - - If you are on Windows, the call to ``init()`` will start filtering ANSI escape - sequences out of any text sent to stdout or stderr, and will replace them with - equivalent Win32 calls. - - Calling ``init()`` has no effect on other platforms (unless you request other - optional functionality, see keyword args below.) The intention is that - applications can call ``init()`` unconditionally on all platforms, after which - ANSI output should just work. 
- - - Colored Output - -------------- - - Cross-platform printing of colored text can then be done using Colorama's - constant shorthand for ANSI escape sequences:: - - from colorama import Fore, Back, Style - print Fore.RED + 'some red text' - print Back.GREEN + and with a green background' - print Style.DIM + 'and in dim text' - print + Fore.RESET + Back.RESET + Style.RESET_ALL - print 'back to normal now' - - or simply by manually printing ANSI sequences from your own code:: - - print '/033[31m' + 'some red text' - print '/033[30m' # and reset to default color - - or Colorama can be used happily in conjunction with existing ANSI libraries - such as Termcolor:: - - from colorama import init - from termcolor import colored - - # use Colorama to make Termcolor work on Windows too - init() - - # then use Termcolor for all colored text output - print colored('Hello, World!', 'green', 'on_red') - - Available formatting constants are:: - - Fore: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET. - Back: BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE, RESET. - Style: DIM, NORMAL, BRIGHT, RESET_ALL - - Style.RESET_ALL resets foreground, background and brightness. Colorama will - perform this reset automatically on program exit. - - - Init Keyword Args - ----------------- - - ``init()`` accepts some kwargs to override default behaviour. - - init(autoreset=False): - If you find yourself repeatedly sending reset sequences to turn off color - changes at the end of every print, then ``init(autoreset=True)`` will - automate that:: - - from colorama import init - init(autoreset=True) - print Fore.RED + 'some red text' - print 'automatically back to default color again' - - init(strip=None): - Pass ``True`` or ``False`` to override whether ansi codes should be - stripped from the output. The default behaviour is to strip if on Windows. 
- - init(convert=None): - Pass ``True`` or ``False`` to override whether to convert ansi codes in the - output into win32 calls. The default behaviour is to convert if on Windows - and output is to a tty (terminal). - - init(wrap=True): - On Windows, colorama works by replacing ``sys.stdout`` and ``sys.stderr`` - with proxy objects, which override the .write() method to do their work. If - this wrapping causes you problems, then this can be disabled by passing - ``init(wrap=False)``. The default behaviour is to wrap if autoreset or - strip or convert are True. - - When wrapping is disabled, colored printing on non-Windows platforms will - continue to work as normal. To do cross-platform colored output, you can - use Colorama's ``AnsiToWin32`` proxy directly:: - - from colorama import init, AnsiToWin32 - init(wrap=False) - stream = AnsiToWin32(sys.stderr).stream - print >>stream, Fore.BLUE + 'blue text on stderr' - - - Status & Known Problems - ======================= - - Feature complete as far as colored text goes, but still finding bugs and - occasionally making small changes to the API (such as new keyword arguments - to ``init()``). - - Only tested on WinXP (CMD, Console2) and Ubuntu (gnome-terminal, xterm). Much - obliged if anyone can let me know how it fares elsewhere, in particular on - Macs. - - I'd like to add the ability to handle ANSI codes which position the text cursor - and clear the terminal. - - See outstanding issues and wishlist at: - http://code.google.com/p/colorama/issues/list - - If anything doesn't work for you, or doesn't do what you expected or hoped for, - I'd *love* to hear about it on that issues list. - - - Recognised ANSI Sequences - ========================= - - ANSI sequences generally take the form: - - ESC [ ; ... - - Where is an integer, and is a single letter. Zero or more - params are passed to a . If no params are passed, it is generally - synonymous with passing a single zero. 
No spaces exist in the sequence, they - have just been inserted here to make it easy to read. - - The only ANSI sequences that colorama converts into win32 calls are:: - - ESC [ 0 m # reset all (colors and brightness) - ESC [ 1 m # bright - ESC [ 2 m # dim (looks same as normal brightness) - ESC [ 22 m # normal brightness - - # FOREGROUND: - ESC [ 30 m # black - ESC [ 31 m # red - ESC [ 32 m # green - ESC [ 33 m # yellow - ESC [ 34 m # blue - ESC [ 35 m # magenta - ESC [ 36 m # cyan - ESC [ 37 m # white - ESC [ 39 m # reset - - # BACKGROUND - ESC [ 40 m # black - ESC [ 41 m # red - ESC [ 42 m # green - ESC [ 43 m # yellow - ESC [ 44 m # blue - ESC [ 45 m # magenta - ESC [ 46 m # cyan - ESC [ 47 m # white - ESC [ 49 m # reset - - Multiple numeric params to the 'm' command can be combined into a single - sequence, eg:: - - ESC [ 36 ; 45 ; 1 m # bright cyan text on magenta background - - All other ANSI sequences of the form ``ESC [ ; ... `` - are silently stripped from the output on Windows. - - Any other form of ANSI sequence, such as single-character codes or alternative - initial characters, are not recognised nor stripped. - - - Development - =========== - - Running tests requires: - - - Michael Foord's 'mock' module to be installed. - - Either to be run under Python2.7 or 3.1 stdlib unittest, or to have Michael - Foord's 'unittest2' module to be installed. - - unittest2 test discovery doesn't work for colorama, so I use 'nose':: - - nosetests -s - - The -s is required because 'nosetests' otherwise applies a proxy of its own to - stdout, which confuses the unit tests. - - - Thanks - ====== - Roger Binns, for many suggestions, valuable feedback, & bug reports. - Tim Golden for thought and much appreciated feedback on the initial idea. - - - Changes - ======= - - 0.1.17 - Prevent printing of garbage ANSI codes upon installing with pip - 0.1.16 - Re-upload to fix previous error. Make clean now removes old MANIFEST. - 0.1.15 - Completely broken. 
Distribution was empty due to leftover invalid MANIFEST - file from building on a different platform. - Fix python3 incompatibility kindly reported by G |uumlaut| nter Kolousek - 0.1.14 - Fix hard-coded reset to white-on-black colors. Fore.RESET, Back.RESET - and Style.RESET_ALL now revert to the colors as they were when init() - was called. Some lessons hopefully learned about testing prior to release. - 0.1.13 - Completely broken: barfed when installed using pip. - 0.1.12 - Completely broken: contained no source code. double oops. - 0.1.11 - Completely broken: fatal import errors on Ubuntu. oops. - 0.1.10 - Stop emulating 'bright' text with bright backgrounds. - Display 'normal' text using win32 normal foreground instead of bright. - Drop support for 'dim' text. - 0.1.9 - Fix incompatibility with Python 2.5 and earlier. - Remove setup.py dependency on setuptools, now uses stdlib distutils. - 0.1.8 - Fix ghastly errors all over the place on Ubuntu. - Add init kwargs 'convert' and 'strip', which supercede the old 'wrap'. - 0.1.7 - Python 3 compatible. - Fix: Now strips ansi on windows without necessarily converting it to - win32 calls (eg. if output is not a tty.) - Fix: Flaky interaction of interleaved ansi sent to stdout and stderr. - Improved demo.sh (hg checkout only.) - 0.1.6 - Fix ansi sequences with no params now default to parmlist of [0]. - Fix flaky behaviour of autoreset and reset_all atexit. - Fix stacking of repeated atexit calls - now just called once. - Fix ghastly import problems while running tests. - 'demo.py' (hg checkout only) now demonstrates autoreset and reset atexit. - Provide colorama.VERSION, used by setup.py. - Tests defanged so they no longer actually change terminal color when run. - 0.1.5 - Now works on Ubuntu. 
- 0.1.4 - Implemented RESET_ALL on application exit - 0.1.3 - Implemented init(wrap=False) - 0.1.2 - Implemented init(autoreset=True) - 0.1.1 - Minor tidy - 0.1 - Works on Windows for foreground color, background color, bright or dim - - .. |uumlaut| unicode:: U+00FC .. u with umlaut - :trim: - - -Keywords: color colour terminal text ansi windows crossplatform xplatform -Platform: UNKNOWN -Classifier: Development Status :: 2 - Pre-Alpha -Classifier: Environment :: Console -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: BSD License -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.5 -Classifier: Programming Language :: Python :: 2.6 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.1 -Classifier: Topic :: Terminals diff --git a/colorama/__init__.py b/colorama/__init__.py deleted file mode 100644 index 331174e..0000000 --- a/colorama/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .initialise import init -from .ansi import Fore, Back, Style -from .ansitowin32 import AnsiToWin32 - -VERSION = '0.1.18' - diff --git a/colorama/ansi.py b/colorama/ansi.py deleted file mode 100644 index 7c45983..0000000 --- a/colorama/ansi.py +++ /dev/null @@ -1,49 +0,0 @@ -''' -This module generates ANSI character codes to printing colors to terminals. 
-See: http://en.wikipedia.org/wiki/ANSI_escape_code -''' - -CSI = '\033[' - -def code_to_chars(code): - return CSI + str(code) + 'm' - -class AnsiCodes(object): - def __init__(self, codes): - for name in dir(codes): - if not name.startswith('_'): - value = getattr(codes, name) - setattr(self, name, code_to_chars(value)) - -class AnsiFore: - BLACK = 30 - RED = 31 - GREEN = 32 - YELLOW = 33 - BLUE = 34 - MAGENTA = 35 - CYAN = 36 - WHITE = 37 - RESET = 39 - -class AnsiBack: - BLACK = 40 - RED = 41 - GREEN = 42 - YELLOW = 43 - BLUE = 44 - MAGENTA = 45 - CYAN = 46 - WHITE = 47 - RESET = 49 - -class AnsiStyle: - BRIGHT = 1 - DIM = 2 - NORMAL = 22 - RESET_ALL = 0 - -Fore = AnsiCodes( AnsiFore ) -Back = AnsiCodes( AnsiBack ) -Style = AnsiCodes( AnsiStyle ) - diff --git a/colorama/ansitowin32.py b/colorama/ansitowin32.py deleted file mode 100644 index 363061d..0000000 --- a/colorama/ansitowin32.py +++ /dev/null @@ -1,176 +0,0 @@ - -import re -import sys - -from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style -from .winterm import WinTerm, WinColor, WinStyle -from .win32 import windll - - -if windll is not None: - winterm = WinTerm() - - -def is_a_tty(stream): - return hasattr(stream, 'isatty') and stream.isatty() - - -class StreamWrapper(object): - ''' - Wraps a stream (such as stdout), acting as a transparent proxy for all - attribute access apart from method 'write()', which is delegated to our - Converter instance. - ''' - def __init__(self, wrapped, converter): - # double-underscore everything to prevent clashes with names of - # attributes on the wrapped stream object. - self.__wrapped = wrapped - self.__convertor = converter - - def __getattr__(self, name): - return getattr(self.__wrapped, name) - - def write(self, text): - self.__convertor.write(text) - - -class AnsiToWin32(object): - ''' - Implements a 'write()' method which, on Windows, will strip ANSI character - sequences from the text, and if outputting to a tty, will convert them into - win32 function calls. 
- ''' - ANSI_RE = re.compile('\033\[((?:\d|;)*)([a-zA-Z])') - - def __init__(self, wrapped, convert=None, strip=None, autoreset=False): - # The wrapped stream (normally sys.stdout or sys.stderr) - self.wrapped = wrapped - - # should we reset colors to defaults after every .write() - self.autoreset = autoreset - - # create the proxy wrapping our output stream - self.stream = StreamWrapper(wrapped, self) - - on_windows = sys.platform.startswith('win') - - # should we strip ANSI sequences from our output? - if strip is None: - strip = on_windows - self.strip = strip - - # should we should convert ANSI sequences into win32 calls? - if convert is None: - convert = on_windows and is_a_tty(wrapped) - self.convert = convert - - # dict of ansi codes to win32 functions and parameters - self.win32_calls = self.get_win32_calls() - - # are we wrapping stderr? - self.on_stderr = self.wrapped is sys.stderr - - - def should_wrap(self): - ''' - True if this class is actually needed. If false, then the output - stream will not be affected, nor will win32 calls be issued, so - wrapping stdout is not actually required. 
This will generally be - False on non-Windows platforms, unless optional functionality like - autoreset has been requested using kwargs to init() - ''' - return self.convert or self.strip or self.autoreset - - - def get_win32_calls(self): - if self.convert and winterm: - return { - AnsiStyle.RESET_ALL: (winterm.reset_all, ), - AnsiStyle.BRIGHT: (winterm.style, WinStyle.BRIGHT), - AnsiStyle.DIM: (winterm.style, WinStyle.NORMAL), - AnsiStyle.NORMAL: (winterm.style, WinStyle.NORMAL), - AnsiFore.BLACK: (winterm.fore, WinColor.BLACK), - AnsiFore.RED: (winterm.fore, WinColor.RED), - AnsiFore.GREEN: (winterm.fore, WinColor.GREEN), - AnsiFore.YELLOW: (winterm.fore, WinColor.YELLOW), - AnsiFore.BLUE: (winterm.fore, WinColor.BLUE), - AnsiFore.MAGENTA: (winterm.fore, WinColor.MAGENTA), - AnsiFore.CYAN: (winterm.fore, WinColor.CYAN), - AnsiFore.WHITE: (winterm.fore, WinColor.GREY), - AnsiFore.RESET: (winterm.fore, ), - AnsiBack.BLACK: (winterm.back, WinColor.BLACK), - AnsiBack.RED: (winterm.back, WinColor.RED), - AnsiBack.GREEN: (winterm.back, WinColor.GREEN), - AnsiBack.YELLOW: (winterm.back, WinColor.YELLOW), - AnsiBack.BLUE: (winterm.back, WinColor.BLUE), - AnsiBack.MAGENTA: (winterm.back, WinColor.MAGENTA), - AnsiBack.CYAN: (winterm.back, WinColor.CYAN), - AnsiBack.WHITE: (winterm.back, WinColor.GREY), - AnsiBack.RESET: (winterm.back, ), - } - - - def write(self, text): - if self.strip or self.convert: - self.write_and_convert(text) - else: - self.wrapped.write(text) - self.wrapped.flush() - if self.autoreset: - self.reset_all() - - - def reset_all(self): - if self.convert: - self.call_win32('m', (0,)) - else: - self.wrapped.write(Style.RESET_ALL) - - - def write_and_convert(self, text): - ''' - Write the given text to our wrapped stream, stripping any ANSI - sequences from the text, and optionally converting them into win32 - calls. 
- ''' - cursor = 0 - for match in self.ANSI_RE.finditer(text): - start, end = match.span() - self.write_plain_text(text, cursor, start) - self.convert_ansi(*match.groups()) - cursor = end - self.write_plain_text(text, cursor, len(text)) - - - def write_plain_text(self, text, start, end): - if start < end: - self.wrapped.write(text[start:end]) - self.wrapped.flush() - - - def convert_ansi(self, paramstring, command): - if self.convert: - params = self.extract_params(paramstring) - self.call_win32(command, params) - - - def extract_params(self, paramstring): - def split(paramstring): - for p in paramstring.split(';'): - if p != '': - yield int(p) - return tuple(split(paramstring)) - - - def call_win32(self, command, params): - if params == []: - params = [0] - if command == 'm': - for param in params: - if param in self.win32_calls: - func_args = self.win32_calls[param] - func = func_args[0] - args = func_args[1:] - kwargs = dict(on_stderr=self.on_stderr) - func(*args, **kwargs) - diff --git a/colorama/initialise.py b/colorama/initialise.py deleted file mode 100644 index 4df5c3e..0000000 --- a/colorama/initialise.py +++ /dev/null @@ -1,38 +0,0 @@ -import atexit -import sys - -from .ansitowin32 import AnsiToWin32 - - -orig_stdout = sys.stdout -orig_stderr = sys.stderr - -atexit_done = False - - -def reset_all(): - AnsiToWin32(orig_stdout).reset_all() - - -def init(autoreset=False, convert=None, strip=None, wrap=True): - - if wrap==False and (autoreset==True or convert==True or strip==True): - raise ValueError('wrap=False conflicts with any other arg=True') - - sys.stdout = wrap_stream(orig_stdout, convert, strip, autoreset, wrap) - sys.stderr = wrap_stream(orig_stderr, convert, strip, autoreset, wrap) - - global atexit_done - if not atexit_done: - atexit.register(reset_all) - atexit_done = True - - -def wrap_stream(stream, convert, strip, autoreset, wrap): - if wrap: - wrapper = AnsiToWin32(stream, - convert=convert, strip=strip, autoreset=autoreset) - if 
wrapper.should_wrap(): - stream = wrapper.stream - return stream - diff --git a/colorama/win32.py b/colorama/win32.py deleted file mode 100644 index 2a6fc94..0000000 --- a/colorama/win32.py +++ /dev/null @@ -1,95 +0,0 @@ - -# from winbase.h -STDOUT = -11 -STDERR = -12 - -try: - from ctypes import windll -except ImportError: - windll = None - SetConsoleTextAttribute = lambda *_: None -else: - from ctypes import ( - byref, Structure, c_char, c_short, c_uint32, c_ushort - ) - - handles = { - STDOUT: windll.kernel32.GetStdHandle(STDOUT), - STDERR: windll.kernel32.GetStdHandle(STDERR), - } - - SHORT = c_short - WORD = c_ushort - DWORD = c_uint32 - TCHAR = c_char - - class COORD(Structure): - """struct in wincon.h""" - _fields_ = [ - ('X', SHORT), - ('Y', SHORT), - ] - - class SMALL_RECT(Structure): - """struct in wincon.h.""" - _fields_ = [ - ("Left", SHORT), - ("Top", SHORT), - ("Right", SHORT), - ("Bottom", SHORT), - ] - - class CONSOLE_SCREEN_BUFFER_INFO(Structure): - """struct in wincon.h.""" - _fields_ = [ - ("dwSize", COORD), - ("dwCursorPosition", COORD), - ("wAttributes", WORD), - ("srWindow", SMALL_RECT), - ("dwMaximumWindowSize", COORD), - ] - - def GetConsoleScreenBufferInfo(stream_id): - handle = handles[stream_id] - csbi = CONSOLE_SCREEN_BUFFER_INFO() - success = windll.kernel32.GetConsoleScreenBufferInfo( - handle, byref(csbi)) - # This fails when imported via setup.py when installing using 'pip' - # presumably the fix is that running setup.py should not trigger all - # this activity. 
- # assert success - return csbi - - def SetConsoleTextAttribute(stream_id, attrs): - handle = handles[stream_id] - success = windll.kernel32.SetConsoleTextAttribute(handle, attrs) - assert success - - def SetConsoleCursorPosition(stream_id, position): - handle = handles[stream_id] - position = COORD(*position) - success = windll.kernel32.SetConsoleCursorPosition(handle, position) - assert success - - def FillConsoleOutputCharacter(stream_id, char, length, start): - handle = handles[stream_id] - char = TCHAR(char) - length = DWORD(length) - start = COORD(*start) - num_written = DWORD(0) - # AttributeError: function 'FillConsoleOutputCharacter' not found - # could it just be that my types are wrong? - success = windll.kernel32.FillConsoleOutputCharacter( - handle, char, length, start, byref(num_written)) - assert success - return num_written.value - - -if __name__=='__main__': - x = GetConsoleScreenBufferInfo(STDOUT) - print(x.dwSize) - print(x.dwCursorPosition) - print(x.wAttributes) - print(x.srWindow) - print(x.dwMaximumWindowSize) - diff --git a/colorama/winterm.py b/colorama/winterm.py deleted file mode 100644 index 4326c21..0000000 --- a/colorama/winterm.py +++ /dev/null @@ -1,69 +0,0 @@ - -from . 
import win32 - - -# from wincon.h -class WinColor(object): - BLACK = 0 - BLUE = 1 - GREEN = 2 - CYAN = 3 - RED = 4 - MAGENTA = 5 - YELLOW = 6 - GREY = 7 - -# from wincon.h -class WinStyle(object): - NORMAL = 0x00 # dim text, dim background - BRIGHT = 0x08 # bright text, dim background - - -class WinTerm(object): - - def __init__(self): - self._default = \ - win32.GetConsoleScreenBufferInfo(win32.STDOUT).wAttributes - self.set_attrs(self._default) - self._default_fore = self._fore - self._default_back = self._back - self._default_style = self._style - - def get_attrs(self): - return self._fore + self._back * 16 + self._style - - def set_attrs(self, value): - self._fore = value & 7 - self._back = (value >> 4) & 7 - self._style = value & WinStyle.BRIGHT - - def reset_all(self, on_stderr=None): - self.set_attrs(self._default) - self.set_console(attrs=self._default) - - def fore(self, fore=None, on_stderr=False): - if fore is None: - fore = self._default_fore - self._fore = fore - self.set_console(on_stderr=on_stderr) - - def back(self, back=None, on_stderr=False): - if back is None: - back = self._default_back - self._back = back - self.set_console(on_stderr=on_stderr) - - def style(self, style=None, on_stderr=False): - if style is None: - style = self._default_style - self._style = style - self.set_console(on_stderr=on_stderr) - - def set_console(self, attrs=None, on_stderr=False): - if attrs is None: - attrs = self.get_attrs() - handle = win32.STDOUT - if on_stderr: - handle = win32.STDERR - win32.SetConsoleTextAttribute(handle, attrs) - diff --git a/jsbeautifier/__init__.py b/jsbeautifier/__init__.py deleted file mode 100644 index 18353f4..0000000 --- a/jsbeautifier/__init__.py +++ /dev/null @@ -1,1128 +0,0 @@ -import sys -import getopt -import re -import string - -# -# Originally written by Einar Lielmanis et al., -# Conversion to python by Einar Lielmanis, einar@jsbeautifier.org, -# MIT licence, enjoy. 
-# -# Python is not my native language, feel free to push things around. -# -# Use either from command line (script displays its usage when run -# without any parameters), -# -# -# or, alternatively, use it as a module: -# -# import jsbeautifier -# res = jsbeautifier.beautify('your javascript string') -# res = jsbeautifier.beautify_file('some_file.js') -# -# you may specify some options: -# -# opts = jsbeautifier.default_options() -# opts.indent_size = 2 -# res = jsbeautifier.beautify('some javascript', opts) -# -# -# Here are the available options: (read source) - - -class BeautifierOptions: - def __init__(self): - self.indent_size = 4 - self.indent_char = ' ' - self.indent_with_tabs = False - self.preserve_newlines = True - self.max_preserve_newlines = 10. - self.jslint_happy = False - self.brace_style = 'collapse' - self.keep_array_indentation = False - self.keep_function_indentation = False - self.eval_code = False - - - - def __repr__(self): - return \ -"""indent_size = %d -indent_char = [%s] -preserve_newlines = %s -max_preserve_newlines = %d -jslint_happy = %s -indent_with_tabs = %s -brace_style = %s -keep_array_indentation = %s -eval_code = %s -""" % ( self.indent_size, - self.indent_char, - self.preserve_newlines, - self.max_preserve_newlines, - self.jslint_happy, - self.indent_with_tabs, - self.brace_style, - self.keep_array_indentation, - self.eval_code, - ) - - -class BeautifierFlags: - def __init__(self, mode): - self.previous_mode = 'BLOCK' - self.mode = mode - self.var_line = False - self.var_line_tainted = False - self.var_line_reindented = False - self.in_html_comment = False - self.if_line = False - self.in_case = False - self.eat_next_space = False - self.indentation_baseline = -1 - self.indentation_level = 0 - self.ternary_depth = 0 - - -def default_options(): - return BeautifierOptions() - - -def beautify(string, opts = default_options() ): - b = Beautifier() - return b.beautify(string, opts) - - -def beautify_file(file_name, opts = 
default_options() ): - - if file_name == '-': # stdin - f = sys.stdin - else: - f = open(file_name) - - b = Beautifier() - return b.beautify(''.join(f.readlines()), opts) - - -def usage(): - - print("""Javascript beautifier (http://jsbeautifier.org/) - -Usage: jsbeautifier.py [options] - - can be "-", which means stdin. - defaults to stdout - -Input options: - - -i, --stdin read input from stdin - -Output options: - - -s, --indent-size=NUMBER indentation size. (default 4). - -c, --indent-char=CHAR character to indent with. (default space). - -t, --indent-with-tabs Indent with tabs, overrides -s and -c - -d, --disable-preserve-newlines do not preserve existing line breaks. - -j, --jslint-happy more jslint-compatible output - -b, --brace-style=collapse brace style (collapse, expand, end-expand) - -k, --keep-array-indentation keep array indentation. - -o, --outfile=FILE specify a file to output to (default stdout) - -f, --keep-function-indentation Do not re-indent function bodies defined in var lines. - -Rarely needed options: - - --eval-code evaluate code if a JS interpreter is - installed. May be useful with some obfuscated - script but poses a potential security issue. - - -l, --indent-level=NUMBER initial indentation level. (default 0). - - -h, --help, --usage prints this help statement. 
- -"""); - - - - - - -class Beautifier: - - def __init__(self, opts = default_options() ): - - self.opts = opts - self.blank_state() - - def blank_state(self): - - # internal flags - self.flags = BeautifierFlags('BLOCK') - self.flag_store = [] - self.wanted_newline = False - self.just_added_newline = False - self.do_block_just_closed = False - - if self.opts.indent_with_tabs: - self.indent_string = "\t" - else: - self.indent_string = self.opts.indent_char * self.opts.indent_size - - self.preindent_string = '' - self.last_word = '' # last TK_WORD seen - self.last_type = 'TK_START_EXPR' # last token type - self.last_text = '' # last token text - self.last_last_text = '' # pre-last token text - - self.input = None - self.output = [] # formatted javascript gets built here - - self.whitespace = ["\n", "\r", "\t", " "] - self.wordchar = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$' - self.digits = '0123456789' - self.punct = '+ - * / % & ++ -- = += -= *= /= %= == === != !== > < >= <= >> << >>> >>>= >>= <<= && &= | || ! !! , : ? 
^ ^= |= ::' - self.punct += ' <%= <% %>' - self.punct = self.punct.split(' ') - - - # Words which always should start on a new line - self.line_starters = 'continue,try,throw,return,var,if,switch,case,default,for,while,break,function'.split(',') - self.set_mode('BLOCK') - - global parser_pos - parser_pos = 0 - - - def beautify(self, s, opts = None ): - - if opts != None: - self.opts = opts - - - if self.opts.brace_style not in ['expand', 'collapse', 'end-expand']: - raise(Exception('opts.brace_style must be "expand", "collapse" or "end-expand".')) - - self.blank_state() - - while s and s[0] in [' ', '\t']: - self.preindent_string += s[0] - s = s[1:] - - self.input = self.unpack(s, opts.eval_code) - - parser_pos = 0 - while True: - token_text, token_type = self.get_next_token() - #print (token_text, token_type, self.flags.mode) - if token_type == 'TK_EOF': - break - - handlers = { - 'TK_START_EXPR': self.handle_start_expr, - 'TK_END_EXPR': self.handle_end_expr, - 'TK_START_BLOCK': self.handle_start_block, - 'TK_END_BLOCK': self.handle_end_block, - 'TK_WORD': self.handle_word, - 'TK_SEMICOLON': self.handle_semicolon, - 'TK_STRING': self.handle_string, - 'TK_EQUALS': self.handle_equals, - 'TK_OPERATOR': self.handle_operator, - 'TK_BLOCK_COMMENT': self.handle_block_comment, - 'TK_INLINE_COMMENT': self.handle_inline_comment, - 'TK_COMMENT': self.handle_comment, - 'TK_UNKNOWN': self.handle_unknown, - } - - handlers[token_type](token_text) - - self.last_last_text = self.last_text - self.last_type = token_type - self.last_text = token_text - - sweet_code = self.preindent_string + re.sub('[\n ]+$', '', ''.join(self.output)) - return sweet_code - - def unpack(self, source, evalcode=False): - import jsbeautifier.unpackers as unpackers - try: - return unpackers.run(source, evalcode) - except unpackers.UnpackingError as error: - print('error:', error) - return '' - - def trim_output(self, eat_newlines = False): - while len(self.output) \ - and ( - self.output[-1] == ' '\ - or 
self.output[-1] == self.indent_string \ - or self.output[-1] == self.preindent_string \ - or (eat_newlines and self.output[-1] in ['\n', '\r'])): - self.output.pop() - - - def is_array(self, mode): - return mode in ['[EXPRESSION]', '[INDENDED-EXPRESSION]'] - - - def is_expression(self, mode): - return mode in ['[EXPRESSION]', '[INDENDED-EXPRESSION]', '(EXPRESSION)'] - - - def append_newline_forced(self): - old_array_indentation = self.opts.keep_array_indentation - self.opts.keep_array_indentation = False - self.append_newline() - self.opts.keep_array_indentation = old_array_indentation - - def append_newline(self, ignore_repeated = True): - - self.flags.eat_next_space = False; - - if self.opts.keep_array_indentation and self.is_array(self.flags.mode): - return - - self.flags.if_line = False; - self.trim_output(); - - if len(self.output) == 0: - # no newline on start of file - return - - if self.output[-1] != '\n' or not ignore_repeated: - self.just_added_newline = True - self.output.append('\n') - - if self.preindent_string: - self.output.append(self.preindent_string) - - for i in range(self.flags.indentation_level): - self.output.append(self.indent_string) - - if self.flags.var_line and self.flags.var_line_reindented: - self.output.append(self.indent_string) - - - def append(self, s): - if s == ' ': - # make sure only single space gets drawn - if self.flags.eat_next_space: - self.flags.eat_next_space = False - elif len(self.output) and self.output[-1] not in [' ', '\n', self.indent_string]: - self.output.append(' ') - else: - self.just_added_newline = False - self.flags.eat_next_space = False - self.output.append(s) - - - def indent(self): - self.flags.indentation_level = self.flags.indentation_level + 1 - - - def remove_indent(self): - if len(self.output) and self.output[-1] in [self.indent_string, self.preindent_string]: - self.output.pop() - - - def set_mode(self, mode): - - prev = BeautifierFlags('BLOCK') - - if self.flags: - self.flag_store.append(self.flags) 
- prev = self.flags - - self.flags = BeautifierFlags(mode) - - if len(self.flag_store) == 1: - self.flags.indentation_level = 0 - else: - self.flags.indentation_level = prev.indentation_level - if prev.var_line and prev.var_line_reindented: - self.flags.indentation_level = self.flags.indentation_level + 1 - self.flags.previous_mode = prev.mode - - - def restore_mode(self): - self.do_block_just_closed = self.flags.mode == 'DO_BLOCK' - if len(self.flag_store) > 0: - self.flags = self.flag_store.pop() - - - def get_next_token(self): - - global parser_pos - - self.n_newlines = 0 - - if parser_pos >= len(self.input): - return '', 'TK_EOF' - - self.wanted_newline = False; - c = self.input[parser_pos] - parser_pos += 1 - - keep_whitespace = self.opts.keep_array_indentation and self.is_array(self.flags.mode) - - if keep_whitespace: - # slight mess to allow nice preservation of array indentation and reindent that correctly - # first time when we get to the arrays: - # var a = [ - # ....'something' - # we make note of whitespace_count = 4 into flags.indentation_baseline - # so we know that 4 whitespaces in original source match indent_level of reindented source - # - # and afterwards, when we get to - # 'something, - # .......'something else' - # we know that this should be indented to indent_level + (7 - indentation_baseline) spaces - - whitespace_count = 0 - while c in self.whitespace: - if c == '\n': - self.trim_output() - self.output.append('\n') - self.just_added_newline = True - whitespace_count = 0 - elif c == '\t': - whitespace_count += 4 - elif c == '\r': - pass - else: - whitespace_count += 1 - - if parser_pos >= len(self.input): - return '', 'TK_EOF' - - c = self.input[parser_pos] - parser_pos += 1 - - if self.flags.indentation_baseline == -1: - - self.flags.indentation_baseline = whitespace_count - - if self.just_added_newline: - for i in range(self.flags.indentation_level + 1): - self.output.append(self.indent_string) - - if self.flags.indentation_baseline != 
-1: - for i in range(whitespace_count - self.flags.indentation_baseline): - self.output.append(' ') - - else: # not keep_whitespace - while c in self.whitespace: - if c == '\n': - if self.opts.max_preserve_newlines == 0 or self.opts.max_preserve_newlines > self.n_newlines: - self.n_newlines += 1 - - if parser_pos >= len(self.input): - return '', 'TK_EOF' - - c = self.input[parser_pos] - parser_pos += 1 - - if self.opts.preserve_newlines and self.n_newlines > 1: - for i in range(self.n_newlines): - self.append_newline(i == 0) - self.just_added_newline = True - - self.wanted_newline = self.n_newlines > 0 - - - if c in self.wordchar: - if parser_pos < len(self.input): - while self.input[parser_pos] in self.wordchar: - c = c + self.input[parser_pos] - parser_pos += 1 - if parser_pos == len(self.input): - break - - # small and surprisingly unugly hack for 1E-10 representation - if parser_pos != len(self.input) and self.input[parser_pos] in '+-' \ - and re.match('^[0-9]+[Ee]$', c): - - sign = self.input[parser_pos] - parser_pos += 1 - t = self.get_next_token() - c += sign + t[0] - return c, 'TK_WORD' - - if c == 'in': # in is an operator, need to hack - return c, 'TK_OPERATOR' - - if self.wanted_newline and \ - self.last_type != 'TK_OPERATOR' and\ - self.last_type != 'TK_EQUALS' and\ - not self.flags.if_line and \ - (self.opts.preserve_newlines or self.last_text != 'var'): - self.append_newline() - - return c, 'TK_WORD' - - if c in '([': - return c, 'TK_START_EXPR' - - if c in ')]': - return c, 'TK_END_EXPR' - - if c == '{': - return c, 'TK_START_BLOCK' - - if c == '}': - return c, 'TK_END_BLOCK' - - if c == ';': - return c, 'TK_SEMICOLON' - - if c == '/': - comment = '' - inline_comment = True - comment_mode = 'TK_INLINE_COMMENT' - if self.input[parser_pos] == '*': # peek /* .. 
*/ comment - parser_pos += 1 - if parser_pos < len(self.input): - while not (self.input[parser_pos] == '*' and \ - parser_pos + 1 < len(self.input) and \ - self.input[parser_pos + 1] == '/')\ - and parser_pos < len(self.input): - c = self.input[parser_pos] - comment += c - if c in '\r\n': - comment_mode = 'TK_BLOCK_COMMENT' - parser_pos += 1 - if parser_pos >= len(self.input): - break - parser_pos += 2 - return '/*' + comment + '*/', comment_mode - if self.input[parser_pos] == '/': # peek // comment - comment = c - while self.input[parser_pos] not in '\r\n': - comment += self.input[parser_pos] - parser_pos += 1 - if parser_pos >= len(self.input): - break - parser_pos += 1 - if self.wanted_newline: - self.append_newline() - return comment, 'TK_COMMENT' - - - - if c == "'" or c == '"' or \ - (c == '/' and ((self.last_type == 'TK_WORD' and self.last_text in ['return', 'do']) or \ - (self.last_type in ['TK_COMMENT', 'TK_START_EXPR', 'TK_START_BLOCK', 'TK_END_BLOCK', 'TK_OPERATOR', - 'TK_EQUALS', 'TK_EOF', 'TK_SEMICOLON']))): - sep = c - esc = False - resulting_string = c - in_char_class = False - - if parser_pos < len(self.input): - if sep == '/': - # handle regexp - in_char_class = False - while esc or in_char_class or self.input[parser_pos] != sep: - resulting_string += self.input[parser_pos] - if not esc: - esc = self.input[parser_pos] == '\\' - if self.input[parser_pos] == '[': - in_char_class = True - elif self.input[parser_pos] == ']': - in_char_class = False - else: - esc = False - parser_pos += 1 - if parser_pos >= len(self.input): - # incomplete regex when end-of-file reached - # bail out with what has received so far - return resulting_string, 'TK_STRING' - else: - # handle string - while esc or self.input[parser_pos] != sep: - resulting_string += self.input[parser_pos] - if not esc: - esc = self.input[parser_pos] == '\\' - else: - esc = False - parser_pos += 1 - if parser_pos >= len(self.input): - # incomplete string when end-of-file reached - # bail out 
with what has received so far - return resulting_string, 'TK_STRING' - - - parser_pos += 1 - resulting_string += sep - if sep == '/': - # regexps may have modifiers /regexp/MOD, so fetch those too - while parser_pos < len(self.input) and self.input[parser_pos] in self.wordchar: - resulting_string += self.input[parser_pos] - parser_pos += 1 - return resulting_string, 'TK_STRING' - - if c == '#': - - # she-bang - if len(self.output) == 0 and len(self.input) > 1 and self.input[parser_pos] == '!': - resulting_string = c - while parser_pos < len(self.input) and c != '\n': - c = self.input[parser_pos] - resulting_string += c - parser_pos += 1 - self.output.append(resulting_string.strip() + "\n") - self.append_newline() - return self.get_next_token() - - - # Spidermonkey-specific sharp variables for circular references - # https://developer.mozilla.org/En/Sharp_variables_in_JavaScript - # http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935 - sharp = '#' - if parser_pos < len(self.input) and self.input[parser_pos] in self.digits: - while True: - c = self.input[parser_pos] - sharp += c - parser_pos += 1 - if parser_pos >= len(self.input) or c == '#' or c == '=': - break - if c == '#' or parser_pos >= len(self.input): - pass - elif self.input[parser_pos] == '[' and self.input[parser_pos + 1] == ']': - sharp += '[]' - parser_pos += 2 - elif self.input[parser_pos] == '{' and self.input[parser_pos + 1] == '}': - sharp += '{}' - parser_pos += 2 - return sharp, 'TK_WORD' - - if c == '<' and self.input[parser_pos - 1 : parser_pos + 3] == '': - self.flags.in_html_comment = False - parser_pos += 2 - if self.wanted_newline: - self.append_newline() - return '-->', 'TK_COMMENT' - - if c in self.punct: - while parser_pos < len(self.input) and c + self.input[parser_pos] in self.punct: - c += self.input[parser_pos] - parser_pos += 1 - if parser_pos >= len(self.input): - break - if c == '=': - return c, 'TK_EQUALS' - else: - return c, 'TK_OPERATOR' - return c, 
'TK_UNKNOWN' - - - - def handle_start_expr(self, token_text): - if token_text == '[': - if self.last_type == 'TK_WORD' or self.last_text == ')': - if self.last_text in self.line_starters: - self.append(' ') - self.set_mode('(EXPRESSION)') - self.append(token_text) - return - - if self.flags.mode in ['[EXPRESSION]', '[INDENTED-EXPRESSION]']: - if self.last_last_text == ']' and self.last_text == ',': - # ], [ goes to a new line - if self.flags.mode == '[EXPRESSION]': - self.flags.mode = '[INDENTED-EXPRESSION]' - if not self.opts.keep_array_indentation: - self.indent() - self.set_mode('[EXPRESSION]') - if not self.opts.keep_array_indentation: - self.append_newline() - elif self.last_text == '[': - if self.flags.mode == '[EXPRESSION]': - self.flags.mode = '[INDENTED-EXPRESSION]' - if not self.opts.keep_array_indentation: - self.indent() - self.set_mode('[EXPRESSION]') - - if not self.opts.keep_array_indentation: - self.append_newline() - else: - self.set_mode('[EXPRESSION]') - else: - self.set_mode('[EXPRESSION]') - else: - self.set_mode('(EXPRESSION)') - - - if self.last_text == ';' or self.last_type == 'TK_START_BLOCK': - self.append_newline() - elif self.last_type in ['TK_END_EXPR', 'TK_START_EXPR', 'TK_END_BLOCK'] or self.last_text == '.': - # do nothing on (( and )( and ][ and ]( and .( - pass - elif self.last_type not in ['TK_WORD', 'TK_OPERATOR']: - self.append(' ') - elif self.last_word == 'function' or self.last_word == 'typeof': - # function() vs function (), typeof() vs typeof () - if self.opts.jslint_happy: - self.append(' ') - elif self.last_text in self.line_starters or self.last_text == 'catch': - self.append(' ') - - self.append(token_text) - - - def handle_end_expr(self, token_text): - if token_text == ']': - if self.opts.keep_array_indentation: - if self.last_text == '}': - self.remove_indent() - self.append(token_text) - self.restore_mode() - return - else: - if self.flags.mode == '[INDENTED-EXPRESSION]': - if self.last_text == ']': - 
self.restore_mode() - self.append_newline() - self.append(token_text) - return - self.restore_mode() - self.append(token_text) - - - def handle_start_block(self, token_text): - if self.last_word == 'do': - self.set_mode('DO_BLOCK') - else: - self.set_mode('BLOCK') - - if self.opts.brace_style == 'expand': - if self.last_type != 'TK_OPERATOR': - if self.last_text in ['return', '=']: - self.append(' ') - else: - self.append_newline(True) - - self.append(token_text) - self.indent() - else: - if self.last_type not in ['TK_OPERATOR', 'TK_START_EXPR']: - if self.last_type == 'TK_START_BLOCK': - self.append_newline() - else: - self.append(' ') - else: - # if TK_OPERATOR or TK_START_EXPR - if self.is_array(self.flags.previous_mode) and self.last_text == ',': - if self.last_last_text == '}': - self.append(' ') - else: - self.append_newline() - self.indent() - self.append(token_text) - - - def handle_end_block(self, token_text): - self.restore_mode() - if self.opts.brace_style == 'expand': - if self.last_text != '{': - self.append_newline() - else: - if self.last_type == 'TK_START_BLOCK': - if self.just_added_newline: - self.remove_indent() - else: - # {} - self.trim_output() - else: - if self.is_array(self.flags.mode) and self.opts.keep_array_indentation: - self.opts.keep_array_indentation = False - self.append_newline() - self.opts.keep_array_indentation = True - else: - self.append_newline() - - self.append(token_text) - - - def handle_word(self, token_text): - if self.do_block_just_closed: - self.append(' ') - self.append(token_text) - self.append(' ') - self.do_block_just_closed = False - return - - if token_text == 'function': - - if self.flags.var_line: - self.flags.var_line_reindented = not self.opts.keep_function_indentation - if (self.just_added_newline or self.last_text == ';') and self.last_text != '{': - # make sure there is a nice clean space of at least one blank line - # before a new function definition - have_newlines = self.n_newlines - if not 
self.just_added_newline: - have_newlines = 0 - if not self.opts.preserve_newlines: - have_newlines = 1 - for i in range(2 - have_newlines): - self.append_newline(False) - - if token_text in ['case', 'default']: - if self.last_text == ':': - self.remove_indent() - else: - self.flags.indentation_level -= 1 - self.append_newline() - self.flags.indentation_level += 1 - self.append(token_text) - self.flags.in_case = True - return - - prefix = 'NONE' - - if self.last_type == 'TK_END_BLOCK': - if token_text not in ['else', 'catch', 'finally']: - prefix = 'NEWLINE' - else: - if self.opts.brace_style in ['expand', 'end-expand']: - prefix = 'NEWLINE' - else: - prefix = 'SPACE' - self.append(' ') - elif self.last_type == 'TK_SEMICOLON' and self.flags.mode in ['BLOCK', 'DO_BLOCK']: - prefix = 'NEWLINE' - elif self.last_type == 'TK_SEMICOLON' and self.is_expression(self.flags.mode): - prefix = 'SPACE' - elif self.last_type == 'TK_STRING': - prefix = 'NEWLINE' - elif self.last_type == 'TK_WORD': - if self.last_text == 'else': - # eat newlines between ...else *** some_op... - # won't preserve extra newlines in this place (if any), but don't care that much - self.trim_output(True); - prefix = 'SPACE' - elif self.last_type == 'TK_START_BLOCK': - prefix = 'NEWLINE' - elif self.last_type == 'TK_END_EXPR': - self.append(' ') - prefix = 'NEWLINE' - - if self.flags.if_line and self.last_type == 'TK_END_EXPR': - self.flags.if_line = False - - if token_text in self.line_starters: - if self.last_text == 'else': - prefix = 'SPACE' - else: - prefix = 'NEWLINE' - - if token_text in ['else', 'catch', 'finally']: - if self.last_type != 'TK_END_BLOCK' \ - or self.opts.brace_style == 'expand' \ - or self.opts.brace_style == 'end-expand': - self.append_newline() - else: - self.trim_output(True) - self.append(' ') - elif prefix == 'NEWLINE': - if token_text == 'function' and (self.last_type == 'TK_START_EXPR' or self.last_text in '=,'): - # no need to force newline on "function" - - # (function... 
- pass - elif token_text == 'function' and self.last_text == 'new': - self.append(' ') - elif self.last_text in ['return', 'throw']: - # no newline between return nnn - self.append(' ') - elif self.last_type != 'TK_END_EXPR': - if (self.last_type != 'TK_START_EXPR' or token_text != 'var') and self.last_text != ':': - # no need to force newline on VAR - - # for (var x = 0... - if token_text == 'if' and self.last_word == 'else' and self.last_text != '{': - self.append(' ') - else: - self.flags.var_line = False - self.flags.var_line_reindented = False - self.append_newline() - elif token_text in self.line_starters and self.last_text != ')': - self.flags.var_line = False - self.flags.var_line_reindented = False - self.append_newline() - elif self.is_array(self.flags.mode) and self.last_text == ',' and self.last_last_text == '}': - self.append_newline() # }, in lists get a newline - elif prefix == 'SPACE': - self.append(' ') - - - self.append(token_text) - self.last_word = token_text - - if token_text == 'var': - self.flags.var_line = True - self.flags.var_line_reindented = False - self.flags.var_line_tainted = False - - - if token_text == 'if': - self.flags.if_line = True - - if token_text == 'else': - self.flags.if_line = False - - - def handle_semicolon(self, token_text): - self.append(token_text) - self.flags.var_line = False - self.flags.var_line_reindented = False - if self.flags.mode == 'OBJECT': - # OBJECT mode is weird and doesn't get reset too well. - self.flags.mode = 'BLOCK' - - - def handle_string(self, token_text): - if self.last_type in ['TK_START_BLOCK', 'TK_END_BLOCK', 'TK_SEMICOLON']: - self.append_newline() - elif self.last_type == 'TK_WORD': - self.append(' ') - - # Try to replace readable \x-encoded characters with their equivalent, - # if it is possible (e.g. '\x41\x42\x43\x01' becomes 'ABC\x01'). 
- def unescape(match): - block, code = match.group(0, 1) - char = chr(int(code, 16)) - if block.count('\\') == 1 and char in string.printable: - return char - return block - - token_text = re.sub(r'\\{1,2}x([a-fA-F0-9]{2})', unescape, token_text) - - self.append(token_text) - - def handle_equals(self, token_text): - if self.flags.var_line: - # just got an '=' in a var-line, different line breaking rules will apply - self.flags.var_line_tainted = True - - self.append(' ') - self.append(token_text) - self.append(' ') - - - def handle_operator(self, token_text): - space_before = True - space_after = True - - if self.flags.var_line and token_text == ',' and self.is_expression(self.flags.mode): - # do not break on comma, for ( var a = 1, b = 2 - self.flags.var_line_tainted = False - - if self.flags.var_line and token_text == ',': - if self.flags.var_line_tainted: - self.append(token_text) - self.flags.var_line_reindented = True - self.flags.var_line_tainted = False - self.append_newline() - return - else: - self.flags.var_line_tainted = False - - if self.last_text in ['return', 'throw']: - # return had a special handling in TK_WORD - self.append(' ') - self.append(token_text) - return - - if token_text == ':' and self.flags.in_case: - self.append(token_text) - self.append_newline() - self.flags.in_case = False - return - - if token_text == '::': - # no spaces around the exotic namespacing syntax operator - self.append(token_text) - return - - if token_text == ',': - if self.flags.var_line: - if self.flags.var_line_tainted: - # This never happens, as it's handled previously, right? 
- self.append(token_text) - self.append_newline() - self.flags.var_line_tainted = False - else: - self.append(token_text) - self.append(' ') - elif self.last_type == 'TK_END_BLOCK' and self.flags.mode != '(EXPRESSION)': - self.append(token_text) - if self.flags.mode == 'OBJECT' and self.last_text == '}': - self.append_newline() - else: - self.append(' ') - else: - if self.flags.mode == 'OBJECT': - self.append(token_text) - self.append_newline() - else: - # EXPR or DO_BLOCK - self.append(token_text) - self.append(' ') - # comma handled - return - elif token_text in ['--', '++', '!'] \ - or (token_text in ['+', '-'] \ - and self.last_type in ['TK_START_BLOCK', 'TK_START_EXPR', 'TK_EQUALS', 'TK_OPERATOR']) \ - or self.last_text in self.line_starters: - - space_before = False - space_after = False - - if self.last_text == ';' and self.is_expression(self.flags.mode): - # for (;; ++i) - # ^^ - space_before = True - - if self.last_type == 'TK_WORD' and self.last_text in self.line_starters: - space_before = True - - if self.flags.mode == 'BLOCK' and self.last_text in ['{', ';']: - # { foo: --i } - # foo(): --bar - self.append_newline() - - elif token_text == '.': - # decimal digits or object.property - space_before = False - - elif token_text == ':': - if self.flags.ternary_depth == 0: - self.flags.mode = 'OBJECT' - space_before = False - else: - self.flags.ternary_depth -= 1 - elif token_text == '?': - self.flags.ternary_depth += 1 - - if space_before: - self.append(' ') - - self.append(token_text) - - if space_after: - self.append(' ') - - - - def handle_block_comment(self, token_text): - - lines = token_text.replace('\x0d', '').split('\x0a') - # all lines start with an asterisk? 
that's a proper box comment - if not any(l for l in lines[1:] if ( l.strip() == '' or (l.lstrip())[0] != '*')): - self.append_newline() - self.append(lines[0]) - for line in lines[1:]: - self.append_newline() - self.append(' ' + line.strip()) - else: - # simple block comment: leave intact - if len(lines) > 1: - # multiline comment starts on a new line - self.append_newline() - self.trim_output() - else: - # single line /* ... */ comment stays on the same line - self.append(' ') - for line in lines: - self.append(line) - self.append('\n') - self.append_newline() - - - def handle_inline_comment(self, token_text): - self.append(' ') - self.append(token_text) - if self.is_expression(self.flags.mode): - self.append(' ') - else: - self.append_newline_forced() - - - def handle_comment(self, token_text): - if self.wanted_newline: - self.append_newline() - else: - self.append(' ') - - self.append(token_text) - self.append_newline_forced() - - - def handle_unknown(self, token_text): - if self.last_text in ['return', 'throw']: - self.append(' ') - - self.append(token_text) - - - - - -def main(): - - argv = sys.argv[1:] - - try: - opts, args = getopt.getopt(argv, "s:c:o:djbkil:h:t:f", ['indent-size=','indent-char=','outfile=', 'disable-preserve-newlines', - 'jslint-happy', 'brace-style=', - 'keep-array-indentation', 'indent-level=', 'help', - 'usage', 'stdin', 'eval-code', 'indent-with-tabs', 'keep-function-indentation']) - except getopt.GetoptError: - usage() - sys.exit(2) - - js_options = default_options() - - file = None - outfile = 'stdout' - if len(args) == 1: - file = args[0] - - for opt, arg in opts: - if opt in ('--keep-array-indentation', '-k'): - js_options.keep_array_indentation = True - if opt in ('--keep-function-indentation','-f'): - js_options.keep_function_indentation = True - elif opt in ('--outfile', '-o'): - outfile = arg - elif opt in ('--indent-size', '-s'): - js_options.indent_size = int(arg) - elif opt in ('--indent-char', '-c'): - js_options.indent_char 
= arg - elif opt in ('--indent-with-tabs', '-t'): - js_options.indent_with_tabs = True - elif opt in ('--disable-preserve_newlines', '-d'): - js_options.preserve_newlines = False - elif opt in ('--jslint-happy', '-j'): - js_options.jslint_happy = True - elif opt in ('--eval-code'): - js_options.eval_code = True - elif opt in ('--brace-style', '-b'): - js_options.brace_style = arg - elif opt in ('--stdin', '-i'): - file = '-' - elif opt in ('--help', '--usage', '--h'): - return usage() - - if not file: - return usage() - else: - if outfile == 'stdout': - print(beautify_file(file, js_options)) - else: - f = open(outfile, 'w') - f.write(beautify_file(file, js_options) + '\n') - f.close() - diff --git a/jsbeautifier/unpackers/README.specs.mkd b/jsbeautifier/unpackers/README.specs.mkd deleted file mode 100644 index e937b76..0000000 --- a/jsbeautifier/unpackers/README.specs.mkd +++ /dev/null @@ -1,25 +0,0 @@ -# UNPACKERS SPECIFICATIONS - -Nothing very difficult: an unpacker is a submodule placed in the directory -where this file was found. Each unpacker must define three symbols: - - * `PRIORITY` : integer number expressing the priority in applying this - unpacker. Lower number means higher priority. - Makes sense only if a source file has been packed with - more than one packer. - * `detect(source)` : returns `True` if source is packed, otherwise, `False`. - * `unpack(source)` : takes a `source` string and unpacks it. Must always return - valid JavaScript. That is to say, your code should look - like: - -``` -if detect(source): - return do_your_fancy_things_with(source) -else: - return source -``` - -*You can safely define any other symbol in your module, as it will be ignored.* - -`__init__` code will automatically load new unpackers, without any further step -to be accomplished. Simply drop it in this directory. 
diff --git a/jsbeautifier/unpackers/__init__.py b/jsbeautifier/unpackers/__init__.py deleted file mode 100644 index 6d13653..0000000 --- a/jsbeautifier/unpackers/__init__.py +++ /dev/null @@ -1,67 +0,0 @@ -# -# General code for JSBeautifier unpackers infrastructure. See README.specs -# written by Stefano Sanfilippo -# - -"""General code for JSBeautifier unpackers infrastructure.""" - -import pkgutil -import re -from jsbeautifier.unpackers import evalbased - -# NOTE: AT THE MOMENT, IT IS DEACTIVATED FOR YOUR SECURITY: it runs js! -BLACKLIST = ['jsbeautifier.unpackers.evalbased'] - -class UnpackingError(Exception): - """Badly packed source or general error. Argument is a - meaningful description.""" - pass - -def getunpackers(): - """Scans the unpackers dir, finds unpackers and add them to UNPACKERS list. - An unpacker will be loaded only if it is a valid python module (name must - adhere to naming conventions) and it is not blacklisted (i.e. inserted - into BLACKLIST.""" - path = __path__ - prefix = __name__ + '.' 
- unpackers = [] - interface = ['unpack', 'detect', 'PRIORITY'] - for _importer, modname, _ispkg in pkgutil.iter_modules(path, prefix): - if 'tests' not in modname and modname not in BLACKLIST: - try: - module = __import__(modname, fromlist=interface) - except ImportError: - raise UnpackingError('Bad unpacker: %s' % modname) - else: - unpackers.append(module) - - return sorted(unpackers, key = lambda mod: mod.PRIORITY) - -UNPACKERS = getunpackers() - -def run(source, evalcode=False): - """Runs the applicable unpackers and return unpacked source as a string.""" - for unpacker in [mod for mod in UNPACKERS if mod.detect(source)]: - source = unpacker.unpack(source) - if evalcode and evalbased.detect(source): - source = evalbased.unpack(source) - return source - -def filtercomments(source): - """NOT USED: strips trailing comments and put them at the top.""" - trailing_comments = [] - comment = True - - while comment: - if re.search(r'^\s*\/\*', source): - comment = source[0, source.index('*/') + 2] - elif re.search(r'^\s*\/\/', source): - comment = re.search(r'^\s*\/\/', source).group(0) - else: - comment = None - - if comment: - source = re.sub(r'^\s+', '', source[len(comment):]) - trailing_comments.append(comment) - - return '\n'.join(trailing_comments) + source diff --git a/jsbeautifier/unpackers/evalbased.py b/jsbeautifier/unpackers/evalbased.py deleted file mode 100644 index b17d926..0000000 --- a/jsbeautifier/unpackers/evalbased.py +++ /dev/null @@ -1,39 +0,0 @@ -# -# Unpacker for eval() based packers, a part of javascript beautifier -# by Einar Lielmanis -# -# written by Stefano Sanfilippo -# -# usage: -# -# if detect(some_string): -# unpacked = unpack(some_string) -# - -"""Unpacker for eval() based packers: runs JS code and returns result. -Works only if a JS interpreter (e.g. 
Mozilla's Rhino) is installed and -properly set up on host.""" - -from subprocess import PIPE, Popen - -PRIORITY = 3 - -def detect(source): - """Detects if source is likely to be eval() packed.""" - return source.strip().lower().startswith('eval(function(') - -def unpack(source): - """Runs source and return resulting code.""" - return jseval('print %s;' % source[4:]) if detect(source) else source - -# In case of failure, we'll just return the original, without crashing on user. -def jseval(script): - """Run code in the JS interpreter and return output.""" - try: - interpreter = Popen(['js'], stdin=PIPE, stdout=PIPE) - except OSError: - return script - result, errors = interpreter.communicate(script) - if interpreter.poll() or errors: - return script - return result diff --git a/jsbeautifier/unpackers/javascriptobfuscator.py b/jsbeautifier/unpackers/javascriptobfuscator.py deleted file mode 100644 index aa4344a..0000000 --- a/jsbeautifier/unpackers/javascriptobfuscator.py +++ /dev/null @@ -1,58 +0,0 @@ -# -# simple unpacker/deobfuscator for scripts messed up with -# javascriptobfuscator.com -# -# written by Einar Lielmanis -# rewritten in Python by Stefano Sanfilippo -# -# Will always return valid javascript: if `detect()` is false, `code` is -# returned, unmodified. 
-# -# usage: -# -# if javascriptobfuscator.detect(some_string): -# some_string = javascriptobfuscator.unpack(some_string) -# - -"""deobfuscator for scripts messed up with JavascriptObfuscator.com""" - -import re - -PRIORITY = 1 - -def smartsplit(code): - """Split `code` at " symbol, only if it is not escaped.""" - strings = [] - pos = 0 - while pos < len(code): - if code[pos] == '"': - word = '' # new word - pos += 1 - while pos < len(code): - if code[pos] == '"': - break - if code[pos] == '\\': - word += '\\' - pos += 1 - word += code[pos] - pos += 1 - strings.append('"%s"' % word) - pos += 1 - return strings - -def detect(code): - """Detects if `code` is JavascriptObfuscator.com packed.""" - # prefer `is not` idiom, so that a true boolean is returned - return (re.search(r'^var _0x[a-f0-9]+ ?\= ?\[', code) is not None) - -def unpack(code): - """Unpacks JavascriptObfuscator.com packed code.""" - if detect(code): - matches = re.search(r'var (_0x[a-f\d]+) ?\= ?\[(.*?)\];', code) - if matches: - variable = matches.group(1) - dictionary = smartsplit(matches.group(2)) - code = code[len(matches.group(0)):] - for key, value in enumerate(dictionary): - code = code.replace(r'%s[%s]' % (variable, key), value) - return code diff --git a/jsbeautifier/unpackers/myobfuscate.py b/jsbeautifier/unpackers/myobfuscate.py deleted file mode 100644 index 52e1003..0000000 --- a/jsbeautifier/unpackers/myobfuscate.py +++ /dev/null @@ -1,86 +0,0 @@ -# -# deobfuscator for scripts messed up with myobfuscate.com -# by Einar Lielmanis -# -# written by Stefano Sanfilippo -# -# usage: -# -# if detect(some_string): -# unpacked = unpack(some_string) -# - -# CAVEAT by Einar Lielmanis - -# -# You really don't want to obfuscate your scripts there: they're tracking -# your unpackings, your script gets turned into something like this, -# as of 2011-08-26: -# -# var _escape = 'your_script_escaped'; -# var _111 = document.createElement('script'); -# _111.src = 'http://api.www.myobfuscate.com/?getsrc=ok' + 
-# '&ref=' + encodeURIComponent(document.referrer) + -# '&url=' + encodeURIComponent(document.URL); -# var 000 = document.getElementsByTagName('head')[0]; -# 000.appendChild(_111); -# document.write(unescape(_escape)); -# - -"""Deobfuscator for scripts messed up with MyObfuscate.com""" - -import re -import base64 - -# Python 2 retrocompatibility -# pylint: disable=F0401 -# pylint: disable=E0611 -try: - from urllib import unquote -except ImportError: - from urllib.parse import unquote - -from jsbeautifier.unpackers import UnpackingError - -PRIORITY = 1 - -CAVEAT = """// -// Unpacker warning: be careful when using myobfuscate.com for your projects: -// scripts obfuscated by the free online version call back home. -// - -""" - -SIGNATURE = (r'["\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\x4F' - r'\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5A\x61\x62\x63\x64\x65' - r'\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x74\x75' - r'\x76\x77\x78\x79\x7A\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x2B' - r'\x2F\x3D","","\x63\x68\x61\x72\x41\x74","\x69\x6E\x64\x65\x78' - r'\x4F\x66","\x66\x72\x6F\x6D\x43\x68\x61\x72\x43\x6F\x64\x65","' - r'\x6C\x65\x6E\x67\x74\x68"]') - -def detect(source): - """Detects MyObfuscate.com packer.""" - return SIGNATURE in source - -def unpack(source): - """Unpacks js code packed with MyObfuscate.com""" - if not detect(source): - return source - payload = unquote(_filter(source)) - match = re.search(r"^var _escape\='' preDefinedCode = 'var app = this;' +# Regex that matches any character that's <32 && >127 and not a whitespace. 
+bad_chars_re = "|".join(re.escape(chr(ch)) for ch in ( + [ch for ch in range(32) if chr(ch) not in "\n\r\t\f"] + + [ch for ch in range(128, 256)] +)) def analyseJS(code, context=None, manualAnalysis=False): ''' Hooks the eval function and search for obfuscated elements in the Javascript code - + @param code: The Javascript code (string) - @return: List with analysis information of the Javascript code: [JSCode,unescapedBytes,urlsFound,errors,context], where + @return: List with analysis information of the Javascript code: [JSCode,unescapedBytes,urlsFound,errors,context], where JSCode is a list with the several stages Javascript code, - unescapedBytes is a list with the parameters of unescape functions, + unescapedBytes is a list with the parameters of unescape functions, urlsFound is a list with the URLs found in the unescaped bytes, errors is a list of errors, context is the context of execution of the Javascript code. @@ -71,7 +78,7 @@ def analyseJS(code, context=None, manualAnalysis=False): jsCode = [] unescapedBytes = [] urlsFound = [] - + try: code = unescapeHTMLEntities(code) scriptElements = re.findall(reJSscript, code, re.DOTALL | re.IGNORECASE) @@ -81,16 +88,15 @@ def analyseJS(code, context=None, manualAnalysis=False): code += scriptElement + '\n\n' code = jsbeautifier.beautify(code) jsCode.append(code) - + if code is not None and JS_MODULE and not manualAnalysis: if context is None: context = PyV8.JSContext(Global()) context.enter() # Hooking the eval function context.eval('eval=evalOverride') - #context.eval(preDefinedCode) + # context.eval(preDefinedCode) while True: - originalCode = code try: context.eval(code) evalCode = context.eval('evalCode') @@ -102,10 +108,11 @@ def analyseJS(code, context=None, manualAnalysis=False): break except: error = str(sys.exc_info()[1]) - open('jserror.log', 'ab').write(error + newLine) + f = open(os.path.expanduser("~/.peepdf-jserror.log"), "ab") + f.write(error + newLine) errors.append(error) break - + if code != '': 
escapedVars = re.findall('(\w*?)\s*?=\s*?(unescape\((.*?)\))', code, re.DOTALL) for var in escapedVars: @@ -118,10 +125,10 @@ def analyseJS(code, context=None, manualAnalysis=False): bytes = ret[1] urls = re.findall('https?://.*$', bytes, re.DOTALL) if bytes not in unescapedBytes: - unescapedBytes.append(bytes) + unescapedBytes.append(bytes) for url in urls: - if url not in urlsFound: - urlsFound.append(url) + if url not in urlsFound: + urlsFound.append(url) else: bytes = bytes[1:-1] if len(bytes) > 150: @@ -130,24 +137,24 @@ def analyseJS(code, context=None, manualAnalysis=False): bytes = ret[1] urls = re.findall('https?://.*$', bytes, re.DOTALL) if bytes not in unescapedBytes: - unescapedBytes.append(bytes) + unescapedBytes.append(bytes) for url in urls: - if url not in urlsFound: - urlsFound.append(url) + if url not in urlsFound: + urlsFound.append(url) except: traceback.print_exc(file=open(errorsFile, 'a')) errors.append('Unexpected error in the JSAnalysis module!!') finally: for js in jsCode: if js is None or js == '': - jsCode.remove(js) + jsCode.remove(js) return [jsCode, unescapedBytes, urlsFound, errors, context] def getVarContent(jsCode, varContent): ''' Given the Javascript code and the content of a variable this method tries to obtain the real value of the variable, cleaning expressions like "a = eval; a(js_code);" - + @param jsCode: The Javascript code (string) @param varContent: The content of the variable (string) @return: A string with real value of the variable @@ -172,15 +179,15 @@ def getVarContent(jsCode, varContent): def isJavascript(content): ''' Given an string this method looks for typical Javscript strings and try to identify if the string contains Javascrit code or not. 
- + @param content: A string @return: A boolean, True if it seems to contain Javascript code or False in the other case ''' - jsStrings = ['var ', ';', ')', '(', 'function ', '=', '{', '}', 'if ', 'else', 'return', 'while ', 'for ', - ',', 'eval'] + jsStrings = [ + 'var ', ';', ')', '(', 'function ', '=', '{', '}', 'if ', + 'else', 'return', 'while ', 'for ', ',', 'eval', + ] keyStrings = [';', '(', ')'] - reVarInit = 'var [\w0-9]+\s*?=' - reFunctionCall = '[\w0-9]+\s*?\(.*?\)\s*?;' stringsFound = [] limit = 15 minDistinctStringsFound = 4 @@ -189,12 +196,15 @@ def isJavascript(content): length = len(content) smallScriptLength = 100 + if content.startswith("/GS1 gs"): + return False + if re.findall(reJSscript, content, re.DOTALL | re.IGNORECASE): return True - - for char in content: - if (ord(char) < 32 and char not in ['\n', '\r', '\t', '\f', '\x00']) or ord(char) >= 127: - return False + + _, count = re.subn(bad_chars_re, "", content, len(content) // 10) + if int(count) == len(content) // 10: + return False for string in jsStrings: cont = content.count(string) @@ -216,43 +226,43 @@ def isJavascript(content): def searchObfuscatedFunctions(jsCode, function): ''' Search for obfuscated functions in the Javascript code - + @param jsCode: The Javascript code (string) @param function: The function name to look for (string) - @return: List with obfuscated functions information [functionName,functionCall,containsReturns] + @return: List with obfuscated functions information [functionName,functionCall,containsReturns] ''' obfuscatedFunctionsInfo = [] - if jsCode != None: + if jsCode is not None: match = re.findall('\W('+function+'\s{0,5}?\((.*?)\)\s{0,5}?;)', jsCode, re.DOTALL) if match: - for m in match: - if re.findall('return', m[1], re.IGNORECASE): - obfuscatedFunctionsInfo.append([function, m, True]) - else: - obfuscatedFunctionsInfo.append([function, m, False]) + for m in match: + if re.findall('return', m[1], re.IGNORECASE): + 
obfuscatedFunctionsInfo.append([function, m, True]) + else: + obfuscatedFunctionsInfo.append([function, m, False]) obfuscatedFunctions = re.findall('\s*?((\w*?)\s*?=\s*?'+function+')\s*?;', jsCode, re.DOTALL) for obfuscatedFunction in obfuscatedFunctions: - obfuscatedElement = obfuscatedFunction[1] - obfuscatedFunctionsInfo += searchObfuscatedFunctions(jsCode, obfuscatedElement) + obfuscatedElement = obfuscatedFunction[1] + obfuscatedFunctionsInfo += searchObfuscatedFunctions(jsCode, obfuscatedElement) return obfuscatedFunctionsInfo -def unescape(escapedBytes, unicode = True): +def unescape(escapedBytes, str=True): ''' This method unescapes the given string - + @param escapedBytes: A string to unescape @return: A tuple (status,statusContent), where statusContent is an unescaped string in case status = 0 or an error in case status = -1 ''' - #TODO: modify to accept a list of escaped strings? + # TODO: modify to accept a list of escaped strings? unescapedBytes = '' - if unicode: + if str: unicodePadding = '\x00' else: unicodePadding = '' try: - if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\u') != -1 or escapedBytes.find('%') != -1: - if escapedBytes.lower().find('\u') != -1: + if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\\u') != -1 or escapedBytes.find('%') != -1: + if escapedBytes.lower().find('\\u') != -1: splitBytes = escapedBytes.split('\\') else: splitBytes = escapedBytes.split('%') @@ -261,14 +271,14 @@ def unescape(escapedBytes, unicode = True): if splitByte == '': continue if len(splitByte) > 4 and re.match('u[0-9a-f]{4}', splitByte[:5], re.IGNORECASE): - unescapedBytes += chr(int(splitByte[3]+splitByte[4], 16))+chr(int(splitByte[1]+splitByte[2],16)) + unescapedBytes += chr(int(splitByte[3]+splitByte[4], 16))+chr(int(splitByte[1]+splitByte[2], 16)) if len(splitByte) > 5: - for j in range(5,len(splitByte)): + for j in range(5, len(splitByte)): unescapedBytes += splitByte[j] + unicodePadding elif 
len(splitByte) > 1 and re.match('[0-9a-f]{2}', splitByte[:2], re.IGNORECASE): unescapedBytes += chr(int(splitByte[0]+splitByte[1], 16)) + unicodePadding if len(splitByte) > 2: - for j in range(2,len(splitByte)): + for j in range(2, len(splitByte)): unescapedBytes += splitByte[j] + unicodePadding else: if i != 0: @@ -279,4 +289,4 @@ def unescape(escapedBytes, unicode = True): unescapedBytes = escapedBytes except: return (-1, 'Error while unescaping the bytes') - return (0, unescapedBytes) \ No newline at end of file + return (0, unescapedBytes) diff --git a/PDFConsole.py b/peepdf/PDFConsole.py similarity index 87% rename from PDFConsole.py rename to peepdf/PDFConsole.py index 5b9b3d9..29bffda 100644 --- a/PDFConsole.py +++ b/peepdf/PDFConsole.py @@ -29,26 +29,35 @@ import sys import os import re -import subprocess -import optparse import hashlib -import jsbeautifier import traceback -from PDFUtils import * -from PDFCrypto import * -from JSAnalysis import * -from PDFCore import * +import jsbeautifier + +# Return a string on Py2 & Py3. 
+from builtins import input + +from peepdf.PDFUtils import ( + getBytesFromFile, countArrayElements, clearScreen, hexToString, + escapeRegExpString, vtcheck, countNonPrintableChars +) +from peepdf.PDFCrypto import xor +from peepdf.JSAnalysis import isJavascript, analyseJS, Global, unescape +from peepdf.PDFCore import ( + PDFFile, PDFHexString, PDFDictionary, PDFNum, PDFName, PDFStream, + PDFReference, PDFString, PDFArray, PDFBool, PDFNull, vulnsDict, PDFParser +) + from base64 import b64encode, b64decode -from PDFFilters import decodeStream, encodeStream -from jjdecode import JJDecoder +from peepdf.PDFFilters import decodeStream, encodeStream +from peepdf.jjdecode import JJDecoder try: - from colorama import init, Fore, Back, Style + from colorama import init, Fore, Style COLORIZED_OUTPUT = True except: COLORIZED_OUTPUT = False try: - import PyV8 + import STPyV8 as PyV8 JS_MODULE = True except ImportError as e: JS_MODULE = False @@ -63,16 +72,18 @@ import readline RL_PROMPT_START_IGNORE = '\001' RL_PROMPT_END_IGNORE = '\002' + readline # Pretend like we use it. except: RL_PROMPT_START_IGNORE = RL_PROMPT_END_IGNORE = '' -# File and variable redirections +# File and variable redirections FILE_WRITE = 1 FILE_ADD = 2 VAR_WRITE = 3 VAR_ADD = 4 newLine = os.linesep -errorsFile = 'errors.txt' +errorsFile = os.path.expanduser("~/.peepdf-error.txt") + filter2RealFilterDict = {'b64': 'base64', 'base64': 'base64', 'asciihex': '/ASCIIHexDecode', 'ahx': '/ASCIIHexDecode', 'ascii85': '/ASCII85Decode', 'a85': '/ASCII85Decode', 'lzw': '/LZWDecode', 'flatedecode': '/FlateDecode', 'fl': '/FlateDecode', 'runlength': '/RunLengthDecode', @@ -144,7 +155,7 @@ def precmd(self, line): def postloop(self): if self.use_rawinput: - print newLine + 'Leaving the Peepdf interactive console...Bye! ;)' + newLine + print((newLine + 'Leaving the Peepdf interactive console...Bye! 
;)' + newLine)) self.leaving = True def do_bytes(self, argv): @@ -177,8 +188,8 @@ def do_bytes(self, argv): self.help_bytes() def help_bytes(self): - print newLine + 'Usage: bytes $offset $num_bytes [$file]' - print newLine + 'Shows or stores in the specified file $num_bytes of the file beginning from $offset' + newLine + print((newLine + 'Usage: bytes $offset $num_bytes [$file]')) + print((newLine + 'Shows or stores in the specified file $num_bytes of the file beginning from $offset' + newLine)) def do_changelog(self, argv): if self.pdfFile is None: @@ -213,13 +224,13 @@ def do_changelog(self, argv): return False # Getting information about original document data = self.pdfFile.getBasicMetadata(0) - if data.has_key('author'): + if "author" in data: output += '\tAuthor: ' + data['author'] + newLine - if data.has_key('creator'): + if "creator" in data: output += '\tCreator: ' + data['creator'] + newLine - if data.has_key('producer'): + if "producer" in data: output += '\tProducer: ' + data['producer'] + newLine - if data.has_key('creation'): + if "creation" in data: output += '\tCreation date: ' + data['creation'] + newLine if output != '': output = 'Original document information:' + newLine + output + newLine @@ -234,13 +245,13 @@ def do_changelog(self, argv): output += 'Changes in version ' + str(i + 1) + ':' + newLine # Getting modification information data = self.pdfFile.getBasicMetadata(i + 1) - if data.has_key('author'): + if "author" in data: output += '\tAuthor: ' + data['author'] + newLine - if data.has_key('creator'): + if "creator" in data: output += '\tCreator: ' + data['creator'] + newLine - if data.has_key('producer'): + if "producer" in data: output += '\tProducer: ' + data['producer'] + newLine - if data.has_key('modification'): + if "modification" in data: output += '\tModification date: ' + data['modification'] + newLine addedObjects = changelog[0] modifiedObjects = changelog[1] @@ -258,8 +269,8 @@ def do_changelog(self, argv): 
self.log_output('changelog ' + argv, output) def help_changelog(self): - print newLine + 'Usage: changelog [$version]' - print newLine + 'Shows the changelog of the document or version of the document' + newLine + print((newLine + 'Usage: changelog [$version]')) + print((newLine + 'Shows the changelog of the document or version of the document' + newLine)) def do_create(self, argv): message = '' @@ -299,7 +310,7 @@ def do_create(self, argv): content = open(jsFile, 'rb').read() else: if self.use_rawinput: - content = raw_input( + content = input( newLine + 'Please, specify the Javascript code you want to include in the file (if the code includes EOL characters use a js_file instead):' + newLine * 2) else: message = '*** Error: You must specify a Javascript file in batch mode!!' @@ -321,8 +332,6 @@ def do_create(self, argv): message = '*** Error: You must open a file!!' self.log_output('create ' + argv, message) return False - objectsToCompress = [] - streamContent = None version = None if numArgs == 2: version = args[1] @@ -340,7 +349,7 @@ def do_create(self, argv): return False warning = 'Warning: stream objects cannot be compressed. If the Catalog object is compressed could lead to corrupted files for Adobe Reader!!' if self.use_rawinput: - res = raw_input( + res = input( warning + newLine + 'Which objects do you want to compress? 
(Valid respones: all | 1-5 | 1,2,5,7,8) ') else: res = 'all' @@ -348,7 +357,7 @@ def do_create(self, argv): objects = [] elif res.count('-') == 1: limits = res.split('-') - objects = range(int(limits[0]), int(limits[1]) + 1) + objects = list(range(int(limits[0]), int(limits[1]) + 1)) elif res.find(',') != -1: objects = [int(id) for id in res.split(',')] elif res.isdigit(): @@ -378,10 +387,10 @@ def do_create(self, argv): self.log_output('create ' + argv, message) def help_create(self): - print newLine + 'Usage: create pdf simple|(open_action_js [$js_file])' - print newLine + 'Creates a new simple PDF file or one with Javascript code to be executed when opening the file. It\'s possible to specify the file where the Javascript code is stored or do it manually.' + newLine * 2 - print 'Usage: create object_stream [$version]' + newLine - print 'Creates an object stream choosing the objects to be compressed.' + newLine + print((newLine + 'Usage: create pdf simple|(open_action_js [$js_file])')) + print((newLine + 'Creates a new simple PDF file or one with Javascript code to be executed when opening the file. It\'s possible to specify the file where the Javascript code is stored or do it manually.' + newLine * 2)) + print(('Usage: create object_stream [$version]' + newLine)) + print(('Creates an object stream choosing the objects to be compressed.' + newLine)) def do_decode(self, argv): decodedContent = '' @@ -423,7 +432,7 @@ def do_decode(self, argv): size = int(args[1]) for i in range(iniFilterArgs, len(args)): filter = args[i].lower() - if filter not in filter2RealFilterDict.keys(): + if filter not in list(filter2RealFilterDict.keys()): self.help_decode() return False if filter in notImplementedFilters: @@ -436,7 +445,7 @@ def do_decode(self, argv): return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' 
self.log_output('decode ' + argv, message) return False @@ -481,21 +490,21 @@ def do_decode(self, argv): self.log_output('decode ' + argv, decodedContent, [decodedContent], bytesOutput=True) def help_decode(self): - print newLine + 'Usage: decode variable $var_name $filter1 [$filter2 ...]' - print 'Usage: decode file $file_name $filter1 [$filter2 ...]' - print 'Usage: decode raw $offset $num_bytes $filter1 [$filter2 ...]' - print 'Usage: decode string $encoded_string $filter1 [$filter2 ...]' + newLine - print 'Decodes the content of the specified variable, file or raw bytes using the following filters or algorithms:' - print '\tbase64,b64: Base64' - print '\tasciihex,ahx: /ASCIIHexDecode' - print '\tascii85,a85: /ASCII85Decode' - print '\tlzw: /LZWDecode' - print '\tflatedecode,fl: /FlateDecode' - print '\trunlength,rl: /RunLengthDecode' - print '\tccittfax,ccf: /CCITTFaxDecode' - print '\tjbig2: /JBIG2Decode (Not implemented)' - print '\tdct: /DCTDecode (Not implemented)' - print '\tjpx: /JPXDecode (Not implemented)' + newLine + print((newLine + 'Usage: decode variable $var_name $filter1 [$filter2 ...]')) + print ('Usage: decode file $file_name $filter1 [$filter2 ...]') + print ('Usage: decode raw $offset $num_bytes $filter1 [$filter2 ...]') + print(('Usage: decode string $encoded_string $filter1 [$filter2 ...]' + newLine)) + print ('Decodes the content of the specified variable, file or raw bytes using the following filters or algorithms:') + print ('\tbase64,b64: Base64') + print ('\tasciihex,ahx: /ASCIIHexDecode') + print ('\tascii85,a85: /ASCII85Decode') + print ('\tlzw: /LZWDecode') + print ('\tflatedecode,fl: /FlateDecode') + print ('\trunlength,rl: /RunLengthDecode') + print ('\tccittfax,ccf: /CCITTFaxDecode') + print ('\tjbig2: /JBIG2Decode (Not implemented)') + print ('\tdct: /DCTDecode (Not implemented)') + print(('\tjpx: /JPXDecode (Not implemented)' + newLine)) def do_decrypt(self, argv): if self.pdfFile is None: @@ -521,8 +530,8 @@ def 
do_decrypt(self, argv): self.log_output('decrypt ' + argv, message) def help_decrypt(self): - print newLine + 'Usage: decrypt $password' - print newLine + 'Decrypts the file with the specified password' + newLine + print((newLine + 'Usage: decrypt $password')) + print((newLine + 'Decrypts the file with the specified password' + newLine)) def do_embed(self, argv): fileType = 'application#2Fpdf' @@ -783,10 +792,10 @@ def do_embed(self, argv): self.log_output('open ' + argv, message) def help_embed(self): - print newLine + 'Usage: embed [-x] $filename [$file_type]' - print newLine + 'Embeds the specified file in the actual PDF file. The default type is "application/pdf".' + newLine - print 'Options:' - print '\t-x: The file is executed when the actual PDF file is opened' + newLine + print((newLine + 'Usage: embed [-x] $filename [$file_type]')) + print((newLine + 'Embeds the specified file in the actual PDF file. The default type is "application/pdf".' + newLine)) + print ('Options:') + print(('\t-x: The file is executed when the actual PDF file is opened' + newLine)) def do_encode(self, argv): encodedContent = '' @@ -828,7 +837,7 @@ def do_encode(self, argv): size = int(args[1]) for i in range(iniFilterArgs, len(args)): filter = args[i].lower() - if filter not in filter2RealFilterDict.keys(): + if filter not in list(filter2RealFilterDict.keys()): self.help_encode() return False if filter in notImplementedFilters: @@ -841,7 +850,7 @@ def do_encode(self, argv): return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' 
self.log_output('encode ' + argv, message) return False @@ -881,21 +890,21 @@ def do_encode(self, argv): self.log_output('encode ' + argv, encodedContent, [encodedContent], bytesOutput=True) def help_encode(self): - print newLine + 'Usage: encode variable $var_name $filter1 [$filter2 ...]' - print 'Usage: encode file $file_name $filter1 [$filter2 ...]' - print 'Usage: encode raw $offset $num_bytes $filter1 [$filter2 ...]' - print 'Usage: encode string $my_string $filter1 [$filter2 ...]' + newLine - print 'Encodes the content of the specified variable, file or raw bytes using the following filters or algorithms:' - print '\tbase64,b64: Base64' - print '\tasciihex,ahx: /ASCIIHexDecode' - print '\tascii85,a85: /ASCII85Decode (Not implemented)' - print '\tlzw: /LZWDecode' - print '\tflatedecode,fl: /FlateDecode' - print '\trunlength,rl: /RunLengthDecode (Not implemented)' - print '\tccittfax,ccf: /CCITTFaxDecode (Not implemented)' - print '\tjbig2: /JBIG2Decode (Not implemented)' - print '\tdct: /DCTDecode (Not implemented)' - print '\tjpx: /JPXDecode (Not implemented)' + newLine + print((newLine + 'Usage: encode variable $var_name $filter1 [$filter2 ...]')) + print ('Usage: encode file $file_name $filter1 [$filter2 ...]') + print ('Usage: encode raw $offset $num_bytes $filter1 [$filter2 ...]') + print(('Usage: encode string $my_string $filter1 [$filter2 ...]' + newLine)) + print ('Encodes the content of the specified variable, file or raw bytes using the following filters or algorithms:') + print ('\tbase64,b64: Base64') + print ('\tasciihex,ahx: /ASCIIHexDecode') + print ('\tascii85,a85: /ASCII85Decode (Not implemented)') + print ('\tlzw: /LZWDecode') + print ('\tflatedecode,fl: /FlateDecode') + print ('\trunlength,rl: /RunLengthDecode (Not implemented)') + print ('\tccittfax,ccf: /CCITTFaxDecode (Not implemented)') + print ('\tjbig2: /JBIG2Decode (Not implemented)') + print ('\tdct: /DCTDecode (Not implemented)') + print(('\tjpx: /JPXDecode (Not implemented)' + 
newLine)) def do_encode_strings(self, argv): if self.pdfFile is None: @@ -975,8 +984,8 @@ def do_encode_strings(self, argv): self.log_output('encode_strings ' + argv, message) def help_encode_strings(self): - print newLine + 'Usage: encode_strings [$object_id|trailer [$version]]' - print newLine + 'Encodes the strings and names included in the file, object or trailer' + newLine + print((newLine + 'Usage: encode_strings [$object_id|trailer [$version]]')) + print((newLine + 'Encodes the strings and names included in the file, object or trailer' + newLine)) def do_encrypt(self, argv): if self.pdfFile is None: @@ -1004,8 +1013,8 @@ def do_encrypt(self, argv): self.log_output('encrypt ' + argv, message) def help_encrypt(self): - print newLine + 'Usage: encrypt [$password]' - print newLine + 'Encrypts the file with the default or specified password' + newLine + print((newLine + 'Usage: encrypt [$password]')) + print((newLine + 'Encrypts the file with the default or specified password' + newLine)) def do_errors(self, argv): if self.pdfFile is None: @@ -1090,19 +1099,19 @@ def do_errors(self, argv): self.log_output('errors ' + argv, errors) def help_errors(self): - print newLine + 'Usage: errors [$object_id|xref|trailer [$version]]' - print newLine + 'Shows the errors of the file or object (object_id, xref, trailer)' + newLine + print((newLine + 'Usage: errors [$object_id|xref|trailer [$version]]')) + print((newLine + 'Shows the errors of the file or object (object_id, xref, trailer)' + newLine)) def do_exit(self, argv): return True def help_exit(self): - print newLine + 'Usage: exit' - print newLine + 'Exits from the console' + newLine + print((newLine + 'Usage: exit')) + print((newLine + 'Exits from the console' + newLine)) def do_extract(self, argv): validTypes = ['uri', 'js'] - #TODO Add more extraction types like embedded files, flash, etc + # TODO Add more extraction types like embedded files, flash, etc if self.pdfFile is None: message = '*** Error: You must open a 
file!!' self.log_output('extract ' + argv, message) @@ -1152,9 +1161,8 @@ def do_extract(self, argv): self.log_output('extract ' + argv, output) def help_extract(self): - print newLine + 'Usage: extract uri|js [$version]' - print newLine + 'Extracts all the given type elements of the specified version after being decoded and decrypted (if necessary)' + newLine - + print((newLine + 'Usage: extract uri|js [$version]')) + print((newLine + 'Extracts all the given type elements of the specified version after being decoded and decrypted (if necessary)' + newLine)) def do_filters(self, argv): if self.pdfFile is None: @@ -1183,7 +1191,7 @@ def do_filters(self, argv): iniFilterArgs = 2 else: version = None - validFilters = filter2RealFilterDict.keys() + ['none'] + validFilters = list(filter2RealFilterDict.keys()) + ['none'] validFilters.remove('b64') validFilters.remove('base64') for i in range(iniFilterArgs, len(args)): @@ -1273,18 +1281,18 @@ def do_filters(self, argv): self.log_output('filters ' + argv, message + value, [value], bytesOutput=True) def help_filters(self): - print newLine + 'Usage: filters $object_id [$version] [$filter1 [$filter2 ...]]' - print newLine + 'Shows the filters found in the stream object or set the filters in the object (first filter is used first). 
The valid values for filters are the following:' - print '\tnone: No filters' - print '\tasciihex,ahx: /ASCIIHexDecode' - print '\tascii85,a85: /ASCII85Decode (Not implemented)' - print '\tlzw: /LZWDecode' - print '\tflatedecode,fl: /FlateDecode' - print '\trunlength,rl: /RunLengthDecode (Not implemented)' - print '\tccittfax,ccf: /CCITTFaxDecode (Not implemented)' - print '\tjbig2: /JBIG2Decode (Not implemented)' - print '\tdct: /DCTDecode (Not implemented)' - print '\tjpx: /JPXDecode (Not implemented)' + newLine + print((newLine + 'Usage: filters $object_id [$version] [$filter1 [$filter2 ...]]')) + print((newLine + 'Shows the filters found in the stream object or set the filters in the object (first filter is used first). The valid values for filters are the following:')) + print ('\tnone: No filters') + print ('\tasciihex,ahx: /ASCIIHexDecode') + print ('\tascii85,a85: /ASCII85Decode (Not implemented)') + print ('\tlzw: /LZWDecode') + print ('\tflatedecode,fl: /FlateDecode') + print ('\trunlength,rl: /RunLengthDecode (Not implemented)') + print ('\tccittfax,ccf: /CCITTFaxDecode (Not implemented)') + print ('\tjbig2: /JBIG2Decode (Not implemented)') + print ('\tdct: /DCTDecode (Not implemented)') + print(('\tjpx: /JPXDecode (Not implemented)' + newLine)) def do_hash(self, argv): content = '' @@ -1323,7 +1331,7 @@ def do_hash(self, argv): self.help_hash() return False if type == 'variable': - if not self.variables.has_key(srcName): + if srcName not in self.variables: message = '*** Error: The variable does not exist!!' 
self.log_output('hash ' + argv, message) return False @@ -1392,16 +1400,16 @@ def do_hash(self, argv): self.log_output('hash ' + argv, output) def help_hash(self): - print newLine + 'Usage: hash object|rawobject|stream|rawstream $object_id [$version]' - print 'Usage: hash raw $offset $num_bytes' - print 'Usage: hash file $file_name' - print 'Usage: hash variable $var_name' - print 'Usage: hash string $my_string' - print newLine + 'Generates the hash (MD5/SHA1/SHA256) of the specified source: raw bytes of the file, objects and streams, and the content of files or variables' + newLine + print((newLine + 'Usage: hash object|rawobject|stream|rawstream $object_id [$version]')) + print ('Usage: hash raw $offset $num_bytes') + print ('Usage: hash file $file_name') + print ('Usage: hash variable $var_name') + print ('Usage: hash string $my_string') + print((newLine + 'Generates the hash (MD5/SHA1/SHA256) of the specified source: raw bytes of the file, objects and streams, and the content of files or variables' + newLine)) def help_help(self): - print newLine + 'Usage: help [$command]' - print newLine + 'Shows the available commands or the usage of the specified command' + newLine + print((newLine + 'Usage: help [$command]')) + print((newLine + 'Shows the available commands or the usage of the specified command' + newLine)) def do_info(self, argv): if self.pdfFile is None: @@ -1423,7 +1431,7 @@ def do_info(self, argv): stats += beforeStaticLabel + 'File: ' + self.resetColor + statsDict['File'] + newLine stats += beforeStaticLabel + 'MD5: ' + self.resetColor + statsDict['MD5'] + newLine stats += beforeStaticLabel + 'SHA1: ' + self.resetColor + statsDict['SHA1'] + newLine - #stats += beforeStaticLabel + 'SHA256: ' + self.resetColor + statsDict['SHA256'] + newLine + # stats += beforeStaticLabel + 'SHA256: ' + self.resetColor + statsDict['SHA256'] + newLine stats += beforeStaticLabel + 'Size: ' + self.resetColor + statsDict['Size'] + ' bytes' + newLine if statsDict['Detection'] 
!= []: detectionReportInfo = '' @@ -1471,41 +1479,61 @@ def do_info(self, argv): stats += beforeStaticLabel + '\tInfo: ' + self.resetColor + statsVersion['Info'] + newLine else: stats += beforeStaticLabel + '\tInfo: ' + self.resetColor + 'No' + newLine - stats += beforeStaticLabel + '\tObjects (' + statsVersion['Objects'][0] + '): ' + \ - self.resetColor + str(statsVersion['Objects'][1]) + newLine + stats += ( + beforeStaticLabel + '\tObjects (' + statsVersion['Objects'][0] + '): ' + + self.resetColor + str(statsVersion['Objects'][1]) + newLine + ) if statsVersion['Compressed Objects'] is not None: - stats += beforeStaticLabel + '\tCompressed objects (' +\ - statsVersion['Compressed Objects'][0] + '): ' + self.resetColor + \ - str(statsVersion['Compressed Objects'][1]) + newLine + stats += ( + beforeStaticLabel + '\tCompressed objects (' + + statsVersion['Compressed Objects'][0] + '): ' + self.resetColor + + str(statsVersion['Compressed Objects'][1]) + newLine + ) if statsVersion['Errors'] is not None: - stats += beforeStaticLabel + '\t\tErrors (' + statsVersion['Errors'][0] + '): ' + \ - self.resetColor + str(statsVersion['Errors'][1]) + newLine - stats += beforeStaticLabel + '\tStreams (' + statsVersion['Streams'][0] + '): ' + \ - self.resetColor + str(statsVersion['Streams'][1]) + stats += ( + beforeStaticLabel + '\t\tErrors (' + statsVersion['Errors'][0] + '): ' + + self.resetColor + str(statsVersion['Errors'][1]) + newLine + ) + stats += ( + beforeStaticLabel + '\tStreams (' + statsVersion['Streams'][0] + '): ' + + self.resetColor + str(statsVersion['Streams'][1]) + ) if statsVersion['Xref Streams'] is not None: - stats += newLine + beforeStaticLabel + '\t\tXref streams (' + \ - statsVersion['Xref Streams'][0] + '): ' + self.resetColor + \ - str(statsVersion['Xref Streams'][1]) + stats += ( + newLine + beforeStaticLabel + '\t\tXref streams (' + + statsVersion['Xref Streams'][0] + '): ' + self.resetColor + + str(statsVersion['Xref Streams'][1]) + ) if 
statsVersion['Object Streams'] is not None: - stats += newLine + beforeStaticLabel + '\t\tObject streams (' + \ - statsVersion['Object Streams'][0] + '): ' + self.resetColor + \ - str(statsVersion['Object Streams'][1]) + stats += ( + newLine + beforeStaticLabel + '\t\tObject streams (' + + statsVersion['Object Streams'][0] + '): ' + self.resetColor + + str(statsVersion['Object Streams'][1]) + ) if int(statsVersion['Streams'][0]) > 0: - stats += newLine + beforeStaticLabel + '\t\tEncoded (' + statsVersion['Encoded'][0] + '): ' + \ - self.resetColor + str(statsVersion['Encoded'][1]) + stats += ( + newLine + beforeStaticLabel + '\t\tEncoded (' + statsVersion['Encoded'][0] + '): ' + + self.resetColor + str(statsVersion['Encoded'][1]) + ) if statsVersion['Decoding Errors'] is not None: - stats += newLine + beforeStaticLabel + '\t\tDecoding errors (' + \ - statsVersion['Decoding Errors'][0] + '): ' + self.resetColor + str( - statsVersion['Decoding Errors'][1]) + stats += ( + newLine + beforeStaticLabel + '\t\tDecoding errors (' + + statsVersion['Decoding Errors'][0] + '): ' + self.resetColor + + str(statsVersion['Decoding Errors'][1]) + ) if statsVersion['URIs'] is not None: - stats += newLine + beforeStaticLabel + '\tObjects with URIs (' + \ - statsVersion['URIs'][0] + '): ' + self.resetColor + str(statsVersion['URIs'][1]) + stats += ( + newLine + beforeStaticLabel + '\tObjects with URIs (' + + statsVersion['URIs'][0] + '): ' + self.resetColor + str(statsVersion['URIs'][1]) + ) if not self.avoidOutputColors: beforeStaticLabel = self.warningColor if statsVersion['Objects with JS code'] is not None: - stats += newLine + beforeStaticLabel + '\tObjects with JS code (' + \ - statsVersion['Objects with JS code'][0] + '): ' + \ - self.resetColor + str(statsVersion['Objects with JS code'][1]) + stats += ( + newLine + beforeStaticLabel + '\tObjects with JS code (' + + statsVersion['Objects with JS code'][0] + '): ' + + self.resetColor + str(statsVersion['Objects with JS 
code'][1]) + ) actions = statsVersion['Actions'] events = statsVersion['Events'] vulns = statsVersion['Vulns'] @@ -1522,20 +1550,22 @@ def do_info(self, argv): self.resetColor + str(actions[action]) + newLine if vulns is not None: for vuln in vulns: - if vulnsDict.has_key(vuln): + if vuln in vulnsDict: vulnName = vulnsDict[vuln][0] vulnCVEList = vulnsDict[vuln][1] stats += '\t\t' + beforeStaticLabel + vulnName + ' (' for vulnCVE in vulnCVEList: stats += vulnCVE + ',' - stats = stats[:-1] + ') (%d): ' % len(vulns[vuln]) + self.resetColor + \ - str(vulns[vuln]) + newLine + stats = ( + stats[:-1] + ') (%d): ' % len(vulns[vuln]) + self.resetColor + + str(vulns[vuln]) + newLine + ) else: stats += '\t\t' + beforeStaticLabel + vuln + ' (%d): ' % len(vulns[vuln]) + \ self.resetColor + str(vulns[vuln]) + newLine if elements is not None: for element in elements: - if vulnsDict.has_key(element): + if element in vulnsDict: vulnName = vulnsDict[element][0] vulnCVEList = vulnsDict[element][1] stats += '\t\t' + beforeStaticLabel + vulnName + ' (' @@ -1586,7 +1616,7 @@ def do_info(self, argv): if xrefArray[1] is not None: statsStream = xrefArray[1].getStats() for key in statsStream: - if not statsDict.has_key(key): + if key not in statsDict: statsDict[key] = statsStream[key] if statsDict['Offset'] is not None: stats += beforeStaticLabel + 'Offset: ' + self.resetColor + statsDict['Offset'] + newLine @@ -1619,7 +1649,7 @@ def do_info(self, argv): if trailerArray[1] is not None: statsStream = trailerArray[1].getStats() for key in statsStream: - if not statsDict.has_key(key): + if key not in statsDict: statsDict[key] = statsStream[key] if statsDict['Offset'] is not None: stats += beforeStaticLabel + 'Offset: ' + self.resetColor + statsDict['Offset'] + newLine @@ -1710,8 +1740,8 @@ def do_info(self, argv): self.log_output('info ' + argv, stats) def help_info(self): - print newLine + 'Usage: info [$object_id|xref|trailer [$version]]' - print newLine + 'Shows information of the file or 
object ($object_id, xref, trailer)' + newLine + print((newLine + 'Usage: info [$object_id|xref|trailer [$version]]')) + print((newLine + 'Shows information of the file or object ($object_id, xref, trailer)' + newLine)) def do_js_analyse(self, argv): content = '' @@ -1738,7 +1768,7 @@ def do_js_analyse(self, argv): self.help_js_analyse() return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('js_analyse ' + argv, message) return False @@ -1746,13 +1776,13 @@ def do_js_analyse(self, argv): content = self.variables[src][0] if not isJavascript(content): if self.use_rawinput: - res = raw_input('The variable may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The variable may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The variable does not contain Javascript code!!' self.log_output('js_analyse ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) elif type == 'file': if not os.path.exists(src): message = '*** Error: The file does not exist!!' @@ -1762,13 +1792,13 @@ def do_js_analyse(self, argv): content = open(src, 'rb').read() if not isJavascript(content): if self.use_rawinput: - res = raw_input('The file may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The file may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The file does not contain Javascript code!!' self.log_output('js_analyse ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' 
+ newLine)) elif type == 'object': if self.pdfFile is None: message = '*** Error: You must open a file!!' @@ -1790,13 +1820,13 @@ def do_js_analyse(self, argv): content = object.getJSCode()[0] else: if self.use_rawinput: - res = raw_input('The object may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The object may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The object does not contain Javascript code!!' self.log_output('js_analyse ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) objectType = object.getType() if objectType == 'stream': content = object.getStream() @@ -1821,9 +1851,7 @@ def do_js_analyse(self, argv): else: content = src content = content.strip() - jsCode, unescapedBytes, urlsFound, jsErrors, self.javaScriptContexts['global'] = analyseJS(content, - self.javaScriptContexts[ - 'global']) + jsCode, unescapedBytes, urlsFound, jsErrors, self.javaScriptContexts['global'] = analyseJS(content, self.javaScriptContexts['global']) if content not in jsCode: jsCode = [content] + jsCode jsanalyseOutput = '' @@ -1852,15 +1880,14 @@ def do_js_analyse(self, argv): self.log_output('js_analyse ' + argv, jsanalyseOutput, unescapedBytes) def help_js_analyse(self): - print newLine + 'Usage: js_analyse variable $var_name' - print 'Usage: js_analyse file $file_name' - print 'Usage: js_analyse object $object_id [$version]' - print 'Usage: js_analyse string $javascript_code' - print newLine + 'Analyses the Javascript code stored in the specified string, variable, file or object' + newLine + print((newLine + 'Usage: js_analyse variable $var_name')) + print ('Usage: js_analyse file $file_name') + print ('Usage: js_analyse object $object_id [$version]') + print ('Usage: js_analyse string $javascript_code') + print((newLine + 'Analyses the 
Javascript code stored in the specified string, variable, file or object' + newLine)) def do_js_beautify(self, argv): content = '' - bytes = '' validTypes = ['variable', 'file', 'object', 'string'] args = self.parseArgs(argv) if args is None: @@ -1880,7 +1907,7 @@ def do_js_beautify(self, argv): self.help_js_beautify() return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('js_beautify ' + argv, message) return False @@ -1888,13 +1915,13 @@ def do_js_beautify(self, argv): content = self.variables[src][0] if not isJavascript(content): if self.use_rawinput: - res = raw_input('The variable may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The variable may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The variable does not contain Javascript code!!' self.log_output('js_beautify ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) elif type == 'file': if not os.path.exists(src): message = '*** Error: The file does not exist!!' @@ -1904,13 +1931,13 @@ def do_js_beautify(self, argv): content = open(src, 'rb').read() if not isJavascript(content): if self.use_rawinput: - res = raw_input('The file may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The file may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The file does not contain Javascript code!!' self.log_output('js_beautify ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' 
+ newLine)) elif type == 'string': content = src else: @@ -1934,13 +1961,13 @@ def do_js_beautify(self, argv): content = object.getJSCode()[0] else: if self.use_rawinput: - res = raw_input('The object may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The object may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The object does not contain Javascript code!!' self.log_output('js_beautify ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) objectType = object.getType() if objectType == 'stream': content = object.getStream() @@ -1967,11 +1994,11 @@ def do_js_beautify(self, argv): self.log_output('js_beautify ' + argv, beautyContent) def help_js_beautify(self): - print newLine + 'Usage: js_beautify variable $var_name' - print 'Usage: js_beautify file $file_name' - print 'Usage: js_beautify object $object_id [$version]' - print 'Usage: js_beautify string $javascript_code [$version]' - print newLine + 'Beautifies the Javascript code stored in the specified variable, file or object' + newLine + print((newLine + 'Usage: js_beautify variable $var_name')) + print ('Usage: js_beautify file $file_name') + print ('Usage: js_beautify object $object_id [$version]') + print ('Usage: js_beautify string $javascript_code [$version]') + print((newLine + 'Beautifies the Javascript code stored in the specified variable, file or object' + newLine)) def do_js_code(self, argv): if self.pdfFile is None: @@ -2011,7 +2038,7 @@ def do_js_code(self, argv): jsCode = object.getJSCode() if len(jsCode) > 1: if self.use_rawinput: - res = raw_input( + res = input( newLine + 'There are more than one Javascript code, do you want to see all (1) or just the last one (2)? 
') else: res = '1' @@ -2034,8 +2061,8 @@ def do_js_code(self, argv): self.log_output('js_code ' + argv, message) def help_js_code(self): - print newLine + 'Usage: js_code $object_id [$version]' - print newLine + 'Shows the Javascript code found in the object' + newLine + print((newLine + 'Usage: js_code $object_id [$version]')) + print((newLine + 'Shows the Javascript code found in the object' + newLine)) def do_js_eval(self, argv): error = '' @@ -2063,7 +2090,7 @@ def do_js_eval(self, argv): self.help_js_eval() return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('js_eval ' + argv, message) return False @@ -2071,13 +2098,13 @@ def do_js_eval(self, argv): content = self.variables[src][0] if not isJavascript(content): if self.use_rawinput: - res = raw_input('The variable may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The variable may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The variable does not contain Javascript code!!' self.log_output('js_eval ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) elif type == 'file': if not os.path.exists(src): message = '*** Error: The file does not exist!!' @@ -2087,13 +2114,13 @@ def do_js_eval(self, argv): content = open(src, 'rb').read() if not isJavascript(content): if self.use_rawinput: - res = raw_input('The file may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The file may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The file does not contain Javascript code!!' 
self.log_output('js_eval ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) elif type == 'object': if self.pdfFile is None: message = '*** Error: You must open a file!!' @@ -2115,13 +2142,13 @@ def do_js_eval(self, argv): content = object.getJSCode()[0] else: if self.use_rawinput: - res = raw_input('The object may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The object may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The object does not contain Javascript code!!' self.log_output('js_eval ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) objectType = object.getType() if objectType == 'stream': content = object.getStream() @@ -2164,21 +2191,21 @@ def do_js_eval(self, argv): self.log_output('js_eval ' + argv, evalCode) except: error = str(sys.exc_info()[1]) - open('jserror.log', 'ab').write(error + newLine) + f = open(os.path.expanduser("~/.peepdf-jserror.log"), "ab") + f.write(error + newLine) if error != '': self.log_output('js_eval ' + argv, '*** Error: ' + error) def help_js_eval(self): - print newLine + 'Usage: js_eval variable $var_name' - print 'Usage: js_eval file $file_name' - print 'Usage: js_eval object $object_id [$version]' - print 'Usage: js_eval string $javascript_code' - print newLine + 'Evaluates the Javascript code stored in the specified variable, file, object or raw code in a global context' + newLine + print((newLine + 'Usage: js_eval variable $var_name')) + print ('Usage: js_eval file $file_name') + print ('Usage: js_eval object $object_id [$version]') + print ('Usage: js_eval string $javascript_code') + print((newLine + 'Evaluates the Javascript code stored in the specified 
variable, file, object or raw code in a global context' + newLine)) def do_js_jjdecode(self, argv): content = '' - bytes = '' validTypes = ['variable', 'file', 'object', 'string'] args = self.parseArgs(argv) if args is None: @@ -2198,7 +2225,7 @@ def do_js_jjdecode(self, argv): self.help_js_jjdecode() return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('js_jjdecode ' + argv, message) return False @@ -2206,13 +2233,13 @@ def do_js_jjdecode(self, argv): content = self.variables[src][0] if not isJavascript(content): if self.use_rawinput: - res = raw_input('The variable may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The variable may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The variable does not contain Javascript code!!' self.log_output('js_jjdecode ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) elif type == 'file': if not os.path.exists(src): message = '*** Error: The file does not exist!!' @@ -2222,13 +2249,13 @@ def do_js_jjdecode(self, argv): content = open(src, 'rb').read() if not isJavascript(content): if self.use_rawinput: - res = raw_input('The file may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The file may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The file does not contain Javascript code!!' self.log_output('js_jjdecode ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' 
+ newLine)) elif type == 'string': content = src else: @@ -2252,13 +2279,13 @@ def do_js_jjdecode(self, argv): content = object.getJSCode()[0] else: if self.use_rawinput: - res = raw_input('The object may not contain Javascript code, do you want to continue? (y/n) ') + res = input('The object may not contain Javascript code, do you want to continue? (y/n) ') if res.lower() == 'n': message = '*** Error: The object does not contain Javascript code!!' self.log_output('js_jjdecode ' + argv, message) return False else: - print 'Warning: the object may not contain Javascript code...' + newLine + print(('Warning: the object may not contain Javascript code...' + newLine)) objectType = object.getType() if objectType == 'stream': content = object.getStream() @@ -2304,11 +2331,11 @@ def do_js_jjdecode(self, argv): self.log_output('js_jjdecode ' + argv, decodedContent) def help_js_jjdecode(self): - print newLine + 'Usage: js_jjdecode variable $var_name' - print 'Usage: js_jjdecode file $file_name' - print 'Usage: js_jjdecode object $object_id [$version]' - print 'Usage: js_jjdecode string $encoded_js_code [$version]' - print newLine + 'Decodes the Javascript code stored in the specified variable, file or object using the jjencode/decode algorithm by Yosuke Hasegawa (http://utf-8.jp/public/jjencode.html)' + newLine + print((newLine + 'Usage: js_jjdecode variable $var_name')) + print ('Usage: js_jjdecode file $file_name') + print ('Usage: js_jjdecode object $object_id [$version]') + print ('Usage: js_jjdecode string $encoded_js_code [$version]') + print((newLine + 'Decodes the Javascript code stored in the specified variable, file or object using the jjencode/decode algorithm by Yosuke Hasegawa (http://utf-8.jp/public/jjencode.html)' + newLine)) def do_js_join(self, argv): content = '' @@ -2329,7 +2356,7 @@ def do_js_join(self, argv): self.help_js_join() return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** 
Error: The variable does not exist!!' self.log_output('js_join ' + argv, message) return False @@ -2354,14 +2381,14 @@ def do_js_join(self, argv): self.log_output('js_join ' + argv, finalString) def help_js_join(self): - print newLine + 'Usage: js_join variable $var_name' - print 'Usage: js_join file $file_name' - print 'Usage: js_join string $my_string' - print newLine + 'Joins some strings separated by quotes and stored in the specified variable or file in a unique one' + newLine - print 'Example:' + newLine - print 'aux = "%u65"+"54"+"%u74"+"73"' + newLine - print '> js_join variable aux' + newLine - print '%u6554%u7473' + newLine + print((newLine + 'Usage: js_join variable $var_name')) + print ('Usage: js_join file $file_name') + print ('Usage: js_join string $my_string') + print((newLine + 'Joins some strings separated by quotes and stored in the specified variable or file in a unique one' + newLine)) + print(('Example:' + newLine)) + print(('aux = "%u65"+"54"+"%u74"+"73"' + newLine)) + print(('> js_join variable aux' + newLine)) + print(('%u6554%u7473' + newLine)) def do_js_unescape(self, argv): content = '' @@ -2384,7 +2411,7 @@ def do_js_unescape(self, argv): self.help_js_unescape() return False if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' 
self.log_output('js_unescape ' + argv, message) return False @@ -2425,14 +2452,14 @@ def do_js_unescape(self, argv): self.log_output('js_unescape ' + argv, unescapedOutput, [bytes], bytesOutput=True) def help_js_unescape(self): - print newLine + 'Usage: js_unescape variable $var_name' - print 'Usage: js_unescape file $file_name' - print 'Usage: js_unescape string $escaped_string' - print newLine + 'Unescapes the escaped characters stored in the specified variable or file' + newLine - print 'Example:' + newLine - print 'aux = "%u6554%u7473"' + newLine - print '> js_unescape variable aux' + newLine - print '54 65 73 74 |Test|' + newLine + print((newLine + 'Usage: js_unescape variable $var_name')) + print ('Usage: js_unescape file $file_name') + print ('Usage: js_unescape string $escaped_string') + print((newLine + 'Unescapes the escaped characters stored in the specified variable or file' + newLine)) + print(('Example:' + newLine)) + print(('aux = "%u6554%u7473"' + newLine)) + print(('> js_unescape variable aux' + newLine)) + print(('54 65 73 74 |Test|' + newLine)) def do_js_vars(self, argv): varName = None @@ -2456,7 +2483,7 @@ def do_js_vars(self, argv): return False if len(args) == 1: varName = args[0] - if varName in context.locals.keys(): + if varName in list(context.locals.keys()): varContent = context.locals[varName] try: self.log_output('js_vars ' + argv, str(varContent)) @@ -2473,14 +2500,14 @@ def do_js_vars(self, argv): else: fixedVars = ['evalOverride', 'hasOwnProperty', 'isPrototypeOf', 'toLocaleString', 'toString', 'unwatch', 'valueOf', 'watch'] - varArray = context.locals.keys() + varArray = list(context.locals.keys()) for fixedVar in fixedVars: varArray.remove(fixedVar) self.log_output('js_vars ' + argv, str(varArray)) def help_js_vars(self): - print newLine + 'Usage: js_vars [$var_name]' - print newLine + 'Shows the Javascript variables defined in the execution context or the content of the specified variable' + newLine + print((newLine + 'Usage: 
js_vars [$var_name]')) + print((newLine + 'Shows the Javascript variables defined in the execution context or the content of the specified variable' + newLine)) def do_log(self, argv): args = self.parseArgs(argv) @@ -2491,9 +2518,9 @@ def do_log(self, argv): numArgs = len(args) if numArgs == 0: if self.loggingFile is None: - print newLine + 'Not logging now!!' + newLine + print((newLine + 'Not logging now!!' + newLine)) else: - print newLine + 'Log file: ' + self.loggingFile + newLine + print((newLine + 'Log file: ' + self.loggingFile + newLine)) elif numArgs == 1: param = args[0] if param == 'stop': @@ -2505,12 +2532,12 @@ def do_log(self, argv): return False def help_log(self): - print newLine + 'Usage: log' - print newLine + 'Shows the actual state of logging' + newLine - print 'Usage: log stop' - print newLine + 'Stops logging' + newLine - print 'Usage: log $log_file' - print newLine + 'Starts logging in the specified file' + newLine + print((newLine + 'Usage: log')) + print((newLine + 'Shows the actual state of logging' + newLine)) + print ('Usage: log stop') + print((newLine + 'Stops logging' + newLine)) + print ('Usage: log $log_file') + print((newLine + 'Starts logging in the specified file' + newLine)) def do_malformed_output(self, argv): malformedOptions = [] @@ -2551,15 +2578,15 @@ def do_malformed_output(self, argv): self.log_output('malformed_output ' + argv, message) def help_malformed_output(self): - print newLine + 'Usage: malformed_output [$option1 [$option2 ...] [$header_file]]' + newLine - print 'Enables malformed output when saving the file:' + newLine - print '\t0: Removes all the malformed options.' - print '\t1 [header_file]: Enable all the implemented tricks. Default option.' - print '\t2 [header_file]: Puts the default or specified header before the PDF header.' - print '\t3: Removes all the "endobj" tags.' - print '\t4: Removes all the "endstream" tags.' - print '\t5: Removes the "xref" section.' 
- print '\t6: Bad header: %PDF-1' + newLine + print((newLine + 'Usage: malformed_output [$option1 [$option2 ...] [$header_file]]' + newLine)) + print(('Enables malformed output when saving the file:' + newLine)) + print ('\t0: Removes all the malformed options.') + print ('\t1 [header_file]: Enable all the implemented tricks. Default option.') + print ('\t2 [header_file]: Puts the default or specified header before the PDF header.') + print ('\t3: Removes all the "endobj" tags.') + print ('\t4: Removes all the "endstream" tags.') + print ('\t5: Removes the "xref" section.') + print(('\t6: Bad header: %PDF-1' + newLine)) def do_metadata(self, argv): if self.pdfFile is None: @@ -2620,11 +2647,10 @@ def do_metadata(self, argv): return False def help_metadata(self): - print newLine + 'Usage: metadata [$version]' - print newLine + 'Shows the metadata of the document or version of the document' + newLine + print((newLine + 'Usage: metadata [$version]')) + print((newLine + 'Shows the metadata of the document or version of the document' + newLine)) def do_modify(self, argv): - maxDepth = 2 validModifyTypes = ['object', 'stream'] args = self.parseArgs(argv) if args is None: @@ -2695,7 +2721,7 @@ def do_modify(self, argv): streamContent = open(contentFile, 'rb').read() else: if self.use_rawinput: - streamContent = raw_input( + streamContent = input( newLine + 'Please, specify the stream content (if the content includes EOL characters use a file instead):' + newLine * 2) else: message = '*** Error: in batch mode you must specify a file storing the stream content!!' @@ -2710,8 +2736,8 @@ def do_modify(self, argv): self.log_output('modify ' + argv, message) def help_modify(self): - print newLine + 'Usage: modify object|stream $object_id [$version] [$file]' + newLine - print 'Modifies the object or stream specified. It\'s possible to use a file to retrieve the stream content (ONLY for stream content).' 
+ newLine + print((newLine + 'Usage: modify object|stream $object_id [$version] [$file]' + newLine)) + print(('Modifies the object or stream specified. It\'s possible to use a file to retrieve the stream content (ONLY for stream content).' + newLine)) def do_object(self, argv): if self.pdfFile is None: @@ -2750,8 +2776,8 @@ def do_object(self, argv): self.log_output('object ' + argv, value) def help_object(self): - print newLine + 'Usage: object $object_id [$version]' - print newLine + 'Shows the content of the object after being decoded and decrypted.' + newLine + print((newLine + 'Usage: object $object_id [$version]')) + print((newLine + 'Shows the content of the object after being decoded and decrypted.' + newLine)) def do_offsets(self, argv): if self.pdfFile is None: @@ -2786,16 +2812,16 @@ def do_offsets(self, argv): for i in range(len(offsetsArray)): offsets = offsetsArray[i] - if i == 0 and offsets.has_key('header'): + if i == 0 and "header" in offsets: offset, size = offsets['header'] offsetsOutput += '%8d %s%s' % (offset, 'Header', newLine) elif version is None: offsetsOutput += newLine + 'Version ' + str(i) + ':' + newLine * 2 - if offsets.has_key('objects'): + if "objects" in offsets: compressedObjects = offsets['compressed'] sortedObjectList = sorted(offsets['objects'], key=lambda x: x[1]) for id, offset, size in sortedObjectList: - #offsetsOutput += '%8d %s %d (%d)%s' % (offset,'Object ',id,size,newLine) + # offsetsOutput += '%8d %s %d (%d)%s' % (offset,'Object ',id,size,newLine) if id in compressedObjects: offsetsOutput += '%8d%s%8s%s %d (%d)%s%8d%s' % ( offset, newLine, '', 'Compressed Object ', id, size, newLine, offset + size - 1, newLine) @@ -2804,12 +2830,12 @@ def do_offsets(self, argv): offset, newLine, '', 'Object ', id, size, newLine, offset + size - 1, newLine) if offsets['xref'] is not None: offset, size = offsets['xref'] - #offsetsOutput += '%8d %s (%d)%s' % (offset,'Xref Section',size,newLine) + # offsetsOutput += '%8d %s (%d)%s' % 
(offset,'Xref Section',size,newLine) offsetsOutput += '%8d%s%8s%s (%d)%s%8d%s' % ( offset, newLine, '', 'Xref Section', size, newLine, offset + size - 1, newLine) if offsets['trailer'] is not None: offset, size = offsets['trailer'] - #offsetsOutput += '%8d %s (%d)%s' % (offset,'Trailer',size,newLine) + # offsetsOutput += '%8d %s (%d)%s' % (offset,'Trailer',size,newLine) offsetsOutput += '%8d%s%8s%s (%d)%s%8d%s' % ( offset, newLine, '', 'Trailer', size, newLine, offset + size - 1, newLine) if offsets['eof'] is not None: @@ -2819,8 +2845,8 @@ def do_offsets(self, argv): self.log_output('offsets ' + argv, offsetsOutput) def help_offsets(self): - print newLine + 'Usage: offsets [$version]' - print newLine + 'Shows the physical map of the file or the specified version of the document' + newLine + print((newLine + 'Usage: offsets [$version]')) + print((newLine + 'Shows the physical map of the file or the specified version of the document' + newLine)) def do_open(self, argv): forceMode = False @@ -2864,33 +2890,31 @@ def do_open(self, argv): self.pdfFile = None self.log_output('open ' + argv, message) if not JS_MODULE: - print 'Warning: PyV8 is not installed!!' + newLine + print(('Warning: PyV8 is not installed!!' 
+ newLine)) if self.pdfFile is not None: self.do_info('') def help_open(self): - print newLine + 'Usage: open [-fl] $file_name' + newLine - print 'Opens and parses the specified file' + newLine - print 'Options:' - print '\t-f: Sets force parsing mode to ignore errors' - print '\t-l: Sets loose parsing mode for problematic files' + newLine + print((newLine + 'Usage: open [-fl] $file_name' + newLine)) + print(('Opens and parses the specified file' + newLine)) + print ('Options:') + print ('\t-f: Sets force parsing mode to ignore errors') + print(('\t-l: Sets loose parsing mode for problematic files' + newLine)) def do_quit(self, argv): return True def help_quit(self): - print newLine + 'Usage: quit' - print newLine + 'Exits from the console' + newLine + print((newLine + 'Usage: quit')) + print((newLine + 'Exits from the console' + newLine)) def do_rawobject(self, argv): if self.pdfFile is None: message = '*** Error: You must open a file!!' self.log_output('rawobject ' + argv, message) return False - compressed = False rawValue = '' offset = 0 - size = 0 args = self.parseArgs(argv) if args is None: message = '*** Error: The command line arguments have not been parsed successfully!!' 
@@ -2923,7 +2947,6 @@ def do_rawobject(self, argv): xrefArray = ret[1] if xrefArray[0] is not None: offset = xrefArray[0].getOffset() - size = xrefArray[0].getSize() rawValue = xrefArray[0].toFile() elif id == 'trailer': ret = self.pdfFile.getTrailer(version) @@ -2935,7 +2958,6 @@ def do_rawobject(self, argv): trailerArray = ret[1] if trailerArray[0] is not None: offset = trailerArray[0].getOffset() - size = trailerArray[0].getSize() rawValue = trailerArray[0].toFile() else: id = int(id) @@ -2945,9 +2967,7 @@ def do_rawobject(self, argv): self.log_output('rawobject ' + argv, message) return False object = indirectObject.getObject() - compressed = object.isCompressed() offset = indirectObject.getOffset() - size = indirectObject.getSize() rawValue = str(object.getRawValue()) if offset == -1: message = '*** Error: offset cannot be calculated!!' @@ -2967,8 +2987,8 @@ def do_rawobject(self, argv): self.log_output('rawobject ' + argv, rawValue) def help_rawobject(self): - print newLine + 'Usage: rawobject [$object_id|xref|trailer [$version]]' - print newLine + 'Shows the content of the object without being decoded or decrypted (object_id, xref, trailer)' + newLine + print((newLine + 'Usage: rawobject [$object_id|xref|trailer [$version]]')) + print((newLine + 'Shows the content of the object without being decoded or decrypted (object_id, xref, trailer)' + newLine)) def do_rawstream(self, argv): if self.pdfFile is None: @@ -3011,8 +3031,8 @@ def do_rawstream(self, argv): self.log_output('rawstream ' + argv, value, [value], bytesOutput=True) def help_rawstream(self): - print newLine + 'Usage: rawstream $object_id [$version]' - print newLine + 'Shows the stream content of the specified document version before being decoded and decrypted' + newLine + print((newLine + 'Usage: rawstream $object_id [$version]')) + print((newLine + 'Shows the stream content of the specified document version before being decoded and decrypted' + newLine)) def do_references(self, argv): if 
self.pdfFile is None: @@ -3033,8 +3053,8 @@ def do_references(self, argv): return False command = args[0] id = args[1] - if not id.isdigit() or (version is not None and not version.isdigit()) or ( - command.lower() != 'to' and command.lower() != 'in'): + if not id.isdigit() or (version is not None and not version.isdigit()) or \ + (command.lower() != 'to' and command.lower() != 'in'): self.help_references() return False id = int(id) @@ -3055,8 +3075,8 @@ def do_references(self, argv): self.log_output('references ' + argv, str(references)) def help_references(self): - print newLine + 'Usage: references to|in $object_id [$version]' - print newLine + 'Shows the references in the object or to the object in the specified version of the document' + newLine + print((newLine + 'Usage: references to|in $object_id [$version]')) + print((newLine + 'Shows the references in the object or to the object in the specified version of the document' + newLine)) def do_replace(self, argv): replaceOutput = '' @@ -3113,7 +3133,7 @@ def do_replace(self, argv): else: message = 'String not found!!' 
else: - if self.variables.has_key(src): + if src in self.variables: if self.variables[src][0].find(string1) != -1: replaceOutput = self.variables[src][0].replace(string1, string2) self.variables[src][0] = replaceOutput @@ -3125,11 +3145,11 @@ def do_replace(self, argv): self.log_output('replace ' + argv, message) def help_replace(self): - print newLine + 'Usage: replace all $string1 $string2' - print newLine + 'Replaces $string1 with $string2 in the whole PDF file' + newLine - print 'Usage: replace variable $var_name $string1 $string2' - print 'Usage: replace file $file_name $string1 $string2' - print newLine + 'Replaces $string1 with $string2 in the content of the specified variable or file' + newLine + print((newLine + 'Usage: replace all $string1 $string2')) + print((newLine + 'Replaces $string1 with $string2 in the whole PDF file' + newLine)) + print ('Usage: replace variable $var_name $string1 $string2') + print ('Usage: replace file $file_name $string1 $string2') + print((newLine + 'Replaces $string1 with $string2 in the content of the specified variable or file' + newLine)) def do_reset(self, argv): args = self.parseArgs(argv) @@ -3142,7 +3162,7 @@ def do_reset(self, argv): clearScreen() elif numArgs == 1: var = args[0] - if self.variables.has_key(var): + if var in self.variables: self.variables[var][0] = self.variables[var][1] if var == 'output' and (self.variables[var][0] == 'file' or self.variables[var][0] == 'variable'): message = var + ' = "' + self.output + '" (' + str(self.variables[var][0]) + ')' @@ -3162,10 +3182,10 @@ def do_reset(self, argv): self.help_reset() def help_reset(self): - print newLine + 'Usage: reset' - print newLine + 'Cleans the console' - print newLine + 'Usage: reset $var_name' - print newLine + 'Resets the variable value to the default value if applicable' + newLine + print((newLine + 'Usage: reset')) + print((newLine + 'Cleans the console')) + print((newLine + 'Usage: reset $var_name')) + print((newLine + 'Resets the variable 
value to the default value if applicable' + newLine)) def do_save(self, argv): if self.pdfFile is None: @@ -3194,8 +3214,8 @@ def do_save(self, argv): self.help_save() def help_save(self): - print newLine + 'Usage: save [$file_name]' - print newLine + 'Saves the file to disk' + newLine + print((newLine + 'Usage: save [$file_name]')) + print((newLine + 'Saves the file to disk' + newLine)) def do_save_version(self, argv): if self.pdfFile is None: @@ -3230,8 +3250,8 @@ def do_save_version(self, argv): self.help_save_version() def help_save_version(self): - print newLine + 'Usage: save_version $version $file_name' - print newLine + 'Saves the selected file version to disk' + newLine + print((newLine + 'Usage: save_version $version $file_name')) + print((newLine + 'Saves the selected file version to disk' + newLine)) def do_sctest(self, argv): if not EMU_MODULE: @@ -3302,7 +3322,7 @@ def do_sctest(self, argv): src = args[1] if type == 'variable': - if not self.variables.has_key(src): + if src not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('sctest ' + argv, message) return False @@ -3344,10 +3364,10 @@ def do_sctest(self, argv): self.log_output('sctest ' + argv, output) def help_sctest(self): - print newLine + 'Usage: sctest [-v] variable $var_name' - print 'Usage: sctest [-v] file $file_name' - print 'Usage: sctest [-v] raw $offset $num_bytes' - print newLine + 'Wrapper of the sctest tool (libemu) to emulate shellcodes. With -v the output is verbose, be ready for tons of data ;p' + newLine + print((newLine + 'Usage: sctest [-v] variable $var_name')) + print ('Usage: sctest [-v] file $file_name') + print ('Usage: sctest [-v] raw $offset $num_bytes') + print((newLine + 'Wrapper of the sctest tool (libemu) to emulate shellcodes. 
With -v the output is verbose, be ready for tons of data ;p' + newLine)) def do_search(self, argv): if self.pdfFile is None: @@ -3410,9 +3430,9 @@ def do_search(self, argv): self.log_output('search ' + argv, output) def help_search(self): - print newLine + 'Usage: search [hex] $string' - print newLine + 'Search the specified string or hexadecimal string in the objects (decoded and encrypted streams included)' + newLine - print 'Example: search hex \\x34\\x35' + newLine + print((newLine + 'Usage: search [hex] $string')) + print((newLine + 'Search the specified string or hexadecimal string in the objects (decoded and encrypted streams included)' + newLine)) + print(('Example: search hex \\x34\\x35' + newLine)) def do_set(self, argv): consoleOutput = '' @@ -3426,7 +3446,7 @@ def do_set(self, argv): self.help_set() return False if numArgs == 0: - vars = self.variables.keys() + vars = list(self.variables.keys()) for var in vars: varContent = self.printResult(str(self.variables[var][0])) if varContent == str(self.variables[var][0]): @@ -3436,7 +3456,7 @@ def do_set(self, argv): consoleOutput += var + ' = ' + str(varContent) + newLine else: consoleOutput += var + ' = ' + newLine + varContent + newLine - print newLine + consoleOutput + print((newLine + consoleOutput)) else: varName = args[0] value = args[1] @@ -3451,19 +3471,19 @@ def do_set(self, argv): return False else: value = int(value) - if self.variables.has_key(varName): + if varName in self.variables: self.variables[varName][0] = value else: self.variables[varName] = [value, value] def help_set(self): - print newLine + 'Usage: set [$var_name $var_value]' - print newLine + 'Sets the specified variable value or creates one with this value. Without parameters all the variables are shown.' + newLine - print 'Special variables:' + newLine - print '\theader_file: READ ONLY. Specifies the file header to be used when \'malformed_options\' are active.' + newLine - print '\tmalformed_options: READ ONLY. 
Variable to store the malformed options used to save the file.' + newLine - print '\toutput_limit: variable to specify the maximum number of lines to be shown at once when the output is long (no limit = -1). By default there is no limit.' + newLine - print '\tvt_key: VirusTotal Api key.' + newLine + print((newLine + 'Usage: set [$var_name $var_value]')) + print((newLine + 'Sets the specified variable value or creates one with this value. Without parameters all the variables are shown.' + newLine)) + print(('Special variables:' + newLine)) + print(('\theader_file: READ ONLY. Specifies the file header to be used when \'malformed_options\' are active.' + newLine)) + print(('\tmalformed_options: READ ONLY. Variable to store the malformed options used to save the file.' + newLine)) + print(('\toutput_limit: variable to specify the maximum number of lines to be shown at once when the output is long (no limit = -1). By default there is no limit.' + newLine)) + print(('\tvt_key: VirusTotal Api key.' + newLine)) def do_show(self, argv): args = self.parseArgs(argv) @@ -3475,39 +3495,38 @@ def do_show(self, argv): self.help_show() return False var = args[0] - if not self.variables.has_key(var): - print newLine + '*** Error: The variable ' + var + ' does not exist!!' + newLine + if var not in self.variables: + print((newLine + '*** Error: The variable ' + var + ' does not exist!!' 
+ newLine)) return False if var == 'output': if self.variables[var][0] == 'stdout': - print newLine + 'output = "stdout"' + newLine + print((newLine + 'output = "stdout"' + newLine)) else: if self.variables[var][0] == 'file': - print newLine + 'output = "file"' - print 'fileName = "' + self.output + '"' + newLine + print((newLine + 'output = "file"')) + print(('fileName = "' + self.output + '"' + newLine)) else: - print newLine + 'output = "variable"' - print 'varName = "' + self.output + '"' + newLine + print((newLine + 'output = "variable"')) + print(('varName = "' + self.output + '"' + newLine)) else: varContent = self.printResult(str(self.variables[var][0])) - print newLine + varContent + newLine + print((newLine + varContent + newLine)) def help_show(self): - print newLine + 'Usage: show $var_name' - print newLine + 'Shows the value of the specified variable' + newLine - print 'Special variables:' + newLine - print '\theader_file' - print '\tmalformed_options' - print '\toutput' - print '\toutput_limit' - print '\tvt_key' + newLine + print((newLine + 'Usage: show $var_name')) + print((newLine + 'Shows the value of the specified variable' + newLine)) + print(('Special variables:' + newLine)) + print ('\theader_file') + print ('\tmalformed_options') + print ('\toutput') + print ('\toutput_limit') + print(('\tvt_key' + newLine)) def do_stream(self, argv): if self.pdfFile is None: message = '*** Error: You must open a file!!' self.log_output('stream ' + argv, message) return False - result = '' args = self.parseArgs(argv) if args is None: message = '*** Error: The command line arguments have not been parsed successfully!!' 
@@ -3548,9 +3567,8 @@ def do_stream(self, argv): self.log_output('stream ' + argv, value, [value], bytesOutput=True) def help_stream(self): - print newLine + 'Usage: stream $object_id [$version]' - print newLine + 'Shows the object stream content of the specified version after being decoded and decrypted (if necessary)' + newLine - + print((newLine + 'Usage: stream $object_id [$version]')) + print((newLine + 'Shows the object stream content of the specified version after being decoded and decrypted (if necessary)' + newLine)) def do_tree(self, argv): if self.pdfFile is None: @@ -3598,8 +3616,8 @@ def do_tree(self, argv): self.log_output('tree ' + argv, treeOutput) def help_tree(self): - print newLine + 'Usage: tree [$version]' - print newLine + 'Shows the tree graph of the file or specified version' + newLine + print((newLine + 'Usage: tree [$version]')) + print((newLine + 'Shows the tree graph of the file or specified version' + newLine)) def do_vtcheck(self, argv): content = '' @@ -3651,7 +3669,7 @@ def do_vtcheck(self, argv): self.help_vtcheck() return False if type == 'variable': - if not self.variables.has_key(srcName): + if srcName not in self.variables: message = '*** Error: The variable does not exist!!' 
self.log_output('vtcheck ' + argv, message) return False @@ -3719,10 +3737,11 @@ def do_vtcheck(self, argv): self.log_output('vtcheck ' + argv, message) return False jsonDict = ret[1] - if jsonDict.has_key('response_code'): + if "response_code" in jsonDict: if jsonDict['response_code'] == 1: - if jsonDict.has_key('scan_date') and jsonDict.has_key('positives') and jsonDict.has_key( - 'total') and jsonDict.has_key('scans') and jsonDict.has_key('permalink'): + if "scan_date" in jsonDict and "positives" in jsonDict and \ + "total" in jsonDict and "scans" in jsonDict and \ + "permalink" in jsonDict: detectionColor = '' if args == []: self.pdfFile.setDetectionRate([jsonDict['positives'], jsonDict['total']]) @@ -3745,8 +3764,8 @@ def do_vtcheck(self, argv): for engine in jsonDict['scans']: engineResults = jsonDict['scans'][engine] - if engineResults.has_key('detected') and engineResults.has_key( - 'version') and engineResults.has_key('result') and engineResults.has_key('update'): + if "detected" in engineResults and "version" in engineResults and \ + "result" in engineResults and "update" in engineResults: if engineResults['detected']: output += '%25s\t%18s\t%10s\t%s%s%s%s' % ( engine, engineResults['version'], engineResults['update'], self.alertColor, @@ -3766,20 +3785,18 @@ def do_vtcheck(self, argv): self.log_output('vtcheck ' + argv, output) def help_vtcheck(self): - print newLine + 'Usage: vtcheck' - print 'Usage: vtcheck object|rawobject|stream|rawstream $object_id [$version]' - print 'Usage: vtcheck raw $offset $num_bytes' - print 'Usage: vtcheck file $file_name' - print 'Usage: vtcheck variable $var_name' - print newLine + 'Checks the hash of the specified source on VirusTotal: raw bytes of the file, objects and streams, and the content of files or variables.' - print 'If no parameters are specified then the hash of the PDF document will be checked.' + newLine - print '*** NOTE: NO CONTENT IS SENT TO VIRUSTOTAL, JUST HASHES!!' 
+ newLine - print '*** NOTE: You need a VirusTotal API key to use this command.' + newLine + print((newLine + 'Usage: vtcheck')) + print ('Usage: vtcheck object|rawobject|stream|rawstream $object_id [$version]') + print ('Usage: vtcheck raw $offset $num_bytes') + print ('Usage: vtcheck file $file_name') + print ('Usage: vtcheck variable $var_name') + print((newLine + 'Checks the hash of the specified source on VirusTotal: raw bytes of the file, objects and streams, and the content of files or variables.')) + print(('If no parameters are specified then the hash of the PDF document will be checked.' + newLine)) + print(('*** NOTE: NO CONTENT IS SENT TO VIRUSTOTAL, JUST HASHES!!' + newLine)) + print(('*** NOTE: You need a VirusTotal API key to use this command.' + newLine)) def do_xor(self, argv): content = '' - found = False - outputBytes = '' validTypes = ['variable', 'file', 'raw', 'stream', 'rawstream'] args = self.parseArgs(argv) if args is None: @@ -3845,7 +3862,7 @@ def do_xor(self, argv): return False key = chr(int(key, 16)) if type == 'variable': - if not self.variables.has_key(srcName): + if srcName not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('xor ' + argv, message) return False @@ -3916,19 +3933,18 @@ def do_xor(self, argv): self.log_output('xor ' + argv, output, [output], bytesOutput=True) def help_xor(self): - print newLine + 'Usage: xor stream|rawstream $object_id [$version] [$key]' - print 'Usage: xor raw $offset $num_bytes $key' - print 'Usage: xor file $file_name $key' - print 'Usage: xor variable $var_name $key' - print newLine + 'Performs an XOR operation using the specified key with the content of the specified file or variable, raw bytes of the file or stream/rawstream.' - print 'If the key is not specified then a bruteforcing XOR is performed.' 
+ newLine + print((newLine + 'Usage: xor stream|rawstream $object_id [$version] [$key]')) + print ('Usage: xor raw $offset $num_bytes $key') + print ('Usage: xor file $file_name $key') + print ('Usage: xor variable $var_name $key') + print((newLine + 'Performs an XOR operation using the specified key with the content of the specified file or variable, raw bytes of the file or stream/rawstream.')) + print(('If the key is not specified then a bruteforcing XOR is performed.' + newLine)) def do_xor_search(self, argv): content = '' found = False - decValues = range(256) + decValues = list(range(256)) successfullKeys = {} - outputBytes = '' caseSensitive = True validTypes = ['variable', 'file', 'raw', 'stream', 'rawstream'] args = self.parseArgs(argv) @@ -3969,7 +3985,7 @@ def do_xor_search(self, argv): self.help_xor_search() return False if type == 'variable': - if not self.variables.has_key(srcName): + if srcName not in self.variables: message = '*** Error: The variable does not exist!!' self.log_output('xor_search ' + argv, message) return False @@ -4052,11 +4068,11 @@ def do_xor_search(self, argv): offset += len(m) auxXored = auxXored[index + len(m):] successfullKeys[hex(i)] = offsets - #outputBytes += '[' + hex(i) + ']' + newLine - #outputBytes += xored + newLine - #outputBytes += '[/' + hex(i) + ']' + newLine*2 + # outputBytes += '[' + hex(i) + ']' + newLine + # outputBytes += xored + newLine + # outputBytes += '[/' + hex(i) + ']' + newLine*2 if found: - keys = successfullKeys.keys() + keys = list(successfullKeys.keys()) message = 'Pattern found with the following keys: ' + str(keys) + newLine * 2 for key in keys: message += 'Offsets for key \'' + str(key) + '\': ' + str(successfullKeys[key]) + newLine @@ -4065,18 +4081,18 @@ def do_xor_search(self, argv): self.log_output('xor_search ' + argv, message) def help_xor_search(self): - print newLine + 'Usage: xor_search [-i] stream|rawstream $object_id [$version] $string_to_search' - print 'Usage: xor_search [-i] raw 
$offset $num_bytes $string_to_search' - print 'Usage: xor_search [-i] file $file_name $string_to_search' - print 'Usage: xor_search [-i] variable $var_name $string_to_search' - print newLine + 'Searches for the specified string in the result of an XOR brute forcing operation with the content of the specified file or variable,' - print 'raw bytes of the file or stream/rawstream. The output shows the offset/s where the string is found. It\'s a case sensitive search but' - print 'it\'s possible to make it insensitive using -i.' + newLine + print((newLine + 'Usage: xor_search [-i] stream|rawstream $object_id [$version] $string_to_search')) + print ('Usage: xor_search [-i] raw $offset $num_bytes $string_to_search') + print ('Usage: xor_search [-i] file $file_name $string_to_search') + print ('Usage: xor_search [-i] variable $var_name $string_to_search') + print((newLine + 'Searches for the specified string in the result of an XOR brute forcing operation with the content of the specified file or variable,')) + print ('raw bytes of the file or stream/rawstream. The output shows the offset/s where the string is found. It\'s a case sensitive search but') + print(('it\'s possible to make it insensitive using -i.' + newLine)) def additionRequest(self, dict=False): ''' Method to ask the user if he wants to add more entries to the object or not - + @param dict: Boolean to specify if the added object is a dictionary or not. Default value: False. @return: The response chosen by the user ''' @@ -4084,7 +4100,7 @@ def additionRequest(self, dict=False): message = newLine + 'Do you want to add more objects? (y/n) ' else: message = newLine + 'Do you want to add more entries? 
(y/n) ' - res = raw_input(message) + res = input(message) if res.lower() in ['y', 'n']: return res.lower() else: @@ -4093,7 +4109,7 @@ def additionRequest(self, dict=False): def addObject(self, iteration, maxDepth=10): ''' Method to add a new object to an array or dictionary - + @param iteration: Integer which specifies the depth of the recursion in the same object @param maxDepth: The maximum depth for nested objects. Default value: 10. @return: The new object @@ -4112,12 +4128,12 @@ def addObject(self, iteration, maxDepth=10): '\t7 - null' + newLine + \ '\t8 - array' + newLine + \ '\t9 - dictionary' + newLine - res = raw_input(message) + res = input(message) if not res.isdigit() or int(res) < 1 or int(res) > 9: return (-1, 'Object type not valid!!') objectType = dictNumType[res] if objectType != 'array' and objectType != 'dictionary': - content = raw_input(newLine + 'Please, specify the ' + objectType + ' object content:' + newLine * 2) + content = input(newLine + 'Please, specify the ' + objectType + ' object content:' + newLine * 2) content = self.checkInputContent(objectType, content) if content is None: return (-1, '*** Error: Content not valid for the object type!!') @@ -4140,7 +4156,7 @@ def addObject(self, iteration, maxDepth=10): object = PDFNull(content) elif objectType == 'array': elements = [] - print 'Please, now specify the elements of the array:' + print ('Please, now specify the elements of the array:') while True: res = self.additionRequest() if res is None: @@ -4155,13 +4171,13 @@ def addObject(self, iteration, maxDepth=10): object = PDFArray(elements=elements) elif objectType == 'dictionary': elements = {} - print 'Please, now specify the elements of the dictionary:' + print ('Please, now specify the elements of the dictionary:') while True: res = self.additionRequest(dict=True) if res is None: return (-1, 'Option not valid!!') elif res == 'y': - key = raw_input('Name object: ') + key = input('Name object: ') key = self.checkInputContent('name', 
key) ret = self.addObject(iteration + 1) if ret[0] == -1: @@ -4173,14 +4189,14 @@ def addObject(self, iteration, maxDepth=10): return (0, object) def checkInputContent(self, objectType, objectContent): - ''' + """ Check if the specified content is valid for the specified object type and modify it\'s possible - + @param objectType: The type of object: number, string, hexstring, name, reference, null @param objectContent: The object content @return: The content of the object or None if any problems occur - ''' - spacesChars = ['\x00', '\x09', '\x0a', '\x0c', '\x0d', '\x20'] + """ + spacesChars = ['\x00', '\x09', '\x0a', '\x0c','\x0d', '\x20'] demilimiterChars = ['<<', '(', '<', '[', '{', '/', '%'] if objectType == 'bool': if objectContent.lower() not in ['true', 'false']: @@ -4237,12 +4253,12 @@ def checkInputContent(self, objectType, objectContent): def log_output(self, command, output, bytesToSave=None, printOutput=True, bytesOutput=False): ''' Method to check the commands output and write it to the console and/or files / variables - + @param command: The command launched @param output: The output of the command @param bytesToSave: A list with the raw bytes which will be stored in a file or variable if a redirection has been set (>,>>,$>,$>>). @param printOutput: Boolean to specify if the output will be written to the console or not. Default value: True. - @param bytesOutput: Boolean to specify if we want to print raw bytes or not. Default value: False. + @param bytesOutput: Boolean to specify if we want to print raw bytes or not. Default value: False. 
''' errorIndex = output.find('*** Error') if errorIndex != -1: @@ -4279,16 +4295,17 @@ def log_output(self, command, output, bytesToSave=None, printOutput=True, bytesO if self.redirect == VAR_WRITE: self.variables[varName] = [bytes, bytes] elif self.redirect == VAR_ADD: - if self.variables.has_key(varName): + if varName in self.variables: self.variables[varName][0] += bytes else: self.variables[varName] = [bytes, bytes] elif printOutput: if niceOutput: niceOutput = newLine + niceOutput + newLine - if self.variables['output_limit'][0] is None or self.variables['output_limit'][ - 0] == -1 or not self.use_rawinput: - print niceOutput + if self.variables['output_limit'][0] is None or \ + self.variables['output_limit'][0] == -1 or \ + not self.use_rawinput: + print(niceOutput) else: limit = int(self.variables['output_limit'][0]) lines = niceOutput.split(newLine) @@ -4296,17 +4313,17 @@ def log_output(self, command, output, bytesToSave=None, printOutput=True, bytesO outputStepLines = lines[:limit] lines = lines[limit:] for line in outputStepLines: - print line + print(line) if len(lines) == 0: break - ch = raw_input('( Press to continue or to quit )') + ch = input('( Press to continue or to quit )') if ch == 'q' or ch == 'Q': break def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): ''' Method to modify an existent object - + @param object: The object to be modified @param iteration: Integer which specifies the depth of the recursion in the same object @param contentFile: The content of the file storing the stream @@ -4322,7 +4339,7 @@ def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): content = open(contentFile, 'rb').read() else: if objectType == 'string' or objectType == 'hexstring': - res = raw_input(newLine + 'Do you want to enter an ascii (1) or hexadecimal (2) string? (1/2) ') + res = input(newLine + 'Do you want to enter an ascii (1) or hexadecimal (2) string? 
(1/2) ') if res == '1': newObjectType = 'string' elif res == '2': @@ -4332,7 +4349,7 @@ def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): elif objectType == 'integer' or objectType == 'real': newObjectType = 'number' if iteration == 0: - content = raw_input( + content = input( newLine + 'Please, specify the ' + newObjectType + ' object content (if the content includes EOL characters use a file instead):' + newLine * 2) else: value = object.getValue() @@ -4341,7 +4358,7 @@ def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): if res == 'd': return (0, None) elif res == 'm': - content = raw_input( + content = input( newLine + 'Please, specify the ' + newObjectType + ' object content:' + newLine * 2) else: return (0, object) @@ -4395,7 +4412,7 @@ def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): if contentFile is not None: streamContent = open(contentFile, 'rb').read() else: - streamContent = raw_input( + streamContent = input( newLine + 'Please, specify the stream content (if the content includes EOL characters use a file instead):' + newLine * 2) object.setDecodedStream(streamContent) else: @@ -4419,7 +4436,7 @@ def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): if res is None: return (-1, 'Option not valid!!') elif res == 'y': - key = raw_input('Name object: ') + key = input('Name object: ') key = self.checkInputContent('name', key) ret = self.addObject(iteration + 1) if ret[0] == -1: @@ -4433,7 +4450,7 @@ def modifyObject(self, object, iteration=0, contentFile=None, maxDepth=10): def modifyRequest(self, value, rawValue, key=None, stream=False): ''' Method to ask the user what he wants to do with the object: modify, delete or nothing. - + @param value: The value of the object. @param rawValue: The raw value of the object. @param key: The key of a dictionary entry. 
@@ -4452,18 +4469,18 @@ def modifyRequest(self, value, rawValue, key=None, stream=False): if stream: message += ' in the STREAM' message += '? (m/d/n) ' - response = raw_input(message) + response = input(message) if response.lower() not in ['m', 'd', 'n']: return None else: if stream and response.lower() == 'm': - print 'Value: ' + str(value) + newLine + print(('Value: ' + str(value) + newLine)) return response.lower() def parseArgs(self, args): ''' Method to split up the command arguments by quotes: \'\'\', " or \' - + @param args: The command arguments @return: An array with the separated arguments ''' @@ -4521,7 +4538,6 @@ def parseArgs(self, args): else: argsArray.append(args) args = '' - #print argsArray if len(argsArray) > 1: if argsArray[-2] in redirectSymbols: if argsArray[-2] == '>': @@ -4582,7 +4598,7 @@ def parseArgs(self, args): def printBytes(self, bytes): ''' Given a byte string shows the hexadecimal and ascii output in a nice way - + @param bytes: A string @return: String with mixed hexadecimal and ascii strings, like the 'hexdump -C' output ''' @@ -4616,7 +4632,7 @@ def printBytes(self, bytes): def printResult(self, result): ''' Given an string returns a mixed hexadecimal-ascci output if there are many non printable characters or the same string in other case - + @param result: A string @return: A mixed hexadecimal-ascii output if there are many non printable characters or the input string in other case ''' @@ -4630,7 +4646,7 @@ def printResult(self, result): def printTreeNode(self, node, nodesInfo, expandedNodes=[], depth=0, recursive=True): ''' Given a tree prints the whole tree and its dependencies - + @param node: Root of the tree @param nodesInfo: Information abour the nodes of the tree @param expandedNodes: Already expanded nodes @@ -4639,7 +4655,7 @@ def printTreeNode(self, node, nodesInfo, expandedNodes=[], depth=0, recursive=Tr @return: A tuple (expandedNodes,output), where expandedNodes is a list with the distinct nodes and output is the 
string representation of the tree ''' output = '' - if nodesInfo.has_key(node): + if node in nodesInfo: if node not in expandedNodes or (node in expandedNodes and depth > 0): output += '\t' * depth + nodesInfo[node][0] + ' (' + str(node) + ')' + newLine if node not in expandedNodes: @@ -4647,7 +4663,7 @@ def printTreeNode(self, node, nodesInfo, expandedNodes=[], depth=0, recursive=Tr children = nodesInfo[node][1] if children != []: for child in children: - if nodesInfo.has_key(child): + if child in nodesInfo: childType = nodesInfo[child][0] else: childType = 'Unknown' diff --git a/PDFCore.py b/peepdf/PDFCore.py similarity index 81% rename from PDFCore.py rename to peepdf/PDFCore.py index 3b2fe00..5956ce7 100644 --- a/PDFCore.py +++ b/peepdf/PDFCore.py @@ -3,7 +3,7 @@ # http://peepdf.eternal-todo.com # By Jose Miguel Esparza # -# Copyright (C) 2011-2017 Jose Miguel Esparza +# Copyright (C) 2011-2018 Jose Miguel Esparza # # This file is part of peepdf. # @@ -25,11 +25,25 @@ This module contains classes and methods to analyse and modify PDF files ''' -import sys,os,re,hashlib,struct,aes as AES -from PDFUtils import * -from PDFCrypto import * -from JSAnalysis import * -from PDFFilters import decodeStream,encodeStream +import hashlib +import os +import random +import re +import sys +import six +import codecs + +import peepdf.aes as AES +from peepdf.PDFUtils import ( + encodeName, unescapeString, encodeString, escapeString, numToHex, + numToString +) +from peepdf.PDFCrypto import ( + RC4, computeObjectKey, computeUserPass, isUserPass, isOwnerPass, + computeEncryptionKey, computeOwnerPass +) +from peepdf.JSAnalysis import isJavascript, analyseJS +from peepdf.PDFFilters import decodeStream, encodeStream MAL_ALL = 1 MAL_HEAD = 2 @@ -41,10 +55,10 @@ newLine = os.linesep isForceMode = False isManualAnalysis = False -spacesChars = ['\x00','\x09','\x0a','\x0c','\x0d','\x20'] -delimiterChars = ['<<','(','<','[','{','/','%'] -monitorizedEvents = ['/OpenAction ','/AA ','/Names 
','/AcroForm ', '/XFA '] -monitorizedActions = ['/JS ','/JavaScript','/Launch','/SubmitForm','/ImportData'] +spacesChars = ['\x00', '\x09', '\x0a', '\x0c', '\x0d', '\x20'] +delimiterChars = ['<<', '(', '<', '[', '{', '/', '%'] +monitorizedEvents = ['/OpenAction ', '/AA ', '/Names ', '/AcroForm ', '/XFA '] +monitorizedActions = ['/JS ', '/JavaScript', '/Launch', '/SubmitForm', '/ImportData'] monitorizedElements = ['/EmbeddedFiles ', '/EmbeddedFile', '/JBIG2Decode', @@ -67,32 +81,32 @@ 'app.removeToolButton'] singUniqueName = 'CoolType.SING.uniqueName' bmpVuln = 'BMP/RLE heap corruption' -vulnsDict = {'mailto':('mailto',['CVE-2007-5020']), - 'Collab.collectEmailInfo':('Collab.collectEmailInfo',['CVE-2007-5659']), - 'util.printf':('util.printf',['CVE-2008-2992']), - '/JBIG2Decode':('Adobe JBIG2Decode Heap Corruption',['CVE-2009-0658']), - 'getIcon':('getIcon',['CVE-2009-0927']), - 'getAnnots':('getAnnots',['CVE-2009-1492']), - 'spell.customDictionaryOpen':('spell.customDictionaryOpen',['CVE-2009-1493']), - 'media.newPlayer':('media.newPlayer',['CVE-2009-4324']), - '.rawValue':('Adobe Acrobat Bundled LibTIFF Integer Overflow',['CVE-2010-0188']), - singUniqueName:(singUniqueName,['CVE-2010-2883']), - 'doc.printSeps':('doc.printSeps',['CVE-2010-4091']), - '/U3D':('/U3D',['CVE-2009-3953','CVE-2009-3959','CVE-2011-2462']), - '/PRC':('/PRC',['CVE-2011-4369']), - 'keep.previous':('Adobe Reader XFA oneOfChild Un-initialized memory vulnerability',['CVE-2013-0640']), # https://labs.portcullis.co.uk/blog/cve-2013-0640-adobe-reader-xfa-oneofchild-un-initialized-memory-vulnerability-part-1/ - bmpVuln:(bmpVuln,['CVE-2013-2729']), - 'app.removeToolButton':('app.removeToolButton',['CVE-2013-3346'])} -jsContexts = {'global':None} - -class PDFObject : +vulnsDict = {'mailto': ('mailto', ['CVE-2007-5020']), + 'Collab.collectEmailInfo': ('Collab.collectEmailInfo', ['CVE-2007-5659']), + 'util.printf': ('util.printf', ['CVE-2008-2992']), + '/JBIG2Decode': ('Adobe JBIG2Decode Heap 
Corruption', ['CVE-2009-0658']), + 'getIcon': ('getIcon', ['CVE-2009-0927']), + 'getAnnots': ('getAnnots', ['CVE-2009-1492']), + 'spell.customDictionaryOpen': ('spell.customDictionaryOpen', ['CVE-2009-1493']), + 'media.newPlayer': ('media.newPlayer', ['CVE-2009-4324']), + '.rawValue': ('Adobe Acrobat Bundled LibTIFF Integer Overflow', ['CVE-2010-0188']), + singUniqueName: (singUniqueName, ['CVE-2010-2883']), + 'doc.printSeps': ('doc.printSeps', ['CVE-2010-4091']), + '/U3D': ('/U3D', ['CVE-2009-3953', 'CVE-2009-3959', 'CVE-2011-2462']), + '/PRC': ('/PRC', ['CVE-2011-4369']), + 'keep.previous': ('Adobe Reader XFA oneOfChild Un-initialized memory vulnerability', ['CVE-2013-0640']), # https://labs.portcullis.co.uk/blog/cve-2013-0640-adobe-reader-xfa-oneofchild-un-initialized-memory-vulnerability-part-1/ + bmpVuln: (bmpVuln, ['CVE-2013-2729']), + 'app.removeToolButton': ('app.removeToolButton', ['CVE-2013-3346'])} +jsContexts = {'global': None} + +class PDFObject: ''' Base class for all the PDF objects ''' - def __init__(self, raw = None): + def __init__(self, raw=None): ''' Constructor of a PDFObject - + @param raw: The raw value of the PDF object ''' self.references = [] @@ -110,38 +124,38 @@ def __init__(self, raw = None): self.errors = [] self.referencesInElements = {} self.compressedIn = None - + def addError(self, errorMessage): ''' Add an error to the object - + @param errorMessage: The error message to be added (string) ''' if errorMessage not in self.errors: self.errors.append(errorMessage) - + def contains(self, string): ''' Look for the string inside the object content - + @param string: A string @return: A boolean to specify if the string has been found or not ''' value = str(self.value) rawValue = str(self.rawValue) encValue = str(self.encryptedValue) - if re.findall(string,value,re.IGNORECASE) != [] or re.findall(string,rawValue,re.IGNORECASE) != [] or re.findall(string,encValue,re.IGNORECASE) != []: + if re.findall(string, value, re.IGNORECASE) != [] or 
re.findall(string, rawValue, re.IGNORECASE) != [] or re.findall(string, encValue, re.IGNORECASE) != []: return True if self.containsJS(): for js in self.JSCode: - if re.findall(string,js,re.IGNORECASE) != []: + if re.findall(string, js, re.IGNORECASE) != []: return True return False def containsJS(self): ''' Method to check if there are Javascript code inside the object - + @return: A boolean ''' return self.containsJScode @@ -159,81 +173,81 @@ def containsURIs(self): def encodeChars(self): ''' - Encode the content of the object if possible (only for PDFName, PDFString, PDFArray and PDFStreams) - + Encode the content of the object if possible (only for PDFName, PDFString, PDFArray and PDFStreams) + @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' - return (0,'') - + return (0, '') + def encrypt(self, password): ''' - Encrypt the content of the object if possible - + Encrypt the content of the object if possible + @param password: The password used to encrypt the object. It's dependent on the object. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' - return (0,'') + return (0, '') def getCompressedIn(self): ''' - Gets the id of the object (object stream) where the actual object is compressed - + Gets the id of the object (object stream) where the actual object is compressed + @return: The id (int) of the object stream or None if it's not compressed ''' return self.compressedIn - + def getEncryptedValue(self): ''' - Gets the encrypted value of the object - + Gets the encrypted value of the object + @return: The encrypted value or the raw value if the object is not encrypted ''' - return self.encryptedValue + return self.encryptedValue def getEncryptionKey(self): ''' - Gets the encryption key (password) used to encrypt the object - + Gets the encryption key (password) used to encrypt the object + @return: The password (string) or an empty string if it's not encrypted ''' return self.encryptionKey def getErrors(self): ''' - Gets the error messages found while parsing and processing the object - + Gets the error messages found while parsing and processing the object + @return: The array of errors of the object ''' return self.errors def getRawValue(self): ''' - Gets the raw value of the object - + Gets the raw value of the object + @return: The raw value of the object, this means without applying filters or decoding characters ''' return self.rawValue def getReferences(self): ''' - Gets the referenced objects in the actual object - - @return: An array of references in the object (Ex. ['1 0 R','12 0 R']) + Gets the referenced objects in the actual object + + @return: An array of references in the object (Ex. ['1 0 R', '12 0 R']) ''' return self.references - + def getReferencesInElements(self): ''' Gets the dependencies between elements in the object and objects in the rest of the document. - + @return: A dictionary of dependencies of the object (Ex. 
{'/Length':[5,'']} or {'/Length':[5,'354']}) ''' return self.referencesInElements def getStats(self): ''' - Gets the statistics of the object - + Gets the statistics of the object + @return: An array of different statistics of the object (object type, compression, references, etc) ''' stats = {} @@ -262,70 +276,70 @@ def getStats(self): else: stats['Errors'] = None return stats - + def getType(self): ''' - Gets the type of the object - + Gets the type of the object + @return: The object type (bool, null, real, integer, name, string, hexstring, reference, array, dictionary, stream) ''' return self.type def getValue(self): ''' - Gets the value of the object - + Gets the value of the object + @return: The value of the object, this means after applying filters and/or decoding characters and strings ''' - return self.value + return self.value def isCompressed(self): ''' - Specifies if the object is compressed or not - + Specifies if the object is compressed or not + @return: A boolean ''' - if self.compressedIn != None: + if self.compressedIn is not None: return True else: return False - + def isEncrypted(self): ''' - Specifies if the object is encrypted or not - + Specifies if the object is encrypted or not + @return: A boolean ''' return self.encrypted def isFaulty(self): ''' - Specifies if the object has errors or not - + Specifies if the object has errors or not + @return: A boolean ''' if self.errors == []: return False else: return True - + def replace(self, string1, string2): ''' - Searches the object for the 'string1' and if it's found it's replaced by 'string2' - + Searches the object for the 'string1' and if it's found it's replaced by 'string2' + @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' if self.value.find(string1) == -1 and self.rawValue.find(string1) == -1: - return (-1,'String not found') + return (-1, 'String not found') self.value = self.value.replace(string1, 
string2) self.rawValue = self.rawValue.replace(string1, string2) ret = self.update() return ret - + def resolveReferences(self): ''' - Replaces the reference to an object by its value if there are references not resolved. Ex. /Length 3 0 R - + Replaces the reference to an object by its value if there are references not resolved. Ex. /Length 3 0 R + @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' pass @@ -343,7 +357,7 @@ def setReferencedJSObject(self, value): def setCompressedIn(self, id): ''' Sets the object id of the object stream containing the actual object - + @param id: The object id (int) ''' self.compressedIn = id @@ -351,59 +365,59 @@ def setCompressedIn(self, id): def setEncryptedValue(self, value): ''' Sets the encrypted value of the object - - @param value: The encrypted value (string) + + @param value: The encrypted value (string) ''' self.encryptedValue = value - + def setEncryptionKey(self, password): ''' Sets the password to encrypt/decrypt the object - - @param password: The encryption key (string) + + @param password: The encryption key (string) ''' self.encryptionKey = password def setRawValue(self, newRawValue): ''' Sets the raw value of the object and updates the object if some modification is needed - + @param newRawValue: The new raw value (string) @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' self.rawValue = newRawValue ret = self.update() return ret - + def setReferencesInElements(self, resolvedReferencesDict): ''' Sets the resolved references array - - @param resolvedReferencesDict: A dictionary with the resolved references + + @param resolvedReferencesDict: A dictionary with the resolved references ''' self.referencesInElements = resolvedReferencesDict def setValue(self, newValue): ''' Sets the value of the object - - @param newValue: The new value of the object 
(string) + + @param newValue: The new value of the object (string) ''' self.value = newValue - + def update(self): ''' Updates the object after some modification has occurred - + @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' self.encryptedValue = self.rawValue - return (0,'') + return (0, '') def toFile(self): ''' - Gets the raw or encrypted value of the object to write it to an output file - + Gets the raw or encrypted value of the object to write it to an output file + @return: The raw/encrypted value of the object (string) ''' if self.encrypted: @@ -412,11 +426,11 @@ def toFile(self): return self.getRawValue() -class PDFBool (PDFObject) : +class PDFBool(PDFObject): ''' Boolean object of a PDF document ''' - def __init__(self, value) : + def __init__(self, value): self.type = 'bool' self.errors = [] self.references = [] @@ -431,11 +445,11 @@ def __init__(self, value) : self.compressedIn = None -class PDFNull (PDFObject) : +class PDFNull(PDFObject): ''' Null object of a PDF document ''' - def __init__(self, content) : + def __init__(self, content): self.type = 'null' self.errors = [] self.JSCode = [] @@ -450,11 +464,11 @@ def __init__(self, content) : self.references = [] -class PDFNum (PDFObject) : +class PDFNum(PDFObject): ''' Number object of a PDF document: can be an integer or a real number. 
''' - def __init__(self, num) : + def __init__(self, num): self.errors = [] self.JSCode = [] self.uriList = [] @@ -473,14 +487,14 @@ def __init__(self, num) : self.addError(ret[1]) else: raise Exception(ret[1]) - + def replace(self, string1, string2): if self.value.find(string1) == -1: - return (-1,'String not found') + return (-1, 'String not found') self.value = self.value.replace(string1, string2) ret = self.update() return ret - + def update(self): self.errors = [] try: @@ -493,27 +507,27 @@ def update(self): except: errorMessage = 'Numeric conversion error' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) self.encryptedValue = str(self.rawValue) - return (0,'') - + return (0, '') + def setRawValue(self, rawValue): self.rawValue = rawValue - + def setValue(self, value): self.value = value ret = self.update() return ret - + def toFile(self): return str(self.rawValue) -class PDFName (PDFObject) : +class PDFName(PDFObject): ''' Name object of a PDF document ''' - def __init__(self, name) : + def __init__(self, name): self.type = 'name' self.errors = [] self.JSCode = [] @@ -545,12 +559,12 @@ def update(self): hexNumbers = re.findall('#([0-9a-f]{2})', self.value, re.DOTALL | re.IGNORECASE) try: for hexNumber in hexNumbers: - self.value = self.value.replace('#' + hexNumber, chr(int(hexNumber,16))) + self.value = self.value.replace('#' + hexNumber, chr(int(hexNumber, 16))) except: errorMessage = 'Error in hexadecimal conversion' self.addError(errorMessage) - return (-1,errorMessage) - return (0,'') + return (-1, errorMessage) + return (0, '') def encodeChars(self): ret = encodeName(self.value) @@ -559,14 +573,14 @@ def encodeChars(self): return ret else: self.rawValue = ret[1] - return (0,'') + return (0, '') -class PDFString (PDFObject) : +class PDFString(PDFObject): ''' String object of a PDF document ''' - def __init__(self, string) : + def __init__(self, string): self.type = 'string' self.errors = [] self.compressedIn = None @@ 
-587,11 +601,11 @@ def __init__(self, string) : self.addError(ret[1]) else: raise Exception(ret[1]) - - def update(self, decrypt = False): + + def update(self, decrypt=False): ''' Updates the object after some modification has occurred - + @param decrypt: A boolean indicating if a decryption has been performed. By default: False. @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -603,21 +617,21 @@ def update(self, decrypt = False): self.rawValue = unescapeString(self.rawValue) self.value = self.rawValue ''' - self.value = self.value.replace('\)',')') - self.value = self.value.replace('\\\\','\\') - self.value = self.value.replace('\\\r\\\n','') - self.value = self.value.replace('\\\r','') - self.value = self.value.replace('\\\n','') + self.value = self.value.replace('\)', ')') + self.value = self.value.replace('\\\\', '\\') + self.value = self.value.replace('\\\r\\\n', '') + self.value = self.value.replace('\\\r', '') + self.value = self.value.replace('\\\n', '') ''' octalNumbers = re.findall('\\\\([0-7]{1,3})', self.value, re.DOTALL) try: for octal in octalNumbers: - #TODO: check!! \\\\? - self.value = self.value.replace('\\' + octal, chr(int(octal,8))) + # TODO: check!! \\\\? 
+ self.value = self.value.replace('\\' + octal, chr(int(octal, 8))) except: errorMessage = 'Error in octal conversion' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) if isJavascript(self.value) or self.referencedJSObject: self.containsJScode = True self.JSCode, self.unescapedBytes, self.urlsFound, jsErrors, jsContexts['global'] = analyseJS(self.value, jsContexts['global'], isManualAnalysis) @@ -627,107 +641,107 @@ def update(self, decrypt = False): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if self.encrypted and not decrypt: ret = self.encrypt() if ret[0] == -1: return ret - return (0,'') + return (0, '') def encodeChars(self): - ret = encodeString(self.value) - if ret[0] == -1: - self.addError(ret[1]) - return ret - else: - self.rawValue = ret[1] - return (0,'') - - def encrypt(self, password = None): + ret = encodeString(self.value) + if ret[0] == -1: + self.addError(ret[1]) + return ret + else: + self.rawValue = ret[1] + return (0, '') + + def encrypt(self, password=None): self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password try: - self.encryptedValue = RC4(self.rawValue,self.encryptionKey) + self.encryptedValue = RC4(self.rawValue, self.encryptionKey) except: errorMessage = 'Error encrypting with RC4' self.addError(errorMessage) - return (-1,errorMessage) - return (0,'') + return (-1, errorMessage) + return (0, '') - def decrypt(self, password = None, algorithm = 'RC4'): + def decrypt(self, password=None, algorithm='RC4'): ''' - Decrypt the content of the object if possible - + Decrypt the content of the object if possible + @param password: The password used to decrypt the object. It's dependent on the object. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password try: cleanString = unescapeString(self.encryptedValue) if algorithm == 'RC4': - self.rawValue = RC4(cleanString,self.encryptionKey) + self.rawValue = RC4(cleanString, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(cleanString,self.encryptionKey) + ret = AES.decryptData(cleanString, self.encryptionKey) if ret[0] != -1: self.rawValue = ret[1] else: errorMessage = 'AES decryption error: '+ret[1] self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) except: errorMessage = 'Error decrypting with '+str(algorithm) self.addError(errorMessage) - return (-1,errorMessage) - ret = self.update(decrypt = True) - return (0,'') - + return (-1, errorMessage) + ret = self.update(decrypt=True) + return (0, '') + def getEncryptedValue(self): - return '('+escapeString(self.encryptedValue)+')' - + return '(' + escapeString(self.encryptedValue) + ')' + def getJSCode(self): ''' - Gets the Javascript code of the object - + Gets the Javascript code of the object + @return: An array of Javascript code sections ''' - return self.JSCode - + return self.JSCode + def getRawValue(self): return '('+escapeString(self.rawValue)+')' - + def getUnescapedBytes(self): ''' - Gets the escaped bytes of the object unescaped - + Gets the escaped bytes of the object unescaped + @return: An array of unescaped bytes (string) ''' return self.unescapedBytes - + def getURLs(self): ''' - Gets the URLs of the object - + Gets the URLs of the object + @return: An array of URLs ''' return self.urlsFound -class PDFHexString (PDFObject) : +class PDFHexString(PDFObject): ''' Hexadecimal string object of a PDF document ''' - def __init__(self, hex) : + def __init__(self, hex): self.asciiValue = '' self.type = 'hexstring' self.errors = [] 
self.compressedIn = None self.encrypted = False - self.value = '' # Value after hex decoding and decryption - self.rawValue = hex # Hex characters - self.encryptedValue = hex # Value after hex decoding + self.value = '' # Value after hex decoding and decryption + self.rawValue = hex # Hex characters + self.encryptedValue = hex # Value after hex decoding self.updateNeeded = False self.containsJScode = False self.referencedJSObject = False @@ -742,12 +756,12 @@ def __init__(self, hex) : if isForceMode: self.addError(ret[1]) else: - raise Exception(ret[1]) - - def update(self, decrypt = False, newHexValue = True): + raise Exception(ret[1]) + + def update(self, decrypt=False, newHexValue=True): ''' Updates the object after some modification has occurred - + @param decrypt: A boolean indicating if a decryption has been performed. By default: False. @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -764,15 +778,17 @@ def update(self, decrypt = False, newHexValue = True): tmpValue = self.rawValue if len(tmpValue) % 2 != 0: tmpValue += '0' - self.value = tmpValue.decode('hex') + self.value = codecs.decode(tmpValue, 'hex') + if six.PY3: + self.value = self.value.decode('latin-1') else: # New decoded value - self.rawValue = self.value.encode('hex') + self.rawValue = codecs.encode(self.value, 'hex') self.encryptedValue = self.value except: errorMessage = 'Error in hexadecimal conversion' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) if isJavascript(self.value) or self.referencedJSObject: self.containsJScode = True self.JSCode, self.unescapedBytes, self.urlsFound, jsErrors, jsContexts['global'] = analyseJS(self.value, jsContexts['global'], isManualAnalysis) @@ -782,53 +798,53 @@ def update(self, decrypt = False, newHexValue = True): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if self.encrypted 
and not decrypt: ret = self.encrypt() if ret[0] == -1: return ret - return (0,'') + return (0, '') - def encrypt(self, password = None): + def encrypt(self, password=None): self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password try: - self.encryptedValue = RC4(self.value,self.encryptionKey) + self.encryptedValue = RC4(self.value, self.encryptionKey) self.rawValue = self.encryptedValue.encode('hex') except: errorMessage = 'Error encrypting with RC4' self.addError(errorMessage) - return (-1,errorMessage) - return (0,'') - - def decrypt(self, password = None, algorithm = 'RC4'): + return (-1, errorMessage) + return (0, '') + + def decrypt(self, password=None, algorithm='RC4'): ''' - Decrypt the content of the object if possible - + Decrypt the content of the object if possible + @param password: The password used to decrypt the object. It's dependent on the object. @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password try: cleanString = unescapeString(self.encryptedValue) if algorithm == 'RC4': - self.value = RC4(cleanString,self.encryptionKey) + self.value = RC4(cleanString, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(cleanString,self.encryptionKey) + ret = AES.decryptData(cleanString, self.encryptionKey) if ret[0] != -1: self.value = ret[1] else: errorMessage = 'AES decryption error: '+ret[1] self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) except: errorMessage = 'Error decrypting with '+str(algorithm) self.addError(errorMessage) - return (-1,errorMessage) - ret = self.update(decrypt = True) + return (-1, errorMessage) + ret = self.update(decrypt=True) return ret def getEncryptedValue(self): @@ -836,8 +852,8 @@ def getEncryptedValue(self): def getJSCode(self): ''' - Gets the 
Javascript code of the object - + Gets the Javascript code of the object + @return: An array of Javascript code sections ''' return self.JSCode @@ -847,26 +863,26 @@ def getRawValue(self): def getUnescapedBytes(self): ''' - Gets the escaped bytes of the object unescaped - + Gets the escaped bytes of the object unescaped + @return: An array of unescaped bytes (string) ''' return self.unescapedBytes - + def getURLs(self): ''' - Gets the URLs of the object - + Gets the URLs of the object + @return: An array of URLs ''' return self.urlsFound -class PDFReference (PDFObject) : +class PDFReference(PDFObject): ''' Reference object of a PDF document ''' - def __init__(self, id, genNumber = '0') : + def __init__(self, id, genNumber='0'): self.type = 'reference' self.errors = [] self.JSCode = [] @@ -887,7 +903,7 @@ def __init__(self, id, genNumber = '0') : self.addError(ret[1]) else: raise Exception(ret[1]) - + def update(self): self.errors = [] self.value = self.encryptedValue = self.rawValue @@ -898,29 +914,29 @@ def update(self): else: errorMessage = 'Error getting PDFReference elements' self.addError(errorMessage) - return (-1,errorMessage) - return (0,'') - + return (-1, errorMessage) + return (0, '') + def getGenNumber(self): ''' Gets the generation number of the reference - + @return: The generation number (int) ''' return self.genNumber - + def getId(self): ''' Gets the object id of the reference - + @return: The object id (int) ''' return self.id - + def setGenNumber(self, newGenNumber): ''' Sets the generation number of the reference - + @param newGenNumber: The new generation number (int) ''' self.genNumber = newGenNumber @@ -928,17 +944,17 @@ def setGenNumber(self, newGenNumber): def setId(self, newId): ''' Sets the object id of the reference - + @param newId: The new object id (int) ''' self.id = newId -class PDFArray (PDFObject) : +class PDFArray(PDFObject): ''' Array object of a PDF document ''' - def __init__(self, rawContent = '', elements = []) : + def 
__init__(self, rawContent='', elements=[]): self.type = 'array' self.errors = [] self.JSCode = [] @@ -961,10 +977,10 @@ def __init__(self, rawContent = '', elements = []) : else: raise Exception(ret[1]) - def update(self, decrypt = False): + def update(self, decrypt=False): ''' Updates the object after some modification has occurred - + @param decrypt: A boolean indicating if a decryption has been performed. By default: False. @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -979,7 +995,7 @@ def update(self, decrypt = False): self.unescapedBytes = [] self.urlsFound = [] for element in self.elements: - if element != None: + if element is not None: type = element.getType() if type == 'reference': self.references.append(element.getValue()) @@ -993,7 +1009,7 @@ def update(self, decrypt = False): if element.isFaulty(): for error in element.getErrors(): self.addError('Children element contains errors: ' + error) - if type in ['string','hexstring','array','dictionary'] and self.encrypted and not decrypt: + if type in ['string', 'hexstring', 'array', 'dictionary'] and self.encrypted and not decrypt: ret = element.encrypt(self.encryptionKey) if ret[0] == -1: errorMessage = 'Error encrypting element' @@ -1008,54 +1024,54 @@ def update(self, decrypt = False): self.rawValue = self.rawValue[:-1] + ' ]' self.value = self.value[:-1] + ' ]' if errorMessage != '': - return (-1,'Errors while updating PDFArray') + return (-1, 'Errors while updating PDFArray') else: - return (0,'') - + return (0, '') + def addElement(self, element): ''' Adds an element to the array - + @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' self.elements.append(element) ret = self.update() return ret - - def decrypt(self, password = None, algorithm = 'RC4'): + + def decrypt(self, password=None, algorithm='RC4'): ''' - Decrypt the content of 
the object if possible - + Decrypt the content of the object if possible + @param password: The password used to decrypt the object. It's dependent on the object. @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 - ''' + ''' errorMessage = '' self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password decryptedElements = [] for element in self.elements: - if element != None: + if element is not None: type = element.getType() - if type in ['string','hexstring','array','dictionary']: + if type in ['string', 'hexstring', 'array', 'dictionary']: ret = element.decrypt(self.encryptionKey, algorithm) if ret[0] == -1: errorMessage = ret[1] self.addError(errorMessage) decryptedElements.append(element) self.elements = decryptedElements - ret = self.update(decrypt = True) + ret = self.update(decrypt=True) if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret def encodeChars(self): errorMessage = '' encodedElements = [] for element in self.elements: - if element != None: + if element is not None: type = element.getType() - if type in ['string','name','array','dictionary']: + if type in ['string', 'name', 'array', 'dictionary']: ret = element.encodeChars() if ret[0] == -1: errorMessage = ret[1] @@ -1064,42 +1080,42 @@ def encodeChars(self): self.elements = encodedElements ret = self.update() if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - - def encrypt(self, password = None): + + def encrypt(self, password=None): self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password ret = self.update() return ret - + def getElementByName(self, name): ''' Gets the dictionary elements with the given name - + @param name: The name @return: An array of elements ''' retElements = [] for element in self.elements: - if element 
!= None: + if element is not None: if element.getType() == 'dictionary' or element.getType() == 'array': retElements += element.getElementByName(name) else: errorMessage = 'None elements' self.addError(errorMessage) return retElements - + def getElementRawValues(self): ''' Gets the raw values of each element - + @return: An array of values ''' values = [] for element in self.elements: - if element != None: + if element is not None: values.append(element.getRawValue()) else: values.append(None) @@ -1110,12 +1126,12 @@ def getElementRawValues(self): def getElementValues(self): ''' Gets the values of each element - + @return: An array of values ''' values = [] for element in self.elements: - if element != None: + if element is not None: values.append(element.getValue()) else: values.append(None) @@ -1126,7 +1142,7 @@ def getElementValues(self): def getElements(self): ''' Gets the elements of the array object - + @return: An array of PDFObject elements ''' return self.elements @@ -1134,7 +1150,7 @@ def getElements(self): def getNumElements(self): ''' Gets the number of elements of the array - + @return: The number of elements (int) ''' return len(self.elements) @@ -1142,12 +1158,12 @@ def getNumElements(self): def hasElement(self, name): ''' Specifies if the array contains the element with the given name - + @param name: The element @return: A boolean ''' for element in self.elements: - if element != None: + if element is not None: if element.getType() == 'dictionary': if element.hasElement(name): return True @@ -1169,7 +1185,7 @@ def replace(self, string1, string2): if errorMessage == 'String not found': errorMessage = '' for element in self.elements: - if element != None: + if element is not None: ret = element.replace(string1, string2) if ret[0] == -1: if ret[1] != 'String not found' or not stringFound: @@ -1183,17 +1199,17 @@ def replace(self, string1, string2): errorMessage = 'None element while replacing strings' self.addError('None element') if not stringFound: 
- return (-1,'String not found') + return (-1, 'String not found') self.elements = newElements ret = self.update() if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - + def setElements(self, newElements): ''' Sets the array of elements - + @param newElements: The new array of elements @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -1201,9 +1217,16 @@ def setElements(self, newElements): ret = self.update() return ret + def getJSCode(self): + ''' + Gets the Javascript code of the object + @return: An array of Javascript code sections + ''' + return self.JSCode + -class PDFDictionary (PDFObject): - def __init__(self, rawContent = '', elements = {}, rawNames = {}) : +class PDFDictionary(PDFObject): + def __init__(self, rawContent='', elements={}, rawNames={}): self.type = 'dictionary' self.dictType = '' self.errors = [] @@ -1231,11 +1254,11 @@ def __init__(self, rawContent = '', elements = {}, rawNames = {}) : self.addError(ret[1]) else: raise Exception(ret[1]) - - def update(self, decrypt = False): + + def update(self, decrypt=False): ''' Updates the object after some modification has occurred - + @param decrypt: A boolean indicating if a decryption has been performed. By default: False. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -1252,16 +1275,16 @@ def update(self, decrypt = False): self.value = '<< ' self.rawValue = '<< ' self.encryptedValue = '<< ' - keys = self.elements.keys() - values = self.elements.values() + keys = list(self.elements.keys()) + values = list(self.elements.values()) for i in range(len(keys)): - if values[i] == None: + if values[i] is None: errorMessage = 'Non-existing value for key "'+str(keys[i])+'"' if isForceMode: self.addError(errorMessage) valueObject = PDFString('') else: - return (-1,errorMessage) + return (-1, errorMessage) else: valueObject = values[i] v = valueObject.getValue() @@ -1293,13 +1316,13 @@ def update(self, decrypt = False): if valueObject.isFaulty(): for error in valueObject.getErrors(): self.addError('Children element contains errors: ' + error) - if self.rawNames.has_key(keys[i]): + if keys[i] in self.rawNames: rawName = self.rawNames[keys[i]] rawValue = rawName.getRawValue() else: rawValue = keys[i] self.rawNames[keys[i]] = PDFName(keys[i][1:]) - if type in ['string','hexstring','array','dictionary'] and self.encrypted and not decrypt: + if type in ['string', 'hexstring', 'array', 'dictionary'] and self.encrypted and not decrypt: ret = valueObject.encrypt(self.encryptionKey) if ret[0] == -1: errorMessage = 'Error encrypting element' @@ -1311,52 +1334,52 @@ def update(self, decrypt = False): self.rawValue = self.rawValue[:-1] + ' >>' self.value = self.value[:-1] + ' >>' if errorMessage != '': - return (-1,errorMessage) - return (0,'') - - def decrypt(self, password = None, algorithm = 'RC4'): + return (-1, errorMessage) + return (0, '') + + def decrypt(self, password=None, algorithm='RC4'): ''' - Decrypt the content of the object if possible - + Decrypt the content of the object if possible + @param password: The password used to decrypt the object. It's dependent on the object. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' self.encrypted = True errorMessage = '' - if password != None: + if password is not None: self.encryptionKey = password decryptedElements = {} for key in self.elements: object = self.elements[key] objectType = object.getType() - if objectType in ['string','hexstring','array','dictionary']: + if objectType in ['string', 'hexstring', 'array', 'dictionary']: ret = object.decrypt(self.encryptionKey, algorithm) if ret[0] == -1: errorMessage = ret[1] self.addError(errorMessage) decryptedElements[key] = object self.elements = decryptedElements - ret = self.update(decrypt = True) + ret = self.update(decrypt=True) if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - - def delElement(self, name, update = True): + + def delElement(self, name, update=True): ''' Removes the element from the dictionary - + @param name: The element to remove @param update: A boolean indicating if it's necessary an update of the object. By default: True. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' - if self.elements.has_key(name): + if name in self.elements: del(self.elements[name]) if update: ret = self.update() return ret - return (0,'') + return (0, '') else: - return (-1,'Element not found') + return (-1, 'Element not found') def encodeChars(self): encodedElements = {} @@ -1367,7 +1390,7 @@ def encodeChars(self): self.rawNames[key] = rawName object = self.elements[key] objectType = object.getType() - if objectType in ['string','name','array','dictionary']: + if objectType in ['string', 'name', 'array', 'dictionary']: ret = object.encodeChars() if ret[0] == -1: errorMessage = ret[1] @@ -1376,12 +1399,12 @@ def encodeChars(self): self.elements = encodedElements ret = self.update() if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - - def encrypt(self, password = None): + + def encrypt(self, password=None): self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password ret = self.update() return ret @@ -1389,7 +1412,7 @@ def encrypt(self, password = None): def getDictType(self): ''' Gets the type of dictionary - + @return: The dictionary type (string) ''' return self.dictType @@ -1397,58 +1420,58 @@ def getDictType(self): def getElement(self, name): ''' Gets the element of the dictionary with the given name - + @param name: The name of element @return: The PDFObject or None if it's not found ''' - if self.elements.has_key(name): + if name in self.elements: return self.elements[name] else: return None - def getElementByName(self, name, recursive = False): + def getElementByName(self, name, recursive=False): ''' Gets the elements with the given name - + @param name: The name @param recursive: A boolean indicating if the search is recursive or not. By default: False. 
@return: A PDFObject if recursive = False and an array of PDFObjects if recursive = True. ''' retElements = [] - if self.elements.has_key(name): + if name in self.elements: if recursive: retElements.append(self.elements[name]) else: return self.elements[name] if recursive: - for element in self.elements.values(): - if element != None and (element.getType() == 'dictionary' or element.getType() == 'array'): + for element in list(self.elements.values()): + if element is not None and (element.getType() == 'dictionary' or element.getType() == 'array'): retElements += element.getElementByName(name) return retElements - + def getElements(self): ''' Gets the elements of the array object - + @return: An array of PDFObject elements ''' return self.elements - + def getJSCode(self): ''' - Gets the Javascript code of the object - + Gets the Javascript code of the object + @return: An array of Javascript code sections ''' return self.JSCode - + def getNumElements(self): ''' Gets the number of elements of the array - + @return: The number of elements (int) ''' - return len(self.elements) + return len(self.elements) def getReferencedJSObjectIds(self): ''' @@ -1476,11 +1499,11 @@ def getStats(self): stats['Type'] = self.dictType else: stats['Type'] = None - if self.elements.has_key('/Subtype'): + if "/Subtype" in self.elements: stats['Subtype'] = self.elements['/Subtype'].getValue() else: stats['Subtype'] = None - if self.elements.has_key('/S'): + if "/S" in self.elements: stats['Action type'] = self.elements['/S'].getValue() else: stats['Action type'] = None @@ -1497,11 +1520,11 @@ def getStats(self): else: stats['JSCode'] = False return stats - + def getUnescapedBytes(self): ''' - Gets the escaped bytes of the object unescaped - + Gets the escaped bytes of the object unescaped + @return: An array of unescaped bytes (string) ''' return self.unescapedBytes @@ -1516,24 +1539,24 @@ def getURIs(self): def getURLs(self): ''' - Gets the URLs of the object - + Gets the URLs of the object 
+ @return: An array of URLs ''' return self.urlsFound - + def hasElement(self, name): ''' Specifies if the dictionary contains the element with the given name - + @param name: The element @return: A boolean ''' - if self.elements.has_key(name): + if name in self.elements: return True else: return False - + def replace(self, string1, string2): newElements = {} stringFound = False @@ -1547,7 +1570,7 @@ def replace(self, string1, string2): else: newKey = key newObject = self.elements[key] - if newObject != None: + if newObject is not None: ret = newObject.replace(string1, string2) if ret[0] == -1: if ret[1] != 'String not found' or not stringFound: @@ -1558,19 +1581,19 @@ def replace(self, string1, string2): errorMessage = '' newElements[newKey] = newObject if not stringFound: - return (-1,'String not found') + return (-1, 'String not found') self.elements = newElements ret = self.update() if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - def setElement(self, name, value, update = True): + def setElement(self, name, value, update=True): ''' Sets the element with the given name to the given value. If it does not exist a new element is created. - + @param name: The element to add or modify - @param value: The new value of the element + @param value: The new value of the element @param update: A boolean indicating if it's necessary an update of the object. By default: True. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -1578,12 +1601,12 @@ def setElement(self, name, value, update = True): if update: ret = self.update() return ret - return (0,'') + return (0, '') def setElements(self, newElements): ''' Sets the dictionary of elements - + @param newElements: The new dictionary of elements @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -1591,31 +1614,30 @@ def setElements(self, newElements): ret = self.update() return ret - def setElementValue(self, name, value, update = True): + def setElementValue(self, name, value, update=True): ''' Sets the value of the element with the given name. - + @param name: The element to modify - @param value: The new value of the element + @param value: The new value of the element @param update: A boolean indicating if it's necessary an update of the object. By default: True. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' - if self.elements.has_key(name): + if name in self.elements: self.elements[name].setValue(value) if update: ret = self.update() return ret - return (0,'') + return (0, '') else: - return (-1,'Element not found') + return (-1, 'Element not found') -class PDFStream (PDFDictionary) : +class PDFStream (PDFDictionary): ''' Stream object of a PDF document ''' - def __init__(self, rawDict = '', rawStream = '', elements = {}, rawNames = {}) : - global isForceMode + def __init__(self, rawDict='', rawStream='', elements={}, rawNames={}): self.type = 'stream' self.dictType = '' self.errors = [] @@ -1666,7 +1688,7 @@ def __init__(self, rawDict = '', rawStream = '', elements = {}, rawNames = {}) : def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): ''' Updates the object after some modification has occurred - + @param onlyElements: A boolean indicating if it's only necessary to update the stream dictionary or also the stream itself. By default: False (stream included). @param decrypt: A boolean indicating if a decryption has been performed. By default: False. 
@param algorithm: A string indicating the algorithm to use for decryption @@ -1675,8 +1697,8 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): self.value = '<< ' self.rawValue = '<< ' self.encryptedValue = '<< ' - keys = self.elements.keys() - values = self.elements.values() + keys = list(self.elements.keys()) + values = list(self.elements.values()) if not onlyElements: self.references = [] self.errors = [] @@ -1685,47 +1707,47 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): self.urlsFound = [] self.containsJScode = False self.decodingError = False - + # Dictionary - if self.elements.has_key('/Type') and self.elements['/Type'] != None: + if "/Type" in self.elements and self.elements['/Type'] is not None: if self.elements['/Type'].getValue() == '/XRef': self.xrefStream = True - if self.elements.has_key('/Length'): + if "/Length" in self.elements: length = self.elements['/Length'] - if length != None: + if length is not None: if length.getType() == 'integer': self.size = length.getRawValue() elif length.getType() == 'reference': self.updateNeeded = True - self.referencesInElements['/Length'] = [length.getId(),''] + self.referencesInElements['/Length'] = [length.getId(), ''] else: if isForceMode: self.addError('No permitted type for /Length element') else: - return (-1,'No permitted type for /Length element') + return (-1, 'No permitted type for /Length element') else: if isForceMode: self.addError('None /Length element') else: - return (-1,'None /Length element') + return (-1, 'None /Length element') else: if isForceMode: self.addError('Missing /Length in stream object') else: - return (-1,'Missing /Length in stream object') - - if self.elements.has_key('/F'): + return (-1, 'Missing /Length in stream object') + + if "/F" in self.elements: self.file = self.elements['/F'].getValue() if os.path.exists(self.file): - self.rawStream = open(self.file,'rb').read() + self.rawStream = open(self.file, 'rb').read() else: if 
isForceMode: self.addError('File "'+self.file+'" does not exist (/F)') self.rawStream = '' else: - return (-1,'File "'+self.file+'" does not exist (/F)') - - if self.elements.has_key('/Filter'): + return (-1, 'File "'+self.file+'" does not exist (/F)') + + if "/Filter" in self.elements: self.filter = self.elements['/Filter'] if self.newFilters or self.modifiedStream: self.encodedStream = '' @@ -1733,7 +1755,7 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): elif not self.encrypted: self.encodedStream = self.rawStream self.isEncodedStream = True - elif self.elements.has_key('/FFilter'): + elif "/FFilter" in self.elements: self.filter = self.elements['/FFilter'] if self.newFilters or self.modifiedStream: self.encodedStream = '' @@ -1749,25 +1771,25 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): self.decodedStream = self.rawStream self.isEncodedStream = False if self.isEncodedStream: - if self.elements.has_key('/DecodeParms'): + if "/DecodeParms" in self.elements: self.filterParams = self.elements['/DecodeParms'] - elif self.elements.has_key('/FDecodeParms'): + elif "/FDecodeParms" in self.elements: self.filterParams = self.elements['/FDecodeParms'] - elif self.elements.has_key('/DP'): + elif "/DP" in self.elements: self.filterParams = self.elements['/DP'] else: self.filterParams = None - + for i in range(len(keys)): valueElement = values[i] - if valueElement == None: + if valueElement is None: errorMessage = 'Stream dictionary has a None value' self.addError(errorMessage) valueElement = PDFString('') v = valueElement.getValue() type = valueElement.getType() if type == 'reference': - if v not in self.references: + if v not in self.references: self.references.append(v) elif type == 'dictionary' or type == 'array': self.references = list(set(self.references + valueElement.getReferences())) @@ -1779,13 +1801,13 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if valueElement.isFaulty(): for error 
in valueElement.getErrors(): self.addError('Children element contains errors: ' + error) - if self.rawNames.has_key(keys[i]): + if keys[i] in self.rawNames: rawName = self.rawNames[keys[i]] rawValue = rawName.getRawValue() else: rawValue = keys[i] self.rawNames[keys[i]] = PDFName(keys[i][1:]) - if type in ['string','hexstring','array','dictionary'] and self.encrypted and not decrypt: + if type in ['string', 'hexstring', 'array', 'dictionary'] and self.encrypted and not decrypt: ret = valueElement.encrypt(self.encryptionKey) if ret[0] == -1: errorMessage = ret[1]+' in child element' @@ -1796,20 +1818,20 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): self.encryptedValue = self.encryptedValue[:-1] + ' >>' self.rawValue = self.rawValue[:-1] + ' >>' self.value = self.value[:-1] + ' >>' - + if not onlyElements: # Stream if self.deletedFilters or self.newFilters or self.modifiedStream or self.modifiedRawStream or self.encrypted: if self.deletedFilters: if self.encrypted: try: - self.rawStream = RC4(self.decodedStream,self.encryptionKey) + self.rawStream = RC4(self.decodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.decodedStream) @@ -1818,13 +1840,13 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if ret[0] != -1: if self.encrypted: try: - self.rawStream = RC4(self.encodedStream,self.encryptionKey) + self.rawStream = RC4(self.encodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.encodedStream) @@ -1842,32 +1864,32 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: 
self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if self.isEncodedStream: ret = self.encode() if ret[0] != -1: if self.encrypted: try: - self.rawStream = RC4(self.encodedStream,self.encryptionKey) + self.rawStream = RC4(self.encodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.encodedStream) else: if self.encrypted: try: - self.rawStream = RC4(self.decodedStream,self.encryptionKey) + self.rawStream = RC4(self.decodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.decodedStream) @@ -1880,9 +1902,9 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if decrypt: try: if algorithm == 'RC4': - self.encodedStream = RC4(self.encodedStream,self.encryptionKey) + self.encodedStream = RC4(self.encodedStream, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(self.encodedStream,self.encryptionKey) + ret = AES.decryptData(self.encodedStream, self.encryptionKey) if ret[0] != -1: self.encodedStream = ret[1] else: @@ -1890,29 +1912,29 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) except: errorMessage = 'Error decrypting stream with '+str(algorithm) if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.encodedStream = self.rawStream try: - self.rawStream = RC4(self.rawStream,self.encryptionKey) + self.rawStream = RC4(self.rawStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if 
isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.decode() else: if not decrypt: - self.decodedStream = self.rawStream + self.decodedStream = self.rawStream try: - rc4Result = RC4(self.rawStream,self.encryptionKey) + rc4Result = RC4(self.rawStream, self.encryptionKey) if decrypt: self.decodedStream = rc4Result else: @@ -1922,7 +1944,7 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: if self.isEncodedStream: self.decode() @@ -1941,28 +1963,28 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: if not decrypt: try: if self.isEncodedStream: - self.rawStream = RC4(self.encodedStream,self.encryptionKey) + self.rawStream = RC4(self.encodedStream, self.encryptionKey) else: - self.rawStream = RC4(self.decodedStream,self.encryptionKey) + self.rawStream = RC4(self.decodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: if self.isEncodedStream: try: if algorithm == 'RC4': - self.encodedStream = RC4(self.encodedStream,self.encryptionKey) + self.encodedStream = RC4(self.encodedStream, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(self.encodedStream,self.encryptionKey) + ret = AES.decryptData(self.encodedStream, self.encryptionKey) if ret[0] != -1: self.encodedStream = ret[1] else: @@ -1970,20 +1992,20 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) except: errorMessage = 'Error decrypting stream with '+str(algorithm) if isForceMode: 
self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.decode() else: try: if algorithm == 'RC4': - self.decodedStream = RC4(self.decodedStream,self.encryptionKey) + self.decodedStream = RC4(self.decodedStream, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(self.decodedStream,self.encryptionKey) + ret = AES.decryptData(self.decodedStream, self.encryptionKey) if ret[0] != -1: self.decodedStream = ret[1] else: @@ -1991,13 +2013,13 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) except: errorMessage = 'Error decrypting stream with '+str(algorithm) if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if not self.isFaultyDecoding(): refs = re.findall('(\d{1,5}\s{1,3}\d{1,5}\s{1,3}R)', self.decodedStream) if refs != []: @@ -2012,30 +2034,30 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if not self.modifiedRawStream: self.modifiedStream = False self.newFilters = False self.deletedFilters = False errors = self.errors try: - self.setElement('/Length',PDFNum(str(self.size))) + self.setElement('/Length', PDFNum(str(self.size))) self.errors += errors except: errorMessage = 'Error creating PDFNum' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.modifiedRawStream = False self.modifiedStream = False self.newFilters = False self.deletedFilters = False if self.errors != []: - return (-1,self.errors[-1]) + return (-1, self.errors[-1]) else: - return (0,'') + return (0, '') def cleanStream(self): ''' @@ -2043,10 +2065,10 @@ def cleanStream(self): ''' if self.isEncodedStream: stream = self.encodedStream - tmpStream = self.encodedStream + # tmpStream 
= self.encodedStream else: stream = self.decodedStream - tmpStream = self.decodedStream + # tmpStream = self.decodedStream ''' garbage = len(stream) - self.size if garbage > 0: @@ -2092,32 +2114,32 @@ def contains(self, string): rawStream = str(self.rawStream) encStream = str(self.encodedStream) decStream = str(self.decodedStream) - if re.findall(string,value,re.IGNORECASE) != [] or re.findall(string,rawValue,re.IGNORECASE) != [] or re.findall(string,encValue,re.IGNORECASE) != [] or re.findall(string,rawStream,re.IGNORECASE) != [] or re.findall(string,encStream,re.IGNORECASE) != [] or re.findall(string,decStream,re.IGNORECASE) != []: + if re.findall(string, value, re.IGNORECASE) != [] or re.findall(string, rawValue, re.IGNORECASE) != [] or re.findall(string, encValue, re.IGNORECASE) != [] or re.findall(string, rawStream, re.IGNORECASE) != [] or re.findall(string, encStream, re.IGNORECASE) != [] or re.findall(string, decStream, re.IGNORECASE) != []: return True if self.containsJS(): for js in self.JSCode: - if re.findall(string,js,re.IGNORECASE) != []: + if re.findall(string, js, re.IGNORECASE) != []: return True - return False - - def decode (self) : + return False + + def decode(self): ''' - Decodes the stream and stores the result in decodedStream - + Decodes the stream and stores the result in decodedStream + @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' errorMessage = '' if len(self.rawStream) > 0: if self.isEncodedStream: - if self.filter == None: + if self.filter is None: errorMessage = 'Bad /Filter element' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) filterType = self.filter.getType() - if self.filterParams != None: + if self.filterParams is not None: filterParamsType = self.filterParams.getType() if filterType == 'name': - if self.filterParams == None: + if self.filterParams is None: ret = decodeStream(self.encodedStream, 
self.filter.getValue(), self.filterParams) if ret[0] == -1: if self.rawStream != self.encodedStream: @@ -2129,13 +2151,13 @@ def decode (self) : self.addError(errorMessage) self.decodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.decodedStream = ret[1] else: self.decodedStream = ret[1] elif filterParamsType == 'dictionary': - ret = decodeStream(self.encodedStream, self.filter.getValue(), self.filterParams.getElements()) + ret = decodeStream(self.encodedStream, self.filter.getValue(), self.filterParams.getElements()) if ret[0] == -1: if self.rawStream != self.encodedStream: ret = decodeStream(self.rawStream, self.filter.getValue(), self.filterParams.getElements()) @@ -2146,7 +2168,7 @@ def decode (self) : self.addError(errorMessage) self.decodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.decodedStream = ret[1] else: @@ -2157,20 +2179,20 @@ def decode (self) : self.addError(errorMessage) self.decodedStream = '' else: - return (-1,'Filter parameters type is not valid') + return (-1, 'Filter parameters type is not valid') elif filterType == 'array': self.decodedStream = self.encodedStream filterElements = self.filter.getElements() for i in range(len(filterElements)): filter = filterElements[i] - if filter == None: + if filter is None: if isForceMode: errorMessage = 'Bad /Filter element in PDFArray' self.addError(errorMessage) continue - return (-1,'Bad /Filter element in PDFArray') + return (-1, 'Bad /Filter element in PDFArray') if filter.getType() == 'name': - if self.filterParams == None: + if self.filterParams is None: ret = decodeStream(self.decodedStream, filter.getValue(), self.filterParams) if ret[0] == -1: if i == 0 and self.rawStream != self.encodedStream: @@ -2182,7 +2204,7 @@ def decode (self) : self.addError(errorMessage) self.decodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.decodedStream = ret[1] else: @@ -2194,12 +2216,12 @@ def decode 
(self) : paramsDict = {} else: paramsObj = paramsArray[i] - if paramsObj == None: + if paramsObj is None: if isForceMode: errorMessage = 'Bad /FilterParms element in PDFArray' self.addError(errorMessage) continue - return (-1,'Bad /FilterParms element in PDFArray') + return (-1, 'Bad /FilterParms element in PDFArray') paramsObjType = paramsObj.getType() if paramsObjType == 'dictionary': paramsDict = paramsObj.getElements() @@ -2216,7 +2238,7 @@ def decode (self) : self.addError(errorMessage) self.decodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.decodedStream = ret[1] else: @@ -2227,83 +2249,83 @@ def decode (self) : self.addError(errorMessage) self.decodedStream = '' else: - return (-1,'One of the filters parameters type is not valid') + return (-1, 'One of the filters parameters type is not valid') else: if isForceMode: errorMessage = 'One of the filters type is not valid' self.addError(errorMessage) self.decodedStream = '' else: - return (-1,'One of the filters type is not valid') + return (-1, 'One of the filters type is not valid') else: if isForceMode: errorMessage = 'Filter type is not valid' self.addError(errorMessage) self.decodedStream = '' else: - return (-1,'Filter type is not valid') + return (-1, 'Filter type is not valid') if errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) else: - return (0,'') + return (0, '') else: - return (-1,'Not encoded stream') + return (-1, 'Not encoded stream') else: - return (-1,'Empty stream') + return (-1, 'Empty stream') - def decrypt(self, password = None, strAlgorithm = 'RC4', altAlgorithm = 'RC4'): + def decrypt(self, password=None, strAlgorithm='RC4', altAlgorithm='RC4'): ''' - Decrypt the content of the object if possible - + Decrypt the content of the object if possible + @param password: The password used to decrypt the object. It's dependent on the object. 
@return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' errorMessage = '' self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password decryptedElements = {} for key in self.elements: object = self.elements[key] objectType = object.getType() - if objectType in ['string','hexstring','array','dictionary']: + if objectType in ['string', 'hexstring', 'array', 'dictionary']: ret = object.decrypt(self.encryptionKey, strAlgorithm) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) decryptedElements[key] = object self.elements = decryptedElements - ret = self.update(decrypt = True, algorithm = altAlgorithm) + ret = self.update(decrypt=True, algorithm=altAlgorithm) if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - - def delElement(self, name, update = True): + + def delElement(self, name, update=True): onlyElements = True - if self.elements.has_key(name): - if name in ['/Filter','/DecodeParm','/FFilter','/FDecodeParm']: + if name in self.elements: + if name in ['/Filter', '/DecodeParm', '/FFilter', '/FDecodeParm']: self.deletedFilters = True onlyElements = False del(self.elements[name]) if update: - ret = self.update(onlyElements = onlyElements) + ret = self.update(onlyElements=onlyElements) return ret else: - return (-1,'Element not found') + return (-1, 'Element not found') - def encode (self) : + def encode(self): ''' Encode the decoded stream and update the content of rawStream ''' errorMessage = '' if len(self.decodedStream) > 0: - if self.filter == None: - return (-1,'Bad /Filter element') + if self.filter is None: + return (-1, 'Bad /Filter element') filterType = self.filter.getType() - if self.filterParams != None: + if self.filterParams is not None: filterParamsType = self.filterParams.getType() if filterType == 'name': - if self.filterParams == None: + if self.filterParams is None: 
ret = encodeStream(self.decodedStream, self.filter.getValue(), self.filterParams) if ret[0] == -1: errorMessage = 'Encoding error: '+ret[1] @@ -2311,7 +2333,7 @@ def encode (self) : self.addError(errorMessage) self.encodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.rawStream = ret[1] elif filterParamsType == 'dictionary': @@ -2322,37 +2344,37 @@ def encode (self) : self.addError(errorMessage) self.encodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: - self.rawStream = ret[1] + self.rawStream = ret[1] else: if isForceMode: errorMessage = 'Filter parameters type is not valid' self.addError(errorMessage) self.encodedStream = '' else: - return (-1,'Filter parameters type is not valid') + return (-1, 'Filter parameters type is not valid') elif filterType == 'array': self.rawStream = self.decodedStream filterElements = list(self.filter.getElements()) filterElements.reverse() - if self.filterParams != None and filterParamsType == 'array': + if self.filterParams is not None and filterParamsType == 'array': paramsArray = self.filterParams.getElements() - for j in range(len(paramsArray),len(filterElements)): + for j in range(len(paramsArray), len(filterElements)): paramsArray.append(PDFNull('Null')) paramsArray.reverse() else: paramsArray = [] for i in range(len(filterElements)): filter = filterElements[i] - if filter == None: + if filter is None: if isForceMode: errorMessage = 'Bad /Filter element in PDFArray' self.addError(errorMessage) continue - return (-1,'Bad /Filter element in PDFArray') + return (-1, 'Bad /Filter element in PDFArray') if filter.getType() == 'name': - if self.filterParams == None: + if self.filterParams is None: ret = encodeStream(self.rawStream, filter.getValue(), self.filterParams) if ret[0] == -1: errorMessage = 'Encoding error: '+ret[1] @@ -2360,17 +2382,17 @@ def encode (self) : self.addError(errorMessage) self.encodedStream = '' else: - return (-1,errorMessage) + return 
(-1, errorMessage) else: self.rawStream = ret[1] elif filterParamsType == 'array': paramsObj = paramsArray[i] - if paramsObj == None: + if paramsObj is None: if isForceMode: errorMessage = 'Bad /FilterParms element in PDFArray' self.addError(errorMessage) continue - return (-1,'Bad /FilterParms element in PDFArray') + return (-1, 'Bad /FilterParms element in PDFArray') paramsObjType = paramsObj.getType() if paramsObjType == 'dictionary': paramsDict = paramsObj.getElements() @@ -2384,45 +2406,45 @@ def encode (self) : self.addError(errorMessage) self.encodedStream = '' else: - return (-1,errorMessage) + return (-1, errorMessage) else: - self.rawStream = ret[1] + self.rawStream = ret[1] else: if isForceMode: errorMessage = 'One of the filters parameters type is not valid' self.addError(errorMessage) self.encodedStream = '' else: - return (-1,'One of the filters parameters type is not valid') + return (-1, 'One of the filters parameters type is not valid') else: if isForceMode: errorMessage = 'One of the filters type is not valid' self.addError(errorMessage) self.encodedStream = '' else: - return (-1,'One of the filters type is not valid') + return (-1, 'One of the filters type is not valid') else: if isForceMode: errorMessage = 'Filter type is not valid' self.addError(errorMessage) self.encodedStream = '' else: - return (-1,'Filter type is not valid') + return (-1, 'Filter type is not valid') self.encodedStream = self.rawStream if errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) else: - return (0,'') + return (0, '') else: - return (-1,'Empty stream') - - def encrypt(self, password = None): + return (-1, 'Empty stream') + + def encrypt(self, password=None): self.encrypted = True - if password != None: + if password is not None: self.encryptionKey = password ret = self.update() return ret - + def getEncryptedValue(self): return self.encryptedValue + newLine + 'stream' + newLine + self.rawStream + newLine + 'endstream' @@ -2448,11 +2470,11 @@ 
def getStats(self): stats['Type'] = self.dictType else: stats['Type'] = None - if self.elements.has_key('/Subtype'): + if "/Subtype" in self.elements: stats['Subtype'] = self.elements['/Subtype'].getValue() else: stats['Subtype'] = None - if self.elements.has_key('/S'): + if "/S" in self.elements: stats['Action type'] = self.elements['/S'].getValue() else: stats['Action type'] = None @@ -2463,12 +2485,12 @@ def getStats(self): stats['Real Length'] = None if self.isEncodedStream: stats['Encoded'] = True - if self.file != None: + if self.file is not None: stats['Stream File'] = self.file else: stats['Stream File'] = None stats['Filters'] = self.filter.getValue() - if self.filterParams != None: + if self.filterParams is not None: stats['Filter Parameters'] = True else: stats['Filter Parameters'] = False @@ -2491,19 +2513,19 @@ def getStats(self): else: stats['JSCode'] = False return stats - + def getStream(self): ''' - Gets the stream of the object - + Gets the stream of the object + @return: The stream of the object (string), this means applying filters or decoding characters ''' return self.decodedStream - + def getRawStream(self): ''' - Gets the raw value of the stream of the object - + Gets the raw value of the stream of the object + @return: The raw value of the stream (string), this means without applying filters or decoding characters ''' return self.rawStream @@ -2514,33 +2536,33 @@ def getRawValue(self): else: stream = self.decodedStream return self.rawValue + newLine + 'stream' + newLine + stream + newLine + 'endstream' - + def getValue(self): - return self.value + newLine +'stream' + newLine + self.decodedStream + newLine + 'endstream' - + return self.value + newLine + 'stream' + newLine + self.decodedStream + newLine + 'endstream' + def isEncoded(self): ''' - Specifies if the stream is encoded with some type of filter (/Filter) - + Specifies if the stream is encoded with some type of filter (/Filter) + @return: A boolean ''' return self.isEncodedStream - + 
def isFaultyDecoding(self): ''' - Specifies if there are any errors in the process of decoding the stream - + Specifies if there are any errors in the process of decoding the stream + @return: A boolean ''' return self.decodingError - + def replace(self, string1, string2): stringFound = False # Dictionary newElements = {} errorMessage = '' for key in self.elements: - if key == '/F' and self.elements[key] != None: + if key == '/F' and self.elements[key] is not None: externalFile = self.elements[key].getValue() if externalFile != self.file: self.modifiedRawStream = True @@ -2566,23 +2588,23 @@ def replace(self, string1, string2): if not self.modifiedRawStream: oldDecodedStream = self.decodedStream if self.decodedStream.find(string1) != -1: - self.decodedStream = self.decodedStream.replace(string1,string2) + self.decodedStream = self.decodedStream.replace(string1, string2) stringFound = True if errorMessage == 'String not found': errorMessage = '' if oldDecodedStream != self.decodedStream: self.modifiedStream = True if not stringFound: - return (-1,'String not found') + return (-1, 'String not found') self.elements = newElements ret = self.update() if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret def resolveReferences(self): errorMessage = '' - if self.referencesInElements.has_key('/Length'): + if "/Length" in self.referencesInElements: value = self.referencesInElements['/Length'][1] self.size = int(value) self.cleanStream() @@ -2603,15 +2625,15 @@ def resolveReferences(self): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if errorMessage != '': - return (-1,errorMessage) - return (0,'') + return (-1, errorMessage) + return (0, '') def setDecodedStream(self, newStream): ''' Sets the decoded value of the stream and updates the object if some modification is needed - + @param newStream: The new raw value (string) @return: A tuple (status,statusContent), 
where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -2619,33 +2641,30 @@ def setDecodedStream(self, newStream): self.modifiedStream = True ret = self.update() return ret - - def setElement(self, name, value, update = True): + + def setElement(self, name, value, update=True): onlyElements = True - if name in ['/Filter','/DecodeParm','/FFilter','/FDecodeParm']: + if name in ['/Filter', '/DecodeParm', '/FFilter', '/FDecodeParm']: self.newFilters = True onlyElements = False self.elements[name] = value if update: - ret = self.update(onlyElements = onlyElements) + ret = self.update(onlyElements=onlyElements) return ret - return (0,'') - + return (0, '') + def setElements(self, newElements): - diffElements = [] - oldElements = self.elements.keys() + oldElements = list(self.elements.keys()) for oldElement in oldElements: if oldElement not in newElements: - if oldElement in ['/Filter','/FFilter']: + if oldElement in ['/Filter', '/FFilter']: self.deletedFilters = True - onlyElements = False break self.elements = newElements if not self.deletedFilters: for name in self.elements: - if name in ['/Filter','/DecodeParm','/FFilter','/FDecodeParm']: + if name in ['/Filter', '/DecodeParm', '/FFilter', '/FDecodeParm']: self.newFilters = True - onlyElements = False break ret = self.update() return ret @@ -2664,7 +2683,7 @@ def setReferencedJSObject(self, value): def setRawStream(self, newStream): ''' Sets the raw value of the stream and updates the object if some modification is needed - + @param newStream: The new raw value (string) @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' @@ -2672,11 +2691,10 @@ def setRawStream(self, newStream): self.modifiedRawStream = True ret = self.update() return ret - -class PDFObjectStream (PDFStream) : - def __init__(self, rawDict = '', rawStream = '', elements = {}, rawNames = {}, compressedObjectsDict = {}) : - global 
isForceMode + +class PDFObjectStream (PDFStream): + def __init__(self, rawDict='', rawStream='', elements={}, rawNames={}, compressedObjectsDict={}): self.type = 'stream' self.dictType = '' self.errors = [] @@ -2693,7 +2711,7 @@ def __init__(self, rawDict = '', rawStream = '', elements = {}, rawNames = {}, c self.rawValue = rawDict self.encryptedValue = rawDict self.rawNames = rawNames - self.value = '' # string + self.value = '' # string self.updateNeeded = False self.containsJScode = False self.referencedJSObject = False @@ -2721,14 +2739,14 @@ def __init__(self, rawDict = '', rawStream = '', elements = {}, rawNames = {}, c if isForceMode: self.addError(ret[1]) else: - raise Exception(ret[1]) + raise Exception(ret[1]) else: self.addError('No dictionary in stream object') - def update(self, modifiedCompressedObjects = False, onlyElements = False, decrypt = False, algorithm = 'RC4'): + def update(self, modifiedCompressedObjects=False, onlyElements=False, decrypt=False, algorithm='RC4'): ''' Updates the object after some modification has occurred - + @param modifiedCompressedObjects: A boolean indicating if the compressed objects hav been modified. By default: False. @param onlyElements: A boolean indicating if it's only necessary to update the stream dictionary or also the stream itself. By default: False (stream included). @param decrypt: A boolean indicating if a decryption has been performed. By default: False. 
@@ -2737,8 +2755,8 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp self.value = '<< ' self.rawValue = '<< ' self.encryptedValue = '<< ' - keys = self.elements.keys() - values = self.elements.values() + keys = list(self.elements.keys()) + values = list(self.elements.values()) if not onlyElements: self.errors = [] self.references = [] @@ -2747,62 +2765,62 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp self.urlsFound = [] self.containsJScode = False self.decodingError = False - + # Dictionary - if self.elements.has_key('/First') and self.elements['/First'] != None: + if "/First" in self.elements and self.elements['/First'] is not None: self.firstObjectOffset = self.elements['/First'].getRawValue() else: if isForceMode: self.addError('No /First element in the object stream or it\'s None') else: - return (-1,'No /First element in the object stream or it\'s None') - if self.elements.has_key('/N') and self.elements['/N'] != None: + return (-1, 'No /First element in the object stream or it\'s None') + if "/N" in self.elements and self.elements['/N'] is not None: self.numCompressedObjects = self.elements['/N'].getRawValue() else: if isForceMode: self.addError('No /N element in the object stream or it\'s None') else: - return (-1,'No /N element in the object stream or it\'s None') + return (-1, 'No /N element in the object stream or it\'s None') - if self.elements.has_key('/Extends') and self.elements['/Extends'] != None: + if "/Extends" in self.elements and self.elements['/Extends'] is not None: self.extends = self.elements['/Extends'].getValue() - if self.elements.has_key('/Length'): + if "/Length" in self.elements: length = self.elements['/Length'] - if length != None: + if length is not None: if length.getType() == 'integer': self.size = length.getRawValue() elif length.getType() == 'reference': self.updateNeeded = True - self.referencesInElements['/Length'] = [length.getId(),''] + 
self.referencesInElements['/Length'] = [length.getId(), ''] else: if isForceMode: self.addError('No permitted type for /Length element') else: - return (-1,'No permitted type for /Length element') + return (-1, 'No permitted type for /Length element') else: if isForceMode: self.addError('None /Length element') else: - return (-1,'None /Length element') + return (-1, 'None /Length element') else: if isForceMode: self.addError('Missing /Length in stream object') else: - return (-1,'Missing /Length in stream object') - - if self.elements.has_key('/F'): + return (-1, 'Missing /Length in stream object') + + if "/F" in self.elements: self.file = self.elements['/F'].getValue() if os.path.exists(self.file): - self.rawStream = open(self.file,'rb').read() + self.rawStream = open(self.file, 'rb').read() else: if isForceMode: self.addError('File "'+self.file+'" does not exist (/F)') self.rawStream = '' else: - return (-1,'File "'+self.file+'" does not exist (/F)') - - if self.elements.has_key('/Filter'): + return (-1, 'File "'+self.file+'" does not exist (/F)') + + if "/Filter" in self.elements: self.filter = self.elements['/Filter'] if self.newFilters or self.modifiedStream: self.encodedStream = '' @@ -2810,7 +2828,7 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp elif not self.encrypted: self.encodedStream = self.rawStream self.isEncodedStream = True - elif self.elements.has_key('/FFilter'): + elif "/FFilter" in self.elements: self.filter = self.elements['/FFilter'] if self.newFilters or self.modifiedStream: self.encodedStream = '' @@ -2826,28 +2844,28 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp self.decodedStream = self.rawStream self.isEncodedStream = False if self.isEncodedStream: - if self.elements.has_key('/DecodeParms'): + if "/DecodeParms" in self.elements: self.filterParams = self.elements['/DecodeParms'] - elif self.elements.has_key('/FDecodeParms'): + elif "/FDecodeParms" in self.elements: 
self.filterParams = self.elements['/FDecodeParms'] - elif self.elements.has_key('/DP'): + elif "/DP" in self.elements: self.filterParams = self.elements['/DP'] else: self.filterParams = None - + for i in range(len(keys)): valueElement = values[i] - if valueElement == None: + if valueElement is None: if isForceMode: errorMessage = 'Stream dictionary has a None value' self.addError(errorMessage) valueElement = PDFString('') else: - return (-1,'Stream dictionary has a None value') + return (-1, 'Stream dictionary has a None value') v = valueElement.getValue() type = valueElement.getType() if type == 'reference': - if v not in self.references: + if v not in self.references: self.references.append(v) elif type == 'dictionary' or type == 'array': self.references = list(set(self.references + valueElement.getReferences())) @@ -2859,13 +2877,13 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if valueElement.isFaulty(): errorMessage = 'Child element is faulty' self.addError(errorMessage) - if self.rawNames.has_key(keys[i]): + if keys[i] in self.rawNames: rawName = self.rawNames[keys[i]] rawValue = rawName.getRawValue() else: rawValue = keys[i] self.rawNames[keys[i]] = PDFName(keys[i][1:]) - if type in ['string','hexstring','array','dictionary'] and self.encrypted and not decrypt: + if type in ['string', 'hexstring', 'array', 'dictionary'] and self.encrypted and not decrypt: ret = valueElement.encrypt(self.encryptionKey) if ret[0] == -1: errorMessage = ret[1]+' in child element' @@ -2876,20 +2894,20 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp self.encryptedValue = self.encryptedValue[:-1] + ' >>' self.rawValue = self.rawValue[:-1] + ' >>' self.value = self.value[:-1] + ' >>' - + if not onlyElements: # Stream if self.deletedFilters or self.newFilters or self.modifiedStream or self.modifiedRawStream or modifiedCompressedObjects or self.encrypted: if self.deletedFilters: if self.encrypted: try: - 
self.rawStream = RC4(self.decodedStream,self.encryptionKey) + self.rawStream = RC4(self.decodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.decodedStream) @@ -2898,13 +2916,13 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if ret[0] != -1: if self.encrypted: try: - self.rawStream = RC4(self.encodedStream,self.encryptionKey) + self.rawStream = RC4(self.encodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.encodedStream) @@ -2916,26 +2934,26 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if ret[0] != -1: if self.encrypted: try: - self.rawStream = RC4(self.encodedStream,self.encryptionKey) + self.rawStream = RC4(self.encodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.encodedStream) else: if self.encrypted: try: - self.rawStream = RC4(self.decodedStream,self.encryptionKey) + self.rawStream = RC4(self.decodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: self.size = len(self.decodedStream) @@ -2948,9 +2966,9 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if decrypt: try: if algorithm == 'RC4': - self.encodedStream = RC4(self.rawStream,self.encryptionKey) + self.encodedStream = 
RC4(self.rawStream, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(self.rawStream,self.encryptionKey) + ret = AES.decryptData(self.rawStream, self.encryptionKey) if ret[0] != -1: self.encodedStream = ret[1] else: @@ -2958,60 +2976,62 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) except: errorMessage = 'Error decrypting stream with '+str(algorithm) if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.encodedStream = self.rawStream try: - self.rawStream = RC4(self.rawStream,self.encryptionKey) + self.rawStream = RC4(self.rawStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.decode() else: try: - self.decodedStream = RC4(self.rawStream,self.encryptionKey) + self.decodedStream = RC4(self.rawStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: if self.isEncodedStream: self.decode() self.size = len(self.rawStream) - offsetsSection = self.decodedStream[:self.firstObjectOffset] - objectsSection = self.decodedStream[self.firstObjectOffset:] - numbers = re.findall('\d{1,10}', offsetsSection) - if numbers != [] and len(numbers) % 2 == 0: - for i in range(0,len(numbers),2): - id = int(numbers[i]) - offset = int(numbers[i+1]) - ret = PDFParser().readObject(objectsSection[offset:]) - if ret[0] == -1: - if isForceMode: - object = None - self.addError(ret[1]) + + if not self.updateNeeded: + offsetsSection = self.decodedStream[:self.firstObjectOffset] + objectsSection = self.decodedStream[self.firstObjectOffset:] + numbers = re.findall('\d{1,10}', offsetsSection) + if 
numbers != [] and len(numbers) % 2 == 0: + for i in range(0, len(numbers), 2): + id = int(numbers[i]) + offset = int(numbers[i+1]) + ret = PDFParser().readObject(objectsSection[offset:]) + if ret[0] == -1: + if isForceMode: + object = None + self.addError(ret[1]) + else: + return ret else: - return ret - else: - object = ret[1] - self.compressedObjectsDict[id] = [offset,object] - self.indexes.append(id) - else: - if isForceMode: - self.addError('Missing offsets in object stream') + object = ret[1] + self.compressedObjectsDict[id] = [offset, object] + self.indexes.append(id) else: - return (-1,'Missing offsets in object stream') + if isForceMode: + self.addError('Missing offsets in object stream') + else: + return (-1, 'Missing offsets in object stream') elif modifiedCompressedObjects: tmpStreamObjects = '' tmpStreamObjectsInfo = '' @@ -3020,12 +3040,12 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp tmpStreamObjectsInfo += str(objectId)+' '+str(offset)+' ' object = self.compressedObjectsDict[objectId][1] tmpStreamObjects += object.toFile() - self.compressedObjectsDict[objectId] = [offset,object] + self.compressedObjectsDict[objectId] = [offset, object] self.decodedStream = tmpStreamObjectsInfo + tmpStreamObjects self.firstObjectOffset = len(tmpStreamObjectsInfo) - self.setElementValue('/First',str(self.firstObjectOffset)) + self.setElementValue('/First', str(self.firstObjectOffset)) self.numCompressedObjects = len(self.compressedObjectsDict) - self.setElementValue('/N',str(self.numCompressedObjects)) + self.setElementValue('/N', str(self.numCompressedObjects)) if self.isEncodedStream: self.encode() self.size = len(self.encodedStream) @@ -3035,23 +3055,23 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if not decrypt: try: if self.isEncodedStream: - self.rawStream = RC4(self.encodedStream,self.encryptionKey) + self.rawStream = RC4(self.encodedStream, self.encryptionKey) else: - self.rawStream = 
RC4(self.decodedStream,self.encryptionKey) + self.rawStream = RC4(self.decodedStream, self.encryptionKey) except: errorMessage = 'Error encrypting stream with RC4' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.size = len(self.rawStream) else: if self.isEncodedStream: try: if algorithm == 'RC4': - self.encodedStream = RC4(self.rawStream,self.encryptionKey) + self.encodedStream = RC4(self.rawStream, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(self.rawStream,self.encryptionKey) + ret = AES.decryptData(self.rawStream, self.encryptionKey) if ret[0] != -1: self.encodedStream = ret[1] else: @@ -3059,20 +3079,20 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) except: - errorMessage = 'Error decrypting stream with '+str(algorithm) + errorMessage = 'Error decrypting stream with '+str(algorithm) if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.decode() else: try: if algorithm == 'RC4': - self.decodedStream = RC4(self.rawStream,self.encryptionKey) + self.decodedStream = RC4(self.rawStream, self.encryptionKey) elif algorithm == 'AES': - ret = AES.decryptData(self.rawStream,self.encryptionKey) + ret = AES.decryptData(self.rawStream, self.encryptionKey) if ret[0] != -1: self.decodedStream = ret[1] else: @@ -3080,18 +3100,18 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) except: - errorMessage = 'Error decrypting stream with '+str(algorithm) + errorMessage = 'Error decrypting stream with '+str(algorithm) if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) offsetsSection = 
self.decodedStream[:self.firstObjectOffset] objectsSection = self.decodedStream[self.firstObjectOffset:] numbers = re.findall('\d{1,10}', offsetsSection) if numbers != [] and len(numbers) % 2 == 0: - for i in range(0,len(numbers),2): + for i in range(0, len(numbers), 2): id = int(numbers[i]) offset = int(numbers[i+1]) ret = PDFParser().readObject(objectsSection[offset:]) @@ -3103,13 +3123,13 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp return ret else: object = ret[1] - self.compressedObjectsDict[id] = [offset,object] + self.compressedObjectsDict[id] = [offset, object] self.indexes.append(id) else: if isForceMode: self.addError('Missing offsets in object stream') else: - return (-1,'Missing offsets in object stream') + return (-1, 'Missing offsets in object stream') if not self.isFaultyDecoding(): refs = re.findall('(\d{1,5}\s{1,3}\d{1,5}\s{1,3}R)', self.decodedStream) if refs != []: @@ -3124,43 +3144,43 @@ def update(self, modifiedCompressedObjects = False, onlyElements = False, decryp if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if not self.modifiedRawStream: self.modifiedStream = False self.newFilters = False self.deletedFilters = False errors = self.errors try: - self.setElement('/Length',PDFNum(str(self.size))) + self.setElement('/Length', PDFNum(str(self.size))) self.errors += errors except: errorMessage = 'Error creating PDFNum' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: self.modifiedRawStream = False self.modifiedStream = False self.newFilters = False self.deletedFilters = False if self.errors != []: - return (-1,self.errors[-1]) + return (-1, self.errors[-1]) else: - return (0,'') + return (0, '') def getCompressedObjects(self): ''' - Gets the information of the compressed objects: offset and content. - + Gets the information of the compressed objects: offset and content. 
+ @return: A dictionary with this information: {id: [offset,PDFObject]} ''' return self.compressedObjectsDict def getObjectIndex(self, id): ''' - Gets the index of the object in the dictionary of compressed objects - + Gets the index of the object in the dictionary of compressed objects + @param id: The object id @return: The index (int) or None if the object hasn't been found ''' @@ -3168,14 +3188,14 @@ def getObjectIndex(self, id): return None else: return self.indexes.index(id) - + def replace(self, string1, string2): stringFound = False # Dictionary newElements = {} errorMessage = '' for key in self.elements: - if key == '/F' and self.elements[key] != None: + if key == '/F' and self.elements[key] is not None: externalFile = self.elements[key].getValue() if externalFile != self.file: self.modifiedRawStream = True @@ -3206,19 +3226,19 @@ def replace(self, string1, string2): errorMessage = '' for compressedObjectId in self.compressedObjectsDict: object = self.compressedObjectsDict[compressedObjectId][1] - object.replace(string1,string2) + object.replace(string1, string2) self.compressedObjectsDict[compressedObjectId][1] = object if not stringFound: - return (-1,'String not found') + return (-1, 'String not found') self.elements = newElements ret = self.update(modifiedObjects) if ret[0] == 0 and errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) return ret - + def resolveReferences(self): errorMessage = '' - if self.referencesInElements.has_key('/Length'): + if "/Length" in self.referencesInElements: value = self.referencesInElements['/Length'][1] self.size = int(value) self.cleanStream() @@ -3237,10 +3257,10 @@ def resolveReferences(self): objectsSection = self.decodedStream[self.firstObjectOffset:] numbers = re.findall('\d{1,10}', offsetsSection) if numbers != [] and len(numbers) % 2 == 0: - for i in range(0,len(numbers),2): - id = numbers[i] - offset = numbers[i+1] - ret = PDFParser.readObject(objectsSection[offset:]) + for i in range(0, 
len(numbers), 2): + id = int(numbers[i]) + offset = int(numbers[i+1]) + ret = PDFParser().readObject(objectsSection[offset:]) if ret[0] == -1: if isForceMode: object = None @@ -3249,65 +3269,66 @@ def resolveReferences(self): return ret else: object = ret[1] - self.compressedObjectsDict[numbers[i]] = [offset,object] + self.compressedObjectsDict[id] = [offset, object] + self.indexes.append(id) else: errorMessage = 'Missing offsets in object stream' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) else: - return (0,'') - - def setCompressedObjectId(self,id): + return (0, '') + + def setCompressedObjectId(self, id): ''' Sets the compressedIn attribute of the compressed object defined by its id - + @param id: The object id @return: A tuple (status,statusContent), where statusContent is empty in case status = 0 or an error message in case status = -1 ''' for compressedId in self.compressedObjectsDict: - if self.compressedObjectsDict[compressedId] != None: + if self.compressedObjectsDict[compressedId] is not None: object = self.compressedObjectsDict[compressedId][1] object.setCompressedIn(id) self.compressedObjectsDict[compressedId][1] = object else: - return (-1,'Compressed object corrupted') - return (0,'') + return (-1, 'Compressed object corrupted') + return (0, '') -class PDFIndirectObject : - def __init__(self) : - self.referenced = [] # int[] - self.object = None # PDFObject - self.offset = 0 # int - self.generationNumber = 0 # int - self.id = None # int - self.size = 0 # int - +class PDFIndirectObject: + def __init__(self): + self.referenced = [] # int[] + self.object = None # PDFObject + self.offset = 0 # int + self.generationNumber = 0 # int + self.id = None # int + self.size = 0 # int + def contains(self, string): return self.object.contains(string) def getErrors(self): return self.object.getErrors() - + def 
getGenerationNumber(self): return self.generationNumber - + def getId(self): return self.id - + def getObject(self): return self.object def getOffset(self): return self.offset - + def getReferences(self): return self.object.getReferences() - + def getSize(self): return self.size @@ -3322,21 +3343,21 @@ def getStats(self): def isFaulty(self): return self.object.isFaulty() - + def setGenerationNumber(self, generationNumber): self.generationNumber = generationNumber - + def setId(self, id): self.id = id - + def setObject(self, object): self.object = object - + def setOffset(self, offset): self.offset = offset def setSize(self, newSize): - self.size = newSize + self.size = newSize def toFile(self): rawValue = self.object.toFile() @@ -3345,13 +3366,13 @@ def toFile(self): return output -class PDFCrossRefSection : - def __init__(self) : +class PDFCrossRefSection: + def __init__(self): self.errors = [] self.streamObject = None self.offset = 0 self.size = 0 - self.subsections = [] # PDFCrossRefSubsection [] + self.subsections = [] # PDFCrossRefSubsection [] self.bytesPerField = [] def addEntry(self, objectId, newEntry): @@ -3375,27 +3396,26 @@ def addEntry(self, objectId, newEntry): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) self.subsections.insert(prevSubsection, newSubsection) if errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) else: - return (0,'') - + return (0, '') + def addError(self, errorMessage): if errorMessage not in self.errors: self.errors.append(errorMessage) def addSubsection(self, subsection): self.subsections.append(subsection) - + def delEntry(self, objectId): - prevSubsection = 0 errorMessage = '' for i in range(len(self.subsections)): subsection = self.subsections[i] numEntry = subsection.getIndex(objectId) - if numEntry != None: + if numEntry is not None: if subsection.getNumObjects() == 1: self.subsections.remove(subsection) else: @@ -3405,10 +3425,10 @@ def 
delEntry(self, objectId): self.addError(ret[1]) continue if errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) else: - return (0,'') - + return (0, '') + def getBytesPerField(self): return self.bytesPerField @@ -3420,19 +3440,19 @@ def getFreeObjectIds(self): for subsection in self.subsections: ids += subsection.getFreeObjectIds() return ids - + def getNewObjectIds(self): ids = [] for subsection in self.subsections: ids += subsection.getNewObjectIds() return ids - + def getOffset(self): return self.offset - + def getSize(self): return self.size - + def getStats(self): stats = {} if self.offset != -1: @@ -3474,19 +3494,19 @@ def isFaulty(self): return False else: return True - + def inStream(self): - if self.streamObject != None: + if self.streamObject is not None: return True else: return False - + def setBytesPerField(self, array): - self.bytesPerField = array + self.bytesPerField = array def setOffset(self, offset): self.offset = offset - + def setSize(self, newSize): self.size = newSize @@ -3502,7 +3522,7 @@ def toFile(self): def updateOffset(self, objectId, newOffset): for subsection in self.subsections: updatedEntry = subsection.getEntry(objectId) - if updatedEntry != None: + if updatedEntry is not None: updatedEntry.setObjectOffset(newOffset) ret = subsection.setEntry(objectId, updatedEntry) if ret[0] == -1: @@ -3511,11 +3531,11 @@ def updateOffset(self, objectId, newOffset): else: errorMessage = 'Object entry not found' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) class PDFCrossRefSubSection: - def __init__(self, firstObject, numObjects = 0, newEntries = [], offset = 0) : + def __init__(self, firstObject, numObjects=0, newEntries=[], offset=0): self.errors = [] self.offset = offset self.size = 0 @@ -3523,43 +3543,43 @@ def __init__(self, firstObject, numObjects = 0, newEntries = [], offset = 0) : self.numObjects = int(numObjects) self.entries = newEntries - def addEntry(self, newEntry, objectId = 
None): - if objectId == None: + def addEntry(self, newEntry, objectId=None): + if objectId is None: self.entries.append(newEntry) self.numObjects += 1 - return (0,self.numObjects) + return (0, self.numObjects) else: numEntry = self.getIndex(objectId) - if numEntry != None: + if numEntry is not None: self.entries.insert(numEntry, newEntry) self.numObjects += 1 - return (0,self.numObjects) + return (0, self.numObjects) else: if self.firstObject == objectId + 1: self.entries.insert(0, newEntry) self.firstObject = objectId self.numObjects += 1 - return (0,self.numObjects) + return (0, self.numObjects) elif objectId == self.firstObject + self.numObjects: self.entries.append(newEntry) self.numObjects += 1 - return (0,self.numObjects) + return (0, self.numObjects) else: errorMessage = 'Unspecified error' self.addError(errorMessage) - return (-1,errorMessage) - return (0,self.numObjects) - + return (-1, errorMessage) + return (0, self.numObjects) + def addError(self, errorMessage): if errorMessage not in self.errors: self.errors.append(errorMessage) def delEntry(self, objectId): numEntry = self.getIndex(objectId) - if numEntry == None: + if numEntry is None: errorMessage = 'Entry not found' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) if numEntry == 0: self.entries.pop(numEntry) self.firstObject = objectId + 1 @@ -3572,29 +3592,29 @@ def delEntry(self, objectId): numPrevFree = self.getPrevFree(numEntry) numNextFree = self.getNextFree(numEntry) nextObject = self.getObjectId(numNextFree) - if numPrevFree != None: + if numPrevFree is not None: prevEntry = self.entries[numPrevFree] prevEntry.setNextObject(objectId) self.entries[numPrevFree] = prevEntry entry.setType('f') - if nextObject == None: + if nextObject is None: entry.setNextObject(0) else: entry.setNextObject(nextObject) entry.incGenNumber() self.entries[numEntry] = entry - return (0,numEntry) + return (0, numEntry) def getEntries(self): return self.entries - + def getEntry(self, 
objectId): numEntry = self.getIndex(objectId) - if numEntry != None: + if numEntry is not None: return self.entries[numEntry] else: return None - + def getErrors(self): return self.errors @@ -3606,22 +3626,22 @@ def getFreeObjectIds(self): for i in range(len(self.entries)): if self.entries[i].getType() == 'f': ids.append(self.getObjectId(i)) - return ids - + return ids + def getIndex(self, objectId): - objectIds = range(self.firstObject,self.firstObject+self.numObjects) + objectIds = list(range(self.firstObject, self.firstObject+self.numObjects)) if objectId in objectIds: return objectIds.index(objectId) else: return None def getNextFree(self, numEntry): - for i in range(numEntry + 1,self.numObjects): + for i in range(numEntry + 1, self.numObjects): if self.entries[i].getType() == 'f': return i else: return None - + def getNewObjectIds(self): ids = [] for i in range(len(self.entries)): @@ -3634,12 +3654,10 @@ def getNumObjects(self): def getObjectId(self, numEntry): return self.firstObject + numEntry - - + def getOffset(self): return self.offset - - + def getPrevFree(self, numEntry): for i in range(numEntry): if self.entries[i].getType() == 'f': @@ -3649,47 +3667,47 @@ def getPrevFree(self, numEntry): def getSize(self): return self.size - + def isFaulty(self): if self.errors == []: return False else: return True - + def setEntry(self, objectId, newEntry): numEntry = self.getIndex(objectId) - if numEntry != None: + if numEntry is not None: self.entries[numEntry] = newEntry - return (0,numEntry) + return (0, numEntry) else: errorMessage = 'Entry not found' self.addError(errorMessage) - return (-1,errorMessage) - + return (-1, errorMessage) + def setEntries(self, newEntries): self.entries = newEntries def setFirstObject(self, newFirst): self.firstObject = newFirst - + def setNumObjects(self, newNumObjects): self.numObjects = newNumObjects def setOffset(self, offset): self.offset = offset - + def setSize(self, newSize): self.size = newSize - + def toFile(self): output 
= str(self.firstObject) + ' ' + str(self.numObjects) + newLine for entry in self.entries: output += entry.toFile() return output - + class PDFCrossRefEntry: - def __init__(self, firstValue, secondValue, type, offset = 0) : + def __init__(self, firstValue, secondValue, type, offset=0): self.errors = [] self.offset = offset self.objectStream = None @@ -3700,10 +3718,10 @@ def __init__(self, firstValue, secondValue, type, offset = 0) : self.entryType = type if type == 'f' or type == 0: self.nextObject = int(firstValue) - self.genNumber = int(secondValue) + self.genNumber = int(secondValue) elif type == 'n' or type == 1: self.objectOffset = int(firstValue) - self.genNumber = int(secondValue) + self.genNumber = int(secondValue) elif type == 2: self.objectStream = int(firstValue) self.indexObject = int(secondValue) @@ -3711,7 +3729,7 @@ def __init__(self, firstValue, secondValue, type, offset = 0) : if isForceMode: self.addError('Error parsing xref entry') else: - return (-1,'Error parsing xref entry') + return (-1, 'Error parsing xref entry') def addError(self, errorMessage): if errorMessage not in self.errors: @@ -3720,7 +3738,7 @@ def addError(self, errorMessage): def getEntryBytes(self, bytesPerField): bytesString = '' errorMessage = '' - + if self.entryType == 'f' or self.entryType == 0: type = 0 firstValue = self.nextObject @@ -3735,54 +3753,54 @@ def getEntryBytes(self, bytesPerField): secondValue = self.indexObject if bytesPerField[0] != 0: - ret = numToHex(type,bytesPerField[0]) + ret = numToHex(type, bytesPerField[0]) if ret[0] == -1: errorMessage = ret[1] if isForceMode: self.addError(ret[1]) - ret = numToHex(0,bytesPerField[0]) + ret = numToHex(0, bytesPerField[0]) bytesString += ret[1] else: return ret else: bytesString += ret[1] if bytesPerField[1] != 0: - ret = numToHex(firstValue,bytesPerField[1]) + ret = numToHex(firstValue, bytesPerField[1]) if ret[0] == -1: errorMessage = ret[1] if isForceMode: self.addError(ret[1]) - ret = numToHex(0,bytesPerField[1]) 
+ ret = numToHex(0, bytesPerField[1]) bytesString += ret[1] else: return ret else: bytesString += ret[1] if bytesPerField[2] != 0: - ret = numToHex(secondValue,bytesPerField[2]) + ret = numToHex(secondValue, bytesPerField[2]) if ret[0] == -1: errorMessage = ret[1] if isForceMode: self.addError(ret[1]) - ret = numToHex(0,bytesPerField[1]) + ret = numToHex(0, bytesPerField[1]) bytesString += ret[1] else: return ret else: bytesString += ret[1] if errorMessage != '': - return (-1,errorMessage) - return (0,bytesString) - + return (-1, errorMessage) + return (0, bytesString) + def getErrors(self): return self.errors - + def getGenNumber(self): return self.genNumber def getIndexObject(self): return self.indexObject - + def getNextObject(self): return self.nextObject @@ -3791,25 +3809,25 @@ def getObjectOffset(self): def getObjectStream(self): return self.objectStream - + def getOffset(self): return self.offset def getType(self): return self.entryType - + def incGenNumber(self): self.genNumber += 1 - + def isFaulty(self): if self.errors == []: return False else: return True - + def setGenNumber(self, newGenNumber): self.genNumber = newGenNumber - + def setIndexObject(self, index): self.indexObject = index @@ -3818,25 +3836,24 @@ def setNextObject(self, newNextObject): def setObjectOffset(self, newOffset): self.objectOffset = newOffset - + def setObjectStream(self, id): self.objectStream = id - + def setOffset(self, offset): self.offset = offset def setType(self, newType): self.entryType = newType - - + def toFile(self): output = '' if self.entryType == 'n': - ret = numToString(self.objectOffset,10) + ret = numToString(self.objectOffset, 10) if ret[0] != -1: output += ret[1] elif self.entryType == 'f': - ret = numToString(self.nextObject,10) + ret = numToString(self.nextObject, 10) if ret[0] != -1: output += ret[1] output += ' ' @@ -3852,11 +3869,11 @@ def toFile(self): return output -class PDFBody : - def __init__(self) : - self.numObjects = 0 # int - self.objects = {} # 
PDFIndirectObjects{} - self.numStreams = 0 # int +class PDFBody: + def __init__(self): + self.numObjects = 0 # int + self.objects = {} # PDFIndirectObjects{} + self.numStreams = 0 # int self.numEncodedStreams = 0 self.numDecodingErrors = 0 self.numURIs = 0 @@ -3912,9 +3929,9 @@ def containsXrefStreams(self): return True else: return False - + def delObject(self, id): - if self.objects.has_key(id): + if id in self.objects: indirectObject = self.objects[id] return self.deregisterObject(indirectObject) else: @@ -3923,18 +3940,18 @@ def delObject(self, id): def deregisterObject(self, pdfIndirectObject): type = '' errorMessage = '' - if pdfIndirectObject == None: + if pdfIndirectObject is None: errorMessage = 'Indirect Object is None' pdfFile.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) id = pdfIndirectObject.getId() - if self.objects.has_key(id): + if id in self.objects: self.objects.pop(id) pdfObject = pdfIndirectObject.getObject() - if pdfObject == None: + if pdfObject is None: errorMessage = 'Object is None' pdfFile.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) objectType = pdfObject.getType() self.numObjects -= 1 if id in self.faultyObjects: @@ -3954,12 +3971,12 @@ def deregisterObject(self, pdfIndirectObject): self.numDecodingErrors -= 1 if pdfObject.hasElement('/Type'): typeObject = pdfObject.getElementByName('/Type') - if typeObject == None: + if typeObject is None: errorMessage = '/Type element is None' if isForceMode: pdfFile.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: type = typeObject.getValue() if type == '/XRef': @@ -3992,18 +4009,18 @@ def deregisterObject(self, pdfIndirectObject): if type == '': type = objectType if errorMessage != '': - return (-1,errorMessage) - return (0,type) + return (-1, errorMessage) + return (0, type) def encodeChars(self): errorMessage = '' for id in self.objects: indirectObject = self.objects[id] - if indirectObject != 
None: + if indirectObject is not None: object = indirectObject.getObject() - if object != None: + if object is not None: objectType = object.getType() - if objectType in ['string','name','array','dictionary','stream']: + if objectType in ['string', 'name', 'array', 'dictionary', 'stream']: ret = object.encodeChars() if ret[0] == -1: errorMessage = ret[1] @@ -4019,11 +4036,11 @@ def encodeChars(self): pdfFile.addError(errorMessage) if errorMessage != '': return (-1, errorMessage) - return (0,'') + return (0, '') def getCompressedObjects(self): return self.compressedObjects - + def getContainingJS(self): return self.containingJS @@ -4032,15 +4049,15 @@ def getContainingURIs(self): def getEncodedStreams(self): return self.encodedStreams - + def getFaultyObjects(self): return self.faultyObjects - + def getFaultyStreams(self): return self.faultyStreams - + def getIndirectObject(self, id): - if self.objects.has_key(id): + if id in self.objects: return self.objects[id] else: return None @@ -4056,24 +4073,24 @@ def getNextOffset(self): def getNumDecodingErrors(self): return self.numDecodingErrors - + def getNumEncodedStreams(self): return self.numEncodedStreams - + def getNumFaultyObjects(self): return len(self.faultyObjects) def getNumObjects(self): return self.numObjects - + def getNumStreams(self): return self.numStreams def getNumURIs(self): return len(self.uriList) - def getObject(self, id, indirect = False): - if self.objects.has_key(id): + def getObject(self, id, indirect=False): + if id in self.objects: indirectObject = self.objects[id] if indirect: return indirectObject @@ -4083,20 +4100,20 @@ def getObject(self, id, indirect = False): return None def getObjects(self): - return self.objects + return self.objects - def getObjectsByString (self, toSearch) : + def getObjectsByString(self, toSearch): matchedObjects = [] - for indirectObject in self.objects.values(): + for indirectObject in list(self.objects.values()): if indirectObject.contains(toSearch): 
matchedObjects.append(indirectObject.getId()) return matchedObjects - + def getObjectsIds(self): sortedIdsOffsets = [] sortedIds = [] - for indirectObject in self.objects.values(): - sortedIdsOffsets.append([indirectObject.getId(),indirectObject.getOffset()]) + for indirectObject in list(self.objects.values()): + sortedIdsOffsets.append([indirectObject.getId(), indirectObject.getOffset()]) sortedIdsOffsets = sorted(sortedIdsOffsets, key=lambda x: x[1]) for i in range(len(sortedIdsOffsets)): sortedIds.append(sortedIdsOffsets[i][0]) @@ -4104,16 +4121,16 @@ def getObjectsIds(self): def getObjectStreams(self): return self.objectStreams - + def getStreams(self): return self.streams def getSuspiciousActions(self): return self.suspiciousActions - + def getSuspiciousElements(self): return self.suspiciousElements - + def getSuspiciousEvents(self): return self.suspiciousEvents @@ -4128,23 +4145,23 @@ def getURLs(self): def getVulns(self): return self.vulns - + def getXrefStreams(self): return self.xrefStreams def registerObject(self, pdfIndirectObject): type = '' errorMessage = '' - if pdfIndirectObject == None: + if pdfIndirectObject is None: errorMessage = 'Indirect Object is None' pdfFile.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) id = pdfIndirectObject.getId() pdfObject = pdfIndirectObject.getObject() - if pdfObject == None: + if pdfObject is None: errorMessage = 'Object is None' pdfFile.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) objectType = pdfObject.getType() self.numObjects += 1 if pdfObject.isFaulty(): @@ -4166,13 +4183,13 @@ def registerObject(self, pdfIndirectObject): self.numDecodingErrors += 1 if pdfObject.hasElement('/Type'): typeObject = pdfObject.getElementByName('/Type') - if typeObject == None: + if typeObject is None: errorMessage = '/Type element is None' if isForceMode: pdfFile.addError(errorMessage) else: - return (-1,errorMessage) - else: + return (-1, errorMessage) + else: type = 
typeObject.getValue() if type == '/XRef': self.addXrefStream(id) @@ -4195,19 +4212,19 @@ def registerObject(self, pdfIndirectObject): if type == '': type = objectType if errorMessage != '': - return (-1,errorMessage) - return (0,type) + return (-1, errorMessage) + return (0, type) def setNextOffset(self, newOffset): self.nextOffset = newOffset - def setObject(self, id = None, object = None, offset = None, modification = False): + def setObject(self, id=None, object=None, offset=None, modification=False): errorMessage = '' - if self.objects.has_key(id): + if id in self.objects: pdfIndirectObject = self.objects[id] self.deregisterObject(pdfIndirectObject) pdfIndirectObject.setObject(object) - if offset != None: + if offset is not None: pdfIndirectObject.setOffset(offset) size = 12 + 3*len(newLine) + len(str(object.getRawValue())) + len(str(id)) pdfIndirectObject.setSize(size) @@ -4217,10 +4234,10 @@ def setObject(self, id = None, object = None, offset = None, modification = Fals if isForceMode: pdfFile.addError(errorMessage) else: - return (-1,errorMessage) - if id == None: + return (-1, errorMessage) + if id is None: id = self.numObjects+1 - if offset == None: + if offset is None: offset = self.getNextOffset() pdfIndirectObject = PDFIndirectObject() pdfIndirectObject.setId(id) @@ -4233,43 +4250,42 @@ def setObject(self, id = None, object = None, offset = None, modification = Fals ret = self.registerObject(pdfIndirectObject) if ret[0] == 0: if errorMessage != '': - return (-1,errorMessage) + return (-1, errorMessage) else: objectType = ret[1] - return (0,[id,objectType]) + return (0, [id, objectType]) else: return ret - def setObjects(self, objects): self.objects = objects - + def updateObjects(self): errorMessage = '' for id in self.toUpdate: updatedElements = {} object = self.objects[id].getObject() - if object == None: + if object is None: errorMessage = 'Object is None' if isForceMode: pdfFile.addError(errorMessage) continue else: - return (-1,errorMessage) + 
return (-1, errorMessage) elementsToUpdate = object.getReferencesInElements() - keys = elementsToUpdate.keys() + keys = list(elementsToUpdate.keys()) for key in keys: ref = elementsToUpdate[key] refId = ref[0] if refId in self.objects: refObject = self.objects[refId].getObject() - if refObject == None: + if refObject is None: errorMessage = 'Referenced object is None' if isForceMode: pdfFile.addError(errorMessage) continue else: - return (-1,errorMessage) + return (-1, errorMessage) ref[1] = refObject.getValue() updatedElements[key] = ref else: @@ -4278,7 +4294,7 @@ def updateObjects(self): pdfFile.addError(errorMessage) continue else: - return (-1,errorMessage) + return (-1, errorMessage) object.setReferencesInElements(updatedElements) object.resolveReferences() self.updateStats(id, object) @@ -4293,14 +4309,14 @@ def updateObjects(self): self.numDecodingErrors += 1 if object.hasElement('/Type'): typeObject = object.getElementByName('/Type') - if typeObject == None: + if typeObject is None: errorMessage = 'Referenced element is None' if isForceMode: pdfFile.addError(errorMessage) continue else: - return (-1,errorMessage) - else: + return (-1, errorMessage) + else: type = typeObject.getValue() if type == '/XRef': self.addXrefStream(id) @@ -4315,34 +4331,34 @@ def updateObjects(self): self.setObject(compressedId, compressedObject, offset) del(compressedObjectsDict) for id in self.referencedJSObjects: - if id not in self.containingJS: + if id not in self.containingJS and id in self.objects: object = self.objects[id].getObject() - if object == None: + if object is None: errorMessage = 'Object is None' if isForceMode: pdfFile.addError(errorMessage) continue else: - return (-1,errorMessage) + return (-1, errorMessage) object.setReferencedJSObject(True) self.updateStats(id, object) if errorMessage != '': - return (-1,errorMessage) - return (0,'') - - def updateOffsets (self) : + return (-1, errorMessage) + return (0, '') + + def updateOffsets(self): pass def 
updateStats(self, id, pdfObject, delete=False): - if pdfObject == None: + if pdfObject is None: errorMessage = 'Object is None' pdfFile.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) value = pdfObject.getValue() for event in monitorizedEvents: if value.find(event) != -1: printedEvent = event.strip() - if self.suspiciousEvents.has_key(printedEvent): + if printedEvent in self.suspiciousEvents: if delete: if id in self.suspiciousEvents[printedEvent]: self.suspiciousEvents[printedEvent].remove(id) @@ -4354,7 +4370,7 @@ def updateStats(self, id, pdfObject, delete=False): index = value.find(action) if index != -1 and (action == '/JS ' or len(value) == index + len(action) or value[index+len(action)] in delimiterChars+spacesChars): printedAction = action.strip() - if self.suspiciousActions.has_key(printedAction): + if printedAction in self.suspiciousActions: if delete: if id in self.suspiciousActions[printedAction]: self.suspiciousActions[printedAction].remove(id) @@ -4366,7 +4382,7 @@ def updateStats(self, id, pdfObject, delete=False): index = value.find(element) if index != -1 and (element == '/EmbeddedFiles ' or len(value) == index + len(element) or value[index+len(element)] in delimiterChars+spacesChars): printedElement = element.strip() - if self.suspiciousElements.has_key(printedElement): + if printedElement in self.suspiciousElements: if delete: if id in self.suspiciousElements[printedElement]: self.suspiciousElements[printedElement].remove(id) @@ -4386,7 +4402,7 @@ def updateStats(self, id, pdfObject, delete=False): self.javascriptCodePerObject.remove([id, jsCode]) for vuln in jsVulns: if jsCode.find(vuln) != -1: - if self.vulns.has_key(vuln) and id in self.vulns[vuln]: + if vuln in self.vulns and id in self.vulns[vuln]: self.vulns[vuln].remove(id) else: jsCode = pdfObject.getJSCode() @@ -4400,7 +4416,7 @@ def updateStats(self, id, pdfObject, delete=False): for code in jsCode: for vuln in jsVulns: if code.find(vuln) != -1: - if 
self.vulns.has_key(vuln): + if vuln in self.vulns: self.vulns[vuln].append(id) else: self.vulns[vuln] = [id] @@ -4421,7 +4437,7 @@ def updateStats(self, id, pdfObject, delete=False): self.uriList.append(uri) if [id, uri] not in self.uriListPerObject: self.uriListPerObject.append([id, uri]) - ## Extra checks + # Extra checks objectType = pdfObject.getType() if objectType == 'stream': vulnFound = None @@ -4436,8 +4452,8 @@ def updateStats(self, id, pdfObject, delete=False): # Adobe Reader BMP/RLE heap corruption # http://blog.binamuse.com/2013/05/readerbmprle.html vulnFound = bmpVuln - if vulnFound != None: - if self.suspiciousElements.has_key(vulnFound): + if vulnFound is not None: + if vulnFound in self.suspiciousElements: if delete: if id in self.suspiciousElements[vulnFound]: self.suspiciousElements[vulnFound].remove(id) @@ -4445,12 +4461,11 @@ def updateStats(self, id, pdfObject, delete=False): self.suspiciousElements[vulnFound].append(id) elif not delete: self.suspiciousElements[vulnFound] = [id] - return (0,'') - + return (0, '') -class PDFTrailer : - def __init__(self, dict, lastCrossRefSection = '0', streamPresent = False): +class PDFTrailer: + def __init__(self, dict, lastCrossRefSection='0', streamPresent=False): self.errors = [] self.dict = dict self.offset = 0 @@ -4468,16 +4483,16 @@ def __init__(self, dict, lastCrossRefSection = '0', streamPresent = False): self.addError(ret[1]) else: raise Exception(ret[1]) - - def update(self, streamPresent = False): + + def update(self, streamPresent=False): errorMessage = '' - if self.dict == None: + if self.dict is None: errorMessage = 'The trailer dictionary is None' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) if self.dict.hasElement('/Root'): reference = self.dict.getElementByName('/Root') - if reference != None: + if reference is not None: if reference.getType() == 'reference': self.catalogId = reference.getId() else: @@ -4485,23 +4500,23 @@ def update(self, streamPresent = 
False): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: errorMessage = 'No reference element in /Root' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: if not streamPresent: errorMessage = 'Missing /Root element' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if self.dict.hasElement('/Size'): size = self.dict.getElementByName('/Size') - if size != None: + if size is not None: if size.getType() == 'integer': self.numObjects = size.getRawValue() else: @@ -4509,23 +4524,23 @@ def update(self, streamPresent = False): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: errorMessage = 'No integer element in /Size' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: if not streamPresent: errorMessage = 'Missing /Size element' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if self.dict.hasElement('/Info'): info = self.dict.getElementByName('/Info') - if info != None: + if info is not None: if info.getType() == 'reference': self.infoId = info.getId() else: @@ -4533,26 +4548,26 @@ def update(self, streamPresent = False): if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) else: errorMessage = 'No reference element in /Info' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) if self.dict.hasElement('/ID'): arrayID = self.dict.getElementByName('/ID') - if arrayID != None: + if arrayID is not None: if arrayID.getType() == 'array': - self.id = arrayID.getRawValue() + self.id = arrayID.getRawValue() if errorMessage != '': - return (-1,errorMessage) - return (0,'') + return (-1, errorMessage) + return (0, '') def 
addError(self, errorMessage): if errorMessage not in self.errors: self.errors.append(errorMessage) - + def encodeChars(self): ret = self.dict.encodeChars() if ret[0] == -1: @@ -4561,7 +4576,7 @@ def encodeChars(self): def getCatalogId(self): return self.catalogId - + def getDictEntry(self, name): if self.dict.hasElement(name): return self.dict.getElementByName(name) @@ -4569,32 +4584,32 @@ def getDictEntry(self, name): return None def getEOFOffset(self): - return self.eofOffset + return self.eofOffset def getErrors(self): return self.errors def getID(self): return self.id - + def getInfoId(self): return self.infoId def getLastCrossRefSection(self): return self.lastCrossRefSection - + def getNumObjects(self): return self.numObjects - + def getOffset(self): return self.offset - + def getPrevCrossRefSection(self): return self.dict.getElementByName('/Prev') - + def getSize(self): return self.size - + def getStats(self): stats = {} if self.offset != -1: @@ -4616,7 +4631,7 @@ def getStats(self): stats['Info Object'] = str(self.infoId) else: stats['Info Object'] = None - if self.dict.hasElement('/ID') and self.id != None and self.id != '' and self.id != ' ': + if self.dict.hasElement('/ID') and self.id is not None and self.id != '' and self.id != ' ': stats['ID'] = self.id else: stats['ID'] = None @@ -4639,9 +4654,9 @@ def getTrailerDictionary(self): def getXrefStreamObject(self): return self.streamObject - + def inStream(self): - if self.streamObject != None: + if self.streamObject is not None: return True else: return False @@ -4651,27 +4666,27 @@ def isFaulty(self): return False else: return True - + def setCatalogId(self, newId): self.catalogId = newId - + def setDictEntry(self, entry, value): - ret = self.dict.setElement(entry,value) + ret = self.dict.setElement(entry, value) if ret[0] == -1: errorMessage = ret[1]+' in dictionary element' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) return ret def setEOFOffset(self, offset): 
self.eofOffset = offset - + def setInfoId(self, newId): self.infoId = newId def setID(self, newId): self.id = newId - + def setLastCrossRefSection(self, newOffset): self.lastCrossRefSection = newOffset @@ -4685,12 +4700,12 @@ def setNumObjects(self, newNumObjects): self.addError(errorMessage) size = PDFNum('0') else: - return (-1,errorMessage) + return (-1, errorMessage) ret = self.setDictEntry('/Size', size) return ret - + def setOffset(self, offset): - self.offset = offset + self.offset = offset def setPrevCrossRefSection(self, newOffset): try: @@ -4701,17 +4716,17 @@ def setPrevCrossRefSection(self, newOffset): self.addError(errorMessage) prevSectionObject = PDFNum('0') else: - return (-1,errorMessage) + return (-1, errorMessage) ret = self.dict.setElement('/Prev', prevSectionObject) if ret[0] == -1: errorMessage = ret[1]+' in dictionary element' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) return ret def setSize(self, newSize): self.size = newSize - + def setTrailerDictionary(self, newDict): self.dict = newDict ret = self.update() @@ -4719,7 +4734,7 @@ def setTrailerDictionary(self, newDict): def setXrefStreamObject(self, id): self.streamObject = id - + def toFile(self): output = '' if self.dict.getNumElements() > 0: @@ -4729,10 +4744,10 @@ def toFile(self): output += str(self.lastCrossRefSection) + newLine output += '%%EOF' + newLine return output - -class PDFFile : - def __init__(self) : + +class PDFFile: + def __init__(self): self.fileName = '' self.path = '' self.size = 0 @@ -4741,7 +4756,7 @@ def __init__(self) : self.sha256 = '' self.detectionRate = [] self.detectionReport = '' - self.body = [] # PDFBody[] + self.body = [] # PDFBody[] self.binary = False self.binaryChars = '' self.linearized = False @@ -4754,15 +4769,15 @@ def __init__(self) : self.ownerPass = '' self.userPass = '' self.JSCode = '' - self.crossRefTable = [] # PDFCrossRefSection[] - self.comments = [] # string[] + self.crossRefTable = [] # 
PDFCrossRefSection[] + self.comments = [] # string[] self.version = '' self.headerOffset = 0 self.garbageHeader = '' self.suspiciousElements = {} self.updates = 0 self.endLine = '' - self.trailer = [] # PDFTrailer[] + self.trailer = [] # PDFTrailer[] self.errors = [] self.numObjects = 0 self.numStreams = 0 @@ -4772,19 +4787,19 @@ def __init__(self) : self.maxObjectId = 0 def addBody(self, newBody): - if newBody != None and isinstance(newBody,PDFBody): + if newBody is not None and isinstance(newBody, PDFBody): self.body.append(newBody) - return (0,'') + return (0, '') else: - return (-1,'Bad PDFBody supplied') + return (-1, 'Bad PDFBody supplied') def addCrossRefTableSection(self, newSectionArray): - if newSectionArray != None and isinstance(newSectionArray,list) and len(newSectionArray) == 2 and (newSectionArray[0] == None or isinstance(newSectionArray[0],PDFCrossRefSection)) and (newSectionArray[1] == None or isinstance(newSectionArray[1],PDFCrossRefSection)): + if newSectionArray is not None and isinstance(newSectionArray, list) and len(newSectionArray) == 2 and (newSectionArray[0] is None or isinstance(newSectionArray[0], PDFCrossRefSection)) and (newSectionArray[1] is None or isinstance(newSectionArray[1], PDFCrossRefSection)): self.crossRefTable.append(newSectionArray) - return (0,'') + return (0, '') else: - return (-1,'Bad PDFCrossRefSection array supplied') - + return (-1, 'Bad PDFCrossRefSection array supplied') + def addError(self, errorMessage): if errorMessage not in self.errors: self.errors.append(errorMessage) @@ -4794,10 +4809,10 @@ def addNumDecodingErrors(self, num): def addNumEncodedStreams(self, num): self.numEncodedStreams += num - + def addNumObjects(self, num): self.numObjects += num - + def addNumStreams(self, num): self.numStreams += num @@ -4805,35 +4820,35 @@ def addNumURIs(self, num): self.numURIs += num def addTrailer(self, newTrailerArray): - if newTrailerArray != None and isinstance(newTrailerArray,list) and len(newTrailerArray) == 2 
and (newTrailerArray[0] == None or isinstance(newTrailerArray[0],PDFTrailer)) and (newTrailerArray[1] == None or isinstance(newTrailerArray[1],PDFTrailer)): + if newTrailerArray is not None and isinstance(newTrailerArray, list) and len(newTrailerArray) == 2 and (newTrailerArray[0] is None or isinstance(newTrailerArray[0], PDFTrailer)) and (newTrailerArray[1] is None or isinstance(newTrailerArray[1], PDFTrailer)): self.trailer.append(newTrailerArray) - return (0,'') + return (0, '') else: - return (-1,'Bad PDFTrailer array supplied') + return (-1, 'Bad PDFTrailer array supplied') - def createObjectStream(self, version = None, id = None, objectIds = []): + def createObjectStream(self, version=None, id=None, objectIds=[]): errorMessage = '' tmpStreamObjects = '' tmpStreamObjectsInfo = '' compressedStream = '' compressedDict = {} firstObjectOffset = '' - if version == None: + if version is None: version = self.updates if objectIds == []: objectIds = self.body[version].getObjectsIds() numObjects = len(objectIds) - if id == None: + if id is None: id = self.maxObjectId + 1 for compressedId in objectIds: object = self.body[version].getObject(compressedId) - if object == None: + if object is None: errorMessage = 'Object '+str(compressedId)+' cannot be compressed: it does not exist' if isForceMode: self.addError(errorMessage) numObjects -= 1 else: - return (-1,errorMessage) + return (-1, errorMessage) else: objectType = object.getType() if objectType == 'stream': @@ -4849,24 +4864,29 @@ def createObjectStream(self, version = None, id = None, objectIds = []): offset = len(tmpStreamObjects) tmpStreamObjectsInfo += str(compressedId)+' '+str(offset)+' ' tmpStreamObjects += object.toFile() - ret = self.body[version].setObject(compressedId,object,offset,modification = True) + ret = self.body[version].setObject(compressedId, object, offset, modification=True) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) firstObjectOffset = str(len(tmpStreamObjectsInfo)) 
compressedStream = tmpStreamObjectsInfo + tmpStreamObjects - compressedDict = {'/Type':PDFName('ObjStm'),'/N':PDFNum(str(numObjects)),'/First':PDFNum(firstObjectOffset),'/Length':PDFNum(str(len(compressedStream)))} + compressedDict = { + '/Type': PDFName('ObjStm'), + '/N': PDFNum(str(numObjects)), + '/First': PDFNum(firstObjectOffset), + '/Length': PDFNum(str(len(compressedStream))) + } try: - objectStream = PDFObjectStream('',compressedStream,compressedDict,{},{}) + objectStream = PDFObjectStream('', compressedStream, compressedDict, {}, {}) except Exception as e: errorMessage = 'Error creating PDFObjectStream' if e.message != '': errorMessage += ': '+e.message self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) # Filters filterObject = PDFName('FlateDecode') - ret = objectStream.setElement('/Filter',filterObject) + ret = objectStream.setElement('/Filter', filterObject) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) @@ -4883,24 +4903,24 @@ def createObjectStream(self, version = None, id = None, objectIds = []): errorMessage = ret[1] self.addError(ret[1]) self.body[version].setNextOffset(objectStreamOffset+len(objectStream.getRawValue())) - self.body[version].setObject(id,objectStream,objectStreamOffset) + self.body[version].setObject(id, objectStream, objectStreamOffset) # Xref stream ret = self.createXrefStream(version) if ret[0] == -1: return ret xrefStreamId, xrefStream = ret[1] xrefStreamOffset = self.body[version].getNextOffset() - ret = self.body[version].setObject(xrefStreamId,xrefStream,xrefStreamOffset) + ret = self.body[version].setObject(xrefStreamId, xrefStream, xrefStreamOffset) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) self.binary = True self.binaryChars = '\xC0\xFF\xEE\xFA\xBA\xDA' if errorMessage != '': - return (-1,errorMessage) - return (0,id) + return (-1, errorMessage) + return (0, id) - def createXrefStream(self, version, id = None): + def createXrefStream(self, version, id=None): 
size = 0 elementsDict = {} elementsTrailerDict = {} @@ -4911,19 +4931,19 @@ def createXrefStream(self, version, id = None): xrefStreamId = None bytesPerFieldArray = [] - if version == None: + if version is None: version = self.updates # Trailer update if len(self.trailer) > version: - if self.trailer[version][1] != None: + if self.trailer[version][1] is not None: trailerDict = self.trailer[version][1].getTrailerDictionary() - if trailerDict != None: + if trailerDict is not None: elementsTrailerDict = dict(trailerDict.getElements()) elementsDict = dict(elementsTrailerDict) del(trailerDict) - if self.trailer[version][0] != None: + if self.trailer[version][0] is not None: trailerDict = self.trailer[version][0].getTrailerDictionary() - if trailerDict != None: + if trailerDict is not None: trailerElementsDict = dict(trailerDict.getElements()) if len(trailerElementsDict) > 0: for key in trailerElementsDict: @@ -4931,12 +4951,12 @@ def createXrefStream(self, version, id = None): elementsTrailerDict[key] = trailerElementsDict[key] elementsDict[key] = trailerElementsDict[key] del(trailerElementsDict) - del(trailerDict) + del(trailerDict) self.createXrefStreamSection(version) if len(self.crossRefTable) <= version: errorMessage = 'Cross Reference Table not found' self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) section = self.crossRefTable[version][1] xrefStreamId = section.getXrefStreamObject() bytesPerField = section.getBytesPerField() @@ -4945,8 +4965,7 @@ def createXrefStream(self, version, id = None): bytesPerFieldArray.append(PDFNum(str(num))) except: errorMessage = 'Error creating PDFNum in bytesPerField' - return (-1,errorMessage) - subsectionsNumber = section.getSubsectionsNumber() + return (-1, errorMessage) subsections = section.getSubsectionsArray() for subsection in subsections: firstObject = subsection.getFirstObject() @@ -4958,31 +4977,31 @@ def createXrefStream(self, version, id = None): ret = 
entry.getEntryBytes(bytesPerField) if ret[0] == -1: self.addError(ret[1]) - return (-1,ret[1]) + return (-1, ret[1]) stream += ret[1] if size < firstObject + numObjects: size = firstObject + numObjects elementsDict['/Type'] = PDFName('XRef') elementsDict['/Size'] = PDFNum(str(size)) elementsTrailerDict['/Size'] = PDFNum(str(size)) - elementsDict['/Index'] = PDFArray('',indexArray) - elementsDict['/W'] = PDFArray('',bytesPerFieldArray) + elementsDict['/Index'] = PDFArray('', indexArray) + elementsDict['/W'] = PDFArray('', bytesPerFieldArray) elementsDict['/Length'] = PDFNum(str(len(stream))) try: - xrefStream = PDFStream('',stream,elementsDict,{}) + xrefStream = PDFStream('', stream, elementsDict, {}) except Exception as e: errorMessage = 'Error creating PDFStream' if e.message != '': errorMessage += ': '+e.message self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) # Filters filterObject = PDFName('FlateDecode') - if id != None: + if id is not None: xrefStreamObject = self.getObject(id, version) - if xrefStreamObject != None: + if xrefStreamObject is not None: filterObject = xrefStreamObject.getElementByName('/Filter') - ret = xrefStream.setElement('/Filter',filterObject) + ret = xrefStream.setElement('/Filter', filterObject) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) @@ -4993,32 +5012,32 @@ def createXrefStream(self, version, id = None): if e.message != '': errorMessage += ': '+e.message self.addError(errorMessage) - return (-1,errorMessage) + return (-1, errorMessage) trailerStream.setXrefStreamObject(xrefStreamId) try: - trailerSection = PDFTrailer(PDFDictionary(elements=dict(elementsTrailerDict)))#PDFDictionary()) + trailerSection = PDFTrailer(PDFDictionary(elements=dict(elementsTrailerDict))) # PDFDictionary()) except Exception as e: errorMessage = 'Error creating PDFTrailer' if e.message != '': errorMessage += ': '+e.message self.addError(errorMessage) - return (-1,errorMessage) - self.trailer[version] = 
[trailerSection,trailerStream] + return (-1, errorMessage) + self.trailer[version] = [trailerSection, trailerStream] if errorMessage != '': - return (-1,errorMessage) - return (0,[xrefStreamId,xrefStream]) - - def createXrefStreamSection(self, version = None): + return (-1, errorMessage) + return (0, [xrefStreamId, xrefStream]) + + def createXrefStreamSection(self, version=None): lastId = 0 lastFreeObject = 0 errorMessage = '' xrefStreamId = None - xrefEntries = [PDFCrossRefEntry(0,65535,0)] - if version == None: + xrefEntries = [PDFCrossRefEntry(0, 65535, 0)] + if version is None: version = self.updates actualStream = self.crossRefTable[version][1] - if actualStream != None: - xrefStreamId = actualStream.getXrefStreamObject() + if actualStream is not None: + xrefStreamId = actualStream.getXrefStreamObject() sortedObjectsByOffset = self.body[version].getObjectsIds() sortedObjectsIds = sorted(sortedObjectsByOffset, key=lambda x: int(x)) indirectObjects = self.body[version].getObjects() @@ -5029,41 +5048,41 @@ def createXrefStreamSection(self, version = None): xrefEntries[lastFreeObject] = lastFreeEntry lastFreeObject = lastId+1 lastId += 1 - xrefEntries.append(PDFCrossRefEntry(0,65535,0)) + xrefEntries.append(PDFCrossRefEntry(0, 65535, 0)) indirectObject = indirectObjects[id] - if indirectObject != None: + if indirectObject is not None: object = indirectObject.getObject() - if object != None: + if object is not None: if object.isCompressed(): objectStreamId = object.getCompressedIn() objectStream = self.body[version].getObject(objectStreamId) index = objectStream.getObjectIndex(id) - if index == None: + if index is None: errorMessage = 'Compressed object not found in object stream' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) - entry = PDFCrossRefEntry(objectStreamId,index,2) + return (-1, errorMessage) + entry = PDFCrossRefEntry(objectStreamId, index, 2) else: offset = indirectObject.getOffset() - entry = PDFCrossRefEntry(offset,0,1) 
+ entry = PDFCrossRefEntry(offset, 0, 1) xrefEntries.append(entry) lastId = id - if actualStream == None: + if actualStream is None: offset += len(str(object.getRawValue())) - xrefEntries.append(PDFCrossRefEntry(offset,0,1)) + xrefEntries.append(PDFCrossRefEntry(offset, 0, 1)) lastId += 1 xrefStreamId = lastId - subsection = PDFCrossRefSubSection(0,lastId+1,xrefEntries) + subsection = PDFCrossRefSubSection(0, lastId+1, xrefEntries) xrefSection = PDFCrossRefSection() xrefSection.addSubsection(subsection) xrefSection.setXrefStreamObject(xrefStreamId) - xrefSection.setBytesPerField([1,2,2]) - self.crossRefTable[version] = [None,xrefSection] + xrefSection.setBytesPerField([1, 2, 2]) + self.crossRefTable[version] = [None, xrefSection] if errorMessage != '': - return (-1,errorMessage) - return (0,lastId) + return (-1, errorMessage) + return (0, lastId) def decrypt(self, password=''): badPassword = False @@ -5076,24 +5095,24 @@ def decrypt(self, password=''): strAlgorithm = None embedAlgorithm = None computedUserPass = '' - dictO = '' + dictO = '' dictU = '' perm = 0 revision = 0 fileId = self.getFileId() - self.removeError(errorType = 'Decryption error') - if self.encryptDict == None or self.encryptDict[1] == []: + self.removeError(errorType='Decryption error') + if self.encryptDict is None or self.encryptDict[1] == []: errorMessage = 'Decryption error: /Encrypt dictionary not found!!' if isForceMode: self.addError(errorMessage) else: - return (-1,errorMessage) + return (-1, errorMessage) # Getting /Encrypt elements encDict = self.encryptDict[1] # Filter - if encDict.has_key('/Filter'): + if '/Filter' in encDict: filter = encDict['/Filter'] - if filter != None and filter.getType() == 'name': + if filter is not None and filter.getType() == 'name': filter = filter.getValue() if filter != '/Standard': errorMessage = 'Decryption error: Filter not supported!!' 
@@ -5117,29 +5136,29 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # Algorithm version - if encDict.has_key('/V'): + if "/V" in encDict: algVersion = encDict['/V'] - if algVersion != None and algVersion.getType() == 'integer': + if algVersion is not None and algVersion.getType() == 'integer': algVersion = algVersion.getRawValue() if algVersion == 4 or algVersion == 5: - stmAlgorithm = ['Identity',40] - strAlgorithm = ['Identity',40] - embedAlgorithm = ['Identity',40] + stmAlgorithm = ['Identity', 40] + strAlgorithm = ['Identity', 40] + embedAlgorithm = ['Identity', 40] algorithms = {} - if encDict.has_key('/CF'): + if "/CF" in encDict: cfDict = encDict['/CF'] - if cfDict != None and cfDict.getType() == 'dictionary': + if cfDict is not None and cfDict.getType() == 'dictionary': cfDict = cfDict.getElements() for cryptFilter in cfDict: cryptFilterDict = cfDict[cryptFilter] - if cryptFilterDict != None and cryptFilterDict.getType() == 'dictionary': + if cryptFilterDict is not None and cryptFilterDict.getType() == 'dictionary': algorithms[cryptFilter] = [] defaultKeyLength = 40 cfmValue = '' cryptFilterDict = cryptFilterDict.getElements() - if cryptFilterDict.has_key('/CFM'): + if "/CFM" in cryptFilterDict: cfmValue = cryptFilterDict['/CFM'] - if cfmValue != None and cfmValue.getType() == 'name': + if cfmValue is not None and cfmValue.getType() == 'name': cfmValue = cfmValue.getValue() if cfmValue == 'None': algorithms[cryptFilter].append('Identity') @@ -5164,10 +5183,10 @@ def decrypt(self, password=''): self.addError(errorMessage) else: return (-1, errorMessage) - if cryptFilterDict.has_key('/Length') and cfmValue != '/AESV3': + if "/Length" in cryptFilterDict and cfmValue != '/AESV3': # Length is key length in bits keyLength = cryptFilterDict['/Length'] - if keyLength != None and keyLength.getType() == 'integer': + if keyLength is not None and keyLength.getType() == 'integer': keyLength = keyLength.getRawValue() if keyLength % 8 != 0: keyLength = 
defaultKeyLength @@ -5187,9 +5206,9 @@ def decrypt(self, password=''): self.addError(errorMessage) else: return (-1, errorMessage) - if encDict.has_key('/StmF'): + if "/StmF" in encDict: stmF = encDict['/StmF'] - if stmF != None and stmF.getType() == 'name': + if stmF is not None and stmF.getType() == 'name': stmF = stmF.getValue() if stmF in algorithms: stmAlgorithm = algorithms[stmF] @@ -5199,9 +5218,9 @@ def decrypt(self, password=''): self.addError(errorMessage) else: return (-1, errorMessage) - if encDict.has_key('/StrF'): + if "/StrF" in encDict: strF = encDict['/StrF'] - if strF != None and strF.getType() == 'name': + if strF is not None and strF.getType() == 'name': strF = strF.getValue() if strF in algorithms: strAlgorithm = algorithms[strF] @@ -5211,9 +5230,9 @@ def decrypt(self, password=''): self.addError(errorMessage) else: return (-1, errorMessage) - if encDict.has_key('/EEF'): + if "/EEF" in encDict: eeF = encDict['/EEF'] - if eeF != None and eeF.getType() == 'name': + if eeF is not None and eeF.getType() == 'name': eeF = eeF.getValue() if eeF in algorithms: embedAlgorithm = algorithms[eeF] @@ -5227,11 +5246,11 @@ def decrypt(self, password=''): else: embedAlgorithm = stmAlgorithm if stmAlgorithm not in encryptionAlgorithms: - encryptionAlgorithms.append(stmAlgorithm) + encryptionAlgorithms.append(stmAlgorithm) if strAlgorithm not in encryptionAlgorithms: encryptionAlgorithms.append(strAlgorithm) - if embedAlgorithm not in encryptionAlgorithms and embedAlgorithm != ['Identity',40]: # Not showing default embedAlgorithm - encryptionAlgorithms.append(embedAlgorithm) + if embedAlgorithm not in encryptionAlgorithms and embedAlgorithm != ['Identity', 40]: # Not showing default embedAlgorithm + encryptionAlgorithms.append(embedAlgorithm) else: errorMessage = 'Decryption error: Bad format for /V!!' 
if isForceMode: @@ -5246,11 +5265,11 @@ def decrypt(self, password=''): self.addError(errorMessage) else: return (-1, errorMessage) - + # Key length - if encDict.has_key('/Length'): + if "/Length" in encDict: keyLength = encDict['/Length'] - if keyLength != None and keyLength.getType() == 'integer': + if keyLength is not None and keyLength.getType() == 'integer': keyLength = keyLength.getRawValue() if keyLength % 8 != 0: keyLength = 40 @@ -5260,30 +5279,30 @@ def decrypt(self, password=''): self.addError('Decryption error: Bad format for /Length!!') else: keyLength = 40 - + # Setting algorithms if algVersion == 1 or algVersion == 2: - algorithm = ['RC4',keyLength] + algorithm = ['RC4', keyLength] stmAlgorithm = strAlgorithm = embedAlgorithm = algorithm elif algVersion == 3: errorMessage = 'Decryption error: Algorithm not supported!!' if isForceMode: - algorithm = ['Unpublished',keyLength] + algorithm = ['Unpublished', keyLength] stmAlgorithm = strAlgorithm = embedAlgorithm = algorithm self.addError(errorMessage) else: return (-1, errorMessage) elif algVersion == 5: - algorithm = ['AES',256] - if algorithm != None and algorithm not in encryptionAlgorithms: + algorithm = ['AES', 256] + if algorithm is not None and algorithm not in encryptionAlgorithms: encryptionAlgorithms.append(algorithm) self.setEncryptionAlgorithms(encryptionAlgorithms) - + # Standard encryption: /R /P /O /U # Revision - if encDict.has_key('/R'): + if "/R" in encDict: revision = encDict['/R'] - if revision != None and revision.getType() == 'integer': + if revision is not None and revision.getType() == 'integer': revision = revision.getRawValue() if revision < 2 or revision > 5: errorMessage = 'Decryption error: Algorithm revision not supported!!' 
@@ -5308,9 +5327,9 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # Permission - if encDict.has_key('/P'): + if "/P" in encDict: perm = encDict['/P'] - if perm != None and perm.getType() == 'integer': + if perm is not None and perm.getType() == 'integer': perm = perm.getRawValue() else: errorMessage = 'Decryption error: Bad format for /P!!' @@ -5328,9 +5347,9 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # Owner pass - if encDict.has_key('/O'): + if "/O" in encDict: dictO = encDict['/O'] - if dictO != None and dictO.getType() in ['string','hexstring']: + if dictO is not None and dictO.getType() in ['string', 'hexstring']: dictO = dictO.getValue() else: errorMessage = 'Decryption error: Bad format for /O!!' @@ -5348,9 +5367,9 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # Owner encrypted string - if encDict.has_key('/OE'): + if "/OE" in encDict: dictOE = encDict['/OE'] - if dictOE != None and dictOE.getType() in ['string','hexstring']: + if dictOE is not None and dictOE.getType() in ['string', 'hexstring']: dictOE = dictOE.getValue() else: errorMessage = 'Decryption error: Bad format for /OE!!' @@ -5368,9 +5387,9 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # User pass - if encDict.has_key('/U'): + if "/U" in encDict: dictU = encDict['/U'] - if dictU != None and dictU.getType() in ['string','hexstring']: + if dictU is not None and dictU.getType() in ['string', 'hexstring']: dictU = dictU.getValue() else: errorMessage = 'Decryption error: Bad format for /U!!' @@ -5388,9 +5407,9 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # User encrypted string - if encDict.has_key('/UE'): + if "/UE" in encDict: dictUE = encDict['/UE'] - if dictUE != None and dictUE.getType() in ['string','hexstring']: + if dictUE is not None and dictUE.getType() in ['string', 'hexstring']: dictUE = dictUE.getValue() else: errorMessage = 'Decryption error: Bad format for /UE!!' 
@@ -5408,9 +5427,9 @@ def decrypt(self, password=''): else: return (-1, errorMessage) # Metadata encryption - if encDict.has_key('/EncryptMetadata'): + if "/EncryptMetadata" in encDict: encryptMetadata = encDict['/EncryptMetadata'] - if encryptMetadata != None and encryptMetadata.getType() == 'bool': + if encryptMetadata is not None and encryptMetadata.getType() == 'bool': encryptMetadata = encryptMetadata.getValue() != 'false' else: errorMessage = 'Decryption error: Bad format for /EncryptMetadata!!' @@ -5421,6 +5440,10 @@ def decrypt(self, password=''): return (-1, errorMessage) else: encryptMetadata = True + if six.PY3 and isinstance(password, str): + password = password.encode('latin-1') + dictO = dictO.encode('latin-1') + fileId = fileId.encode('latin-1') if not fatalError: # Checking user password if revision != 5: @@ -5471,16 +5494,16 @@ def decrypt(self, password=''): for v in range(self.updates+1): indirectObjectsIds = list(set(self.body[v].getObjectsIds())) for id in indirectObjectsIds: - indirectObject = self.body[v].getObject(id, indirect = True) - if indirectObject != None: + indirectObject = self.body[v].getObject(id, indirect=True) + if indirectObject is not None: generationNum = indirectObject.getGenerationNumber() object = indirectObject.getObject() - if object != None and not object.isCompressed(): + if object is not None and not object.isCompressed(): objectType = object.getType() if objectType in ['string', 'hexstring', 'array', 'dictionary'] or \ (objectType == 'stream' and (object.getElement('/Type') is None or - (object.getElement('/Type').getValue() not in ['/XRef', '/Metadata'] or - (object.getElement('/Type').getValue() == '/Metadata' and encryptMetadata)))): + (object.getElement('/Type').getValue() not in ['/XRef', '/Metadata'] or + (object.getElement('/Type').getValue() == '/Metadata' and encryptMetadata)))): key = self.encryptionKey # Removing already set global stats before modifying the object contents self.body[v].updateStats(id, 
object, delete=True) @@ -5495,9 +5518,9 @@ def decrypt(self, password=''): key = ret[1] ret = object.decrypt(key, strAlgorithm[0]) else: - if object.getElement('/Type') != None and object.getElement('/Type').getValue() == '/EmbeddedFile': + if object.getElement('/Type') is not None and object.getElement('/Type').getValue() == '/EmbeddedFile': if revision < 5: - ret = computeObjectKey(id,generationNum,self.encryptionKey,numKeyBytes,embedAlgorithm[0]) + ret = computeObjectKey(id, generationNum, self.encryptionKey, numKeyBytes, embedAlgorithm[0]) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) @@ -5506,7 +5529,7 @@ def decrypt(self, password=''): altAlgorithm = embedAlgorithm[0] else: if revision < 5: - ret = computeObjectKey(id,generationNum,self.encryptionKey,numKeyBytes,stmAlgorithm[0]) + ret = computeObjectKey(id, generationNum, self.encryptionKey, numKeyBytes, stmAlgorithm[0]) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) @@ -5523,12 +5546,12 @@ def decrypt(self, password=''): self.addError(ret[1]) if errorMessage != '': return (-1, errorMessage) - return (0,'') + return (0, '') - def deleteObject (self, id) : - # Remove references too + def deleteObject(self, id): + # Remove references too pass - + def encodeChars(self): errorMessage = '' for i in range(self.updates+1): @@ -5537,7 +5560,7 @@ def encodeChars(self): errorMessage = ret[1] self.addError(errorMessage) trailerArray = self.trailer[i] - if trailerArray[0] != None: + if trailerArray[0] is not None: ret = trailerArray[0].encodeChars() if ret[0] == -1: errorMessage = ret[1] @@ -5545,10 +5568,10 @@ def encodeChars(self): self.trailer[i] = trailerArray if errorMessage != '': return (-1, errorMessage) - return (0,'') - - def encrypt(self, password = ''): - #TODO: AESV2 and V3 + return (0, '') + + def encrypt(self, password=''): + # TODO: AESV2 and V3 errorMessage = '' encryptDictId = None encryptMetadata = True @@ -5556,11 +5579,11 @@ def encrypt(self, password = ''): dictOE = '' 
dictUE = '' ret = self.getTrailer() - if ret != None: - trailer,trailerStream = ret[1] - if trailerStream != None: + if ret is not None: + trailer, trailerStream = ret[1] + if trailerStream is not None: encryptDict = trailerStream.getDictEntry('/Encrypt') - if encryptDict != None: + if encryptDict is not None: encryptDictType = encryptDict.getType() if encryptDictType == 'reference': encryptDictId = encryptDict.getId() @@ -5568,12 +5591,12 @@ def encrypt(self, password = ''): if fileId == '': fileId = hashlib.md5(str(random.random())).hexdigest() md5Object = PDFString(fileId) - fileIdArray = PDFArray(elements=[md5Object,md5Object]) - trailerStream.setDictEntry('/ID',fileIdArray) - self.setTrailer([trailer,trailerStream]) + fileIdArray = PDFArray(elements=[md5Object, md5Object]) + trailerStream.setDictEntry('/ID', fileIdArray) + self.setTrailer([trailer, trailerStream]) else: encryptDict = trailer.getDictEntry('/Encrypt') - if encryptDict != None: + if encryptDict is not None: encryptDictType = encryptDict.getType() if encryptDictType == 'reference': encryptDictId = encryptDict.getId() @@ -5581,29 +5604,29 @@ def encrypt(self, password = ''): if fileId == '': fileId = hashlib.md5(str(random.random())).hexdigest() md5Object = PDFString(fileId) - fileIdArray = PDFArray(elements=[md5Object,md5Object]) - trailer.setDictEntry('/ID',fileIdArray) - self.setTrailer([trailer,trailerStream]) - - ret = computeOwnerPass(password,password,128,revision = 3) + fileIdArray = PDFArray(elements=[md5Object, md5Object]) + trailer.setDictEntry('/ID', fileIdArray) + self.setTrailer([trailer, trailerStream]) + + ret = computeOwnerPass(password, password, 128, revision=3) if ret[0] != -1: dictO = ret[1] else: if isForceMode: self.addError(ret[1]) else: - return (-1,ret[1]) + return (-1, ret[1]) self.setOwnerPass(dictO) - ret = computeUserPass(password,dictO,fileId,permissionNum,128,revision = 3) + ret = computeUserPass(password, dictO, fileId, permissionNum, 128, revision=3) if ret[0] != 
-1: dictU = ret[1] else: if isForceMode: self.addError(ret[1]) else: - return (-1,ret[1]) + return (-1, ret[1]) self.setUserPass(dictU) - ret = computeEncryptionKey(password, dictO, dictU, dictOE, dictUE, fileId, permissionNum, 128, revision = 3, encryptMetadata = encryptMetadata, passwordType = 'USER') + ret = computeEncryptionKey(password, dictO, dictU, dictOE, dictUE, fileId, permissionNum, 128, revision=3, encryptMetadata=encryptMetadata, passwordType='USER') if ret[0] != -1: encryptionKey = ret[1] else: @@ -5611,36 +5634,43 @@ def encrypt(self, password = ''): if isForceMode: self.addError(ret[1]) else: - return (-1,ret[1]) + return (-1, ret[1]) self.setEncryptionKey(encryptionKey) self.setEncryptionKeyLength(128) - encryptDict = PDFDictionary(elements = {'/V':PDFNum('2'),'/Length':PDFNum('128'),'/Filter':PDFName('Standard'), - '/R':PDFNum('3'),'/P':PDFNum(str(permissionNum)),'/O':PDFString(dictO),'/U':PDFString(dictU)}) - if encryptDictId != None: - ret = self.setObject(encryptDictId,encryptDict) + encryptDict = PDFDictionary(elements={ + '/V': PDFNum('2'), + '/Length': PDFNum('128'), + '/Filter': PDFName('Standard'), + '/R': PDFNum('3'), + '/P': PDFNum(str(permissionNum)), + '/O': PDFString(dictO), + '/U': PDFString(dictU) + }) + if encryptDictId is not None: + ret = self.setObject(encryptDictId, encryptDict) if ret[0] == -1: errorMessage = '/Encrypt dictionary has not been created/modified' self.addError(errorMessage) return (-1, errorMessage) else: - if trailerStream != None: - trailerStream.setDictEntry('/Encrypt',encryptDict) + if trailerStream is not None: + trailerStream.setDictEntry('/Encrypt', encryptDict) else: - trailer.setDictEntry('/Encrypt',encryptDict) - self.setTrailer([trailer,trailerStream]) - + trailer.setDictEntry('/Encrypt', encryptDict) + self.setTrailer([trailer, trailerStream]) + numKeyBytes = self.encryptionKeyLength/8 for v in range(self.updates+1): indirectObjects = self.body[v].getObjects() for id in indirectObjects: indirectObject 
= indirectObjects[id] - if indirectObject != None: + if indirectObject is not None: generationNum = indirectObject.getGenerationNumber() object = indirectObject.getObject() - if object != None and not object.isCompressed(): + if object is not None and not object.isCompressed(): objectType = object.getType() - if objectType in ['string','hexstring','array','dictionary'] or (objectType == 'stream' and (object.getElement('/Type') == None or (object.getElement('/Type').getValue() not in ['/XRef','/Metadata'] or (object.getElement('/Type').getValue() == '/Metadata' and encryptMetadata)))): - ret = computeObjectKey(id,generationNum,self.encryptionKey,numKeyBytes) + if objectType in ['string', 'hexstring', 'array', 'dictionary'] or (objectType == 'stream' and (object.getElement('/Type') is None or (object.getElement('/Type').getValue() not in ['/XRef', '/Metadata'] or (object.getElement('/Type').getValue() == '/Metadata' and encryptMetadata)))): + ret = computeObjectKey(id, generationNum, self.encryptionKey, numKeyBytes) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) @@ -5650,7 +5680,7 @@ def encrypt(self, password = ''): if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) - ret = self.body[v].setObject(id,object) + ret = self.body[v].setObject(id, object) if ret[0] == -1: errorMessage = ret[1] self.addError(ret[1]) @@ -5660,99 +5690,119 @@ def encrypt(self, password = ''): if errorMessage != '': return (-1, errorMessage) self.setEncrypted(True) - return (0,'') - + return (0, '') + def getBasicMetadata(self, version): basicMetadata = {} - + # Getting creation information infoObject = self.getInfoObject(version) - if infoObject != None: + if infoObject is not None: author = infoObject.getElementByName('/Author') - if author != None and author != []: + if author is not None and author != []: basicMetadata['author'] = author.getValue() + + subject = infoObject.getElementByName('/Subject') + if subject is not None and subject != []: + 
basicMetadata['subject'] = subject.getValue() + + title = infoObject.getElementByName('/Title') + if title is not None and title != []: + basicMetadata['title'] = title.getValue() + creator = infoObject.getElementByName('/Creator') - if creator != None and creator != []: + if creator is not None and creator != []: basicMetadata['creator'] = creator.getValue() + producer = infoObject.getElementByName('/Producer') - if producer != None and producer != []: + if producer is not None and producer != []: basicMetadata['producer'] = producer.getValue() + creationDate = infoObject.getElementByName('/CreationDate') - if creationDate != None and creationDate != []: + if creationDate is not None and creationDate != []: basicMetadata['creation'] = creationDate.getValue() - if not basicMetadata.has_key('author'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "author" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: author = self.getMetadataElement(id, version, 'dc:creator') - if author != None: + if author is not None: basicMetadata['author'] = author break - if not basicMetadata.has_key('creator'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "creator" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: creator = self.getMetadataElement(id, version, 'xap:CreatorTool') - if creator != None: + if creator is not None: basicMetadata['creator'] = creator break - if not basicMetadata.has_key('creator'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "creator" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: creator = self.getMetadataElement(id, version, 'xmp:CreatorTool') - if creator != None: + if creator is not None: basicMetadata['creator'] = creator break - if not 
basicMetadata.has_key('producer'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "producer" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: producer = self.getMetadataElement(id, version, 'pdf:Producer') - if producer != None: + if producer is not None: basicMetadata['producer'] = producer break - if not basicMetadata.has_key('creation'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "creation" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: creation = self.getMetadataElement(id, version, 'xap:CreateDate') - if creation != None: + if creation is not None: basicMetadata['creation'] = creation break - if not basicMetadata.has_key('creation'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "creation" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: creation = self.getMetadataElement(id, version, 'xmp:CreateDate') - if creation != None: + if creation is not None: basicMetadata['creation'] = creation break - if not basicMetadata.has_key('modification'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "modification" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: modification = self.getMetadataElement(id, version, 'xap:ModifyDate') - if modification != None: + if modification is not None: basicMetadata['modification'] = modification break - if not basicMetadata.has_key('modification'): - ids = self.getObjectsByString('',version) - if ids != None and ids != []: + + if "modification" not in basicMetadata: + ids = self.getObjectsByString('', version) + if ids is not None and ids != []: for id in ids: modification = self.getMetadataElement(id, version, 
'xmp:ModifyDate') - if modification != None: + if modification is not None: basicMetadata['modification'] = modification break + return basicMetadata - + def getCatalogObject(self, version=None, indirect=False): - if version == None: + if version is None: catalogObjects = [] catalogIds = self.getCatalogObjectId() - for i in xrange(len(catalogIds)): + for i in range(len(catalogIds)): id = catalogIds[i] - if id != None: + if id is not None: catalogObject = self.getObject(id, i, indirect) catalogObjects.append(catalogObject) else: @@ -5760,34 +5810,34 @@ def getCatalogObject(self, version=None, indirect=False): return catalogObjects else: catalogId = self.getCatalogObjectId(version) - if catalogId != None: + if catalogId is not None: catalogObject = self.getObject(catalogId, version, indirect) return catalogObject else: return None - def getCatalogObjectId(self, version = None): - if version == None: + def getCatalogObjectId(self, version=None): + if version is None: catalogIds = [] for v in range(self.updates+1): catalogId = None trailer, streamTrailer = self.trailer[v] - if trailer != None: + if trailer is not None: catalogId = trailer.getCatalogId() - if catalogId == None and streamTrailer != None: + if catalogId is None and streamTrailer is not None: catalogId = streamTrailer.getCatalogId() catalogIds.append(catalogId) return catalogIds else: catalogId = None trailer, streamTrailer = self.trailer[version] - if trailer != None: + if trailer is not None: catalogId = trailer.getCatalogId() - if catalogId == None and streamTrailer != None: + if catalogId is None and streamTrailer is not None: catalogId = streamTrailer.getCatalogId() return catalogId - def getChangeLog (self, version = None) : + def getChangeLog(self, version=None): lastVersionObjects = [] actualVersionObjects = [] addedObjects = [] @@ -5795,7 +5845,7 @@ def getChangeLog (self, version = None) : modifiedObjects = [] notMatchingObjects = [] changes = [] - if version == None: + if version is None: 
version = self.updates + 1 else: version += 1 @@ -5806,10 +5856,10 @@ def getChangeLog (self, version = None) : xrefFreeObjects = [] crossRefSection = self.crossRefTable[i][0] crossRefStreamSection = self.crossRefTable[i][1] - if crossRefSection != None: + if crossRefSection is not None: xrefNewObjects += crossRefSection.getNewObjectIds() xrefFreeObjects += crossRefSection.getFreeObjectIds() - if crossRefStreamSection != None: + if crossRefStreamSection is not None: xrefNewObjects += crossRefStreamSection.getNewObjectIds() xrefFreeObjects += crossRefStreamSection.getFreeObjectIds() for id in actualVersionObjects: @@ -5827,7 +5877,7 @@ def getChangeLog (self, version = None) : lastVersionObjects.remove(id) if id in xrefNewObjects: notMatchingObjects.append(id) - changes.append([addedObjects,modifiedObjects,removedObjects,notMatchingObjects]) + changes.append([addedObjects, modifiedObjects, removedObjects, notMatchingObjects]) addedObjects = [] removedObjects = [] modifiedObjects = [] @@ -5844,31 +5894,31 @@ def getDetectionReport(self): def getEndLine(self): return self.endLine - + def getEncryptDict(self): return self.encryptDict - + def getEncryptionAlgorithms(self): return self.encryptionAlgorithms - + def getEncryptionKey(self): return self.encryptionKey - + def getEncryptionKeyLength(self): return self.encryptionKeyLength - + def getErrors(self): return self.errors def getFileId(self): - return self.fileId + return self.fileId def getFileName(self): return self.fileName - + def getGarbageHeader(self): return self.garbageHeader - + def getHeaderOffset(self): return self.headerOffset @@ -5876,7 +5926,7 @@ def getInfoObject(self, version=None, indirect=False): if version is None: infoObjects = [] infoIds = self.getInfoObjectId() - for i in xrange(len(infoIds)): + for i in range(len(infoIds)): id = infoIds[i] if id is not None: infoObject = self.getObject(id, i, indirect) @@ -5896,15 +5946,15 @@ def getInfoObject(self, version=None, indirect=False): else: return 
None - def getInfoObjectId(self, version = None): - if version == None: + def getInfoObjectId(self, version=None): + if version is None: infoIds = [] for v in range(self.updates+1): infoId = None trailer, streamTrailer = self.trailer[v] - if trailer != None: + if trailer is not None: infoId = trailer.getInfoId() - if infoId == None and streamTrailer != None: + if infoId is None and streamTrailer is not None: infoId = streamTrailer.getInfoId() infoIds.append(infoId) else: @@ -5912,12 +5962,12 @@ def getInfoObjectId(self, version = None): else: infoId = None trailer, streamTrailer = self.trailer[version] - if trailer != None: + if trailer is not None: infoId = trailer.getInfoId() - if infoId == None and streamTrailer != None: + if infoId is None and streamTrailer is not None: infoId = streamTrailer.getInfoId() return infoId - + def getJavascriptCode(self, version=None, perObject=False): jsCode = [] if version is None: @@ -5933,23 +5983,23 @@ def getJavascriptCode(self, version=None, perObject=False): else: jsCode.append(self.body[version].getJSCode()) return jsCode - + def getLinearized(self): return self.linearized def getMD5(self): return self.md5 - - def getMetadata (self, version = None): + + def getMetadata(self, version=None): matchingObjects = self.getObjectsByString('/Metadata', version) return matchingObjects - - def getMetadataElement(self, objectId, version, element): - metadataObject = self.getObject(objectId,version) - if metadataObject != None: - if metadataObject.getType() == 'stream': + + def getMetadataElement(self, objectId, version, element): + metadataObject = self.getObject(objectId, version) + if metadataObject is not None: + if metadataObject.getType() == 'stream': stream = metadataObject.getStream() - matches = re.findall('<'+element+'>(.*)',stream) + matches = re.findall('<'+element+'>(.*)', stream) if matches != []: return matches[0] else: @@ -5961,18 +6011,18 @@ def getMetadataElement(self, objectId, version, element): def 
getNumUpdates(self): return self.updates - - def getObject (self, id, version = None, indirect = False) : - ''' + + def getObject(self, id, version=None, indirect=False): + ''' Returns the specified object ''' - if version == None: - for i in range(self.updates,-1,-1): + if version is None: + for i in range(self.updates, -1, -1): if indirect: object = self.body[i].getIndirectObject(id) else: object = self.body[i].getObject(id) - if object == None: + if object is None: continue else: return object @@ -5984,28 +6034,28 @@ def getObject (self, id, version = None, indirect = False) : if indirect: return self.body[version].getIndirectObject(id) else: - return self.body[version].getObject(id) + return self.body[version].getObject(id) - def getObjectsByString (self, toSearch, version = None) : + def getObjectsByString(self, toSearch, version=None): ''' Returns the object containing the specified string. ''' matchedObjects = [] - if version == None: + if version is None: for i in range(self.updates + 1): matchedObjects.append(self.body[i].getObjectsByString(toSearch)) return matchedObjects else: if version > self.updates or version < 0: - return None + return None return self.body[version].getObjectsByString(toSearch) - - def getOffsets(self, version = None): + + def getOffsets(self, version=None): offsetsArray = [] - - if version == None: - versions = range(self.updates+1) + + if version is None: + versions = list(range(self.updates+1)) else: versions = [version] - + for version in versions: offsets = {} trailer = None @@ -6016,45 +6066,45 @@ def getOffsets(self, version = None): compressedObjects = self.body[version].getCompressedObjects() objectStreams = self.body[version].getObjectStreams() ret = self.getXrefSection(version) - if ret != None: + if ret is not None: xref, streamXref = ret[1] ret = self.getTrailer(version) - if ret != None: + if ret is not None: trailer, streamTrailer = ret[1] if objectStreams != []: for objStream in objectStreams: if objStream in 
indirectObjects: indirectObject = indirectObjects[objStream] - if indirectObject != None: + if indirectObject is not None: objectStreamsOffsets[objStream] = indirectObject.getOffset() if version == 0: - offsets['header'] = (self.headerOffset,0) + offsets['header'] = (self.headerOffset, 0) for id in sortedObjectsIds: indirectObject = indirectObjects[id] - if indirectObject != None: + if indirectObject is not None: objectOffset = indirectObject.getOffset() object = indirectObject.getObject() - if object != None and object.isCompressed(): + if object is not None and object.isCompressed(): compressedIn = object.getCompressedIn() if compressedIn in objectStreamsOffsets: - objectOffset = objectStreamsOffsets[compressedIn] + objectOffset + 20 + objectOffset = objectStreamsOffsets[compressedIn] + objectOffset + 20 size = indirectObject.getSize() - if offsets.has_key('objects'): - offsets['objects'].append((id,objectOffset,size)) + if "objects" in offsets: + offsets['objects'].append((id, objectOffset, size)) else: - offsets['objects'] = [(id,objectOffset,size)] - if xref != None: + offsets['objects'] = [(id, objectOffset, size)] + if xref is not None: xrefOffset = xref.getOffset() xrefSize = xref.getSize() offsets['xref'] = (xrefOffset, xrefSize) else: offsets['xref'] = None - if trailer != None: + if trailer is not None: trailerOffset = trailer.getOffset() trailerSize = trailer.getSize() eofOffset = trailer.getEOFOffset() - offsets['trailer'] = (trailerOffset,trailerSize) - offsets['eof'] = (eofOffset,0) + offsets['trailer'] = (trailerOffset, trailerSize) + offsets['eof'] = (eofOffset, 0) else: offsets['trailer'] = None offsets['eof'] = None @@ -6064,20 +6114,20 @@ def getOffsets(self, version = None): def getOwnerPass(self): return self.ownerPass - + def getPath(self): return self.path - - def getReferencesIn (self, id, version = None) : - ''' + + def getReferencesIn(self, id, version=None): + ''' Get the references in an object ''' - if version == None: - for i in 
range(self.updates,-1,-1): + if version is None: + for i in range(self.updates, -1, -1): indirectObjectsDict = self.body[i].getObjects() - if indirectObjectsDict.has_key(id): + if id in indirectObjectsDict: indirectObject = indirectObjectsDict[id] - if indirectObject == None: + if indirectObject is None: return None else: return indirectObject.getReferences() @@ -6087,27 +6137,27 @@ def getReferencesIn (self, id, version = None) : if version > self.updates or version < 0: return None indirectObjectsDict = self.body[version].getObjects() - if indirectObjectsDict.has_key(id): + if id in indirectObjectsDict: indirectObject = indirectObjectsDict[id] - if indirectObject == None: + if indirectObject is None: return None else: return indirectObject.getReferences() else: return None - - def getReferencesTo (self, id, version = None) : - ''' + + def getReferencesTo(self, id, version=None): + ''' Get the references to the specified object in the document ''' matchedObjects = [] - if version == None: + if version is None: for i in range(self.updates + 1): indirectObjectsDict = self.body[i].getObjects() - for indirectObject in indirectObjectsDict.values(): - if indirectObject != None: + for indirectObject in list(indirectObjectsDict.values()): + if indirectObject is not None: object = indirectObject.getObject() - if object != None: + if object is not None: value = object.getValue() if re.findall('\D'+str(id)+'\s{1,3}\d{1,3}\s{1,3}R', value) != []: matchedObjects.append(indirectObject.id) @@ -6115,10 +6165,10 @@ def getReferencesTo (self, id, version = None) : if version > self.updates or version < 0: return None indirectObjectsDict = self.body[version].getObjects() - for indirectObject in indirectObjectsDict.values(): - if indirectObject != None: + for indirectObject in list(indirectObjectsDict.values()): + if indirectObject is not None: object = indirectObject.getObject() - if object != None: + if object is not None: value = object.getValue() if 
re.findall('\D'+str(id)+'\s{1,3}\d{1,3}\s{1,3}R', value) != []: matchedObjects.append(indirectObject.id) @@ -6126,14 +6176,14 @@ def getReferencesTo (self, id, version = None) : def getSHA1(self): return self.sha1 - + def getSHA256(self): return self.sha256 - + def getSize(self): return self.size - - def getStats (self): + + def getStats(self): stats = {} stats['File'] = self.fileName stats['MD5'] = self.md5 @@ -6159,50 +6209,50 @@ def getStats (self): catalogId = None infoId = None trailer, streamTrailer = self.trailer[version] - if trailer != None: + if trailer is not None: catalogId = trailer.getCatalogId() infoId = trailer.getInfoId() - if catalogId == None and streamTrailer != None: + if catalogId is None and streamTrailer is not None: catalogId = streamTrailer.getCatalogId() - if infoId == None and streamTrailer != None: + if infoId is None and streamTrailer is not None: infoId = streamTrailer.getInfoId() - if catalogId != None: + if catalogId is not None: statsVersion['Catalog'] = str(catalogId) else: statsVersion['Catalog'] = None - if infoId != None: + if infoId is not None: statsVersion['Info'] = str(infoId) else: statsVersion['Info'] = None objectsById = sorted(self.body[version].getObjectsIds(), key=lambda x: int(x)) - statsVersion['Objects'] = [str(self.body[version].getNumObjects()),objectsById] + statsVersion['Objects'] = [str(self.body[version].getNumObjects()), objectsById] if self.body[version].containsCompressedObjects(): compressedObjects = self.body[version].getCompressedObjects() - statsVersion['Compressed Objects'] = [str(len(compressedObjects)),compressedObjects] + statsVersion['Compressed Objects'] = [str(len(compressedObjects)), compressedObjects] else: statsVersion['Compressed Objects'] = None numFaultyObjects = self.body[version].getNumFaultyObjects() if numFaultyObjects > 0: - statsVersion['Errors'] = [str(numFaultyObjects),self.body[version].getFaultyObjects()] + statsVersion['Errors'] = [str(numFaultyObjects), 
self.body[version].getFaultyObjects()] else: statsVersion['Errors'] = None numStreams = self.body[version].getNumStreams() - statsVersion['Streams'] = [str(numStreams),self.body[version].getStreams()] + statsVersion['Streams'] = [str(numStreams), self.body[version].getStreams()] if self.body[version].containsXrefStreams(): xrefStreams = self.body[version].getXrefStreams() - statsVersion['Xref Streams'] = [str(len(xrefStreams)),xrefStreams] + statsVersion['Xref Streams'] = [str(len(xrefStreams)), xrefStreams] else: statsVersion['Xref Streams'] = None if self.body[version].containsObjectStreams(): objectStreams = self.body[version].getObjectStreams() - statsVersion['Object Streams'] = [str(len(objectStreams)),objectStreams] + statsVersion['Object Streams'] = [str(len(objectStreams)), objectStreams] else: statsVersion['Object Streams'] = None if numStreams > 0: - statsVersion['Encoded'] = [str(self.body[version].getNumEncodedStreams()),self.body[version].getEncodedStreams()] + statsVersion['Encoded'] = [str(self.body[version].getNumEncodedStreams()), self.body[version].getEncodedStreams()] numDecodingErrors = self.body[version].getNumDecodingErrors() if numDecodingErrors > 0: - statsVersion['Decoding Errors'] = [str(numDecodingErrors),self.body[version].getFaultyStreams()] + statsVersion['Decoding Errors'] = [str(numDecodingErrors), self.body[version].getFaultyStreams()] else: statsVersion['Decoding Errors'] = None else: @@ -6214,7 +6264,7 @@ def getStats (self): statsVersion['URIs'] = None containingJS = self.body[version].getContainingJS() if len(containingJS) > 0: - statsVersion['Objects with JS code'] = [str(len(containingJS)),containingJS] + statsVersion['Objects with JS code'] = [str(len(containingJS)), containingJS] else: statsVersion['Objects with JS code'] = None actions = self.body[version].getSuspiciousActions() @@ -6245,41 +6295,41 @@ def getStats (self): stats['Versions'].append(statsVersion) return stats - def getSuspiciousComponents (self) : + def 
getSuspiciousComponents(self): pass - - def getTrailer (self, version = None) : - if version == None: - for i in range(self.updates,-1,-1): + + def getTrailer(self, version=None): + if version is None: + for i in range(self.updates, -1, -1): trailerArray = self.trailer[i] - if trailerArray == None or trailerArray == []: + if trailerArray is None or trailerArray == []: continue else: - return (i,trailerArray) + return (i, trailerArray) else: - #self.addError('Trailer not found in file') + # self.addError('Trailer not found in file') return None else: if version > self.updates or version < 0: - #self.addError('Bad version getting trailer') + # self.addError('Bad version getting trailer') return None trailerArray = self.trailer[version] - if trailerArray == None or trailerArray == []: + if trailerArray is None or trailerArray == []: return None else: - return (version,trailerArray) + return (version, trailerArray) - def getTree (self, version = None) : + def getTree(self, version=None): ''' Returns the logical structure (tree) of the document ''' tree = [] - - if version == None: - versions = range(self.updates+1) + + if version is None: + versions = list(range(self.updates+1)) else: versions = [version] - + for version in versions: objectsIn = {} trailer = None @@ -6288,21 +6338,21 @@ def getTree (self, version = None) : infoId = None ids = self.body[version].getObjectsIds() ret = self.getTrailer(version) - if ret != None: + if ret is not None: trailer, streamTrailer = ret[1] # Getting info and catalog id - if trailer != None: + if trailer is not None: catalogId = trailer.getCatalogId() infoId = trailer.getInfoId() - if catalogId == None and streamTrailer != None: + if catalogId is None and streamTrailer is not None: catalogId = streamTrailer.getCatalogId() - if infoId == None and streamTrailer != None: + if infoId is None and streamTrailer is not None: infoId = streamTrailer.getInfoId() for id in ids: referencesIds = [] object = self.getObject(id, version) - if 
object != None: - type = object.getType() + if object is not None: + type = object.getType() if type == 'dictionary' or type == 'stream': elements = object.getElements() if infoId == id: @@ -6313,11 +6363,11 @@ def getTree (self, version = None) : type = dictType else: if type == 'dictionary' and len(elements) == 1: - type = elements.keys()[0] + type = list(elements.keys())[0] references = self.getReferencesIn(id, version) for i in range(len(references)): referencesIds.append(int(references[i].split()[0])) - if references == None: + if references is None: objectsIn[id] = (type, []) else: objectsIn[id] = (type, referencesIds) @@ -6325,17 +6375,17 @@ def getTree (self, version = None) : return tree def getUpdates(self): - return self.updates + return self.updates - def getURLs (self, version = None) : + def getURLs(self, version=None): urls = [] - if version == None: + if version is None: for version in range(self.updates+1): urls += self.body[version].getURLs() else: if version <= self.updates and not version < 0: urls = self.body[version].getURLs() - return urls + return urls def getURIs(self, version=None, perObject=False): uris = [] @@ -6355,40 +6405,40 @@ def getURIs(self, version=None, perObject=False): def getUserPass(self): return self.userPass - + def getVersion(self): return self.version - def getXrefSection (self, version = None) : - if version == None: - for i in range(self.updates,-1,-1): + def getXrefSection(self, version=None): + if version is None: + for i in range(self.updates, -1, -1): xrefArray = self.crossRefTable[i] - if xrefArray == None or xrefArray == []: + if xrefArray is None or xrefArray == []: continue else: - return (i,xrefArray) + return (i, xrefArray) else: - #self.addError('Xref section not found in file') + # self.addError('Xref section not found in file') return None else: if version > self.updates or version < 0: return None xrefArray = self.crossRefTable[version] - if xrefArray == None or xrefArray == []: + if xrefArray is None or 
xrefArray == []: return None else: - return (version,xrefArray) - + return (version, xrefArray) + def headerToFile(self, malformedOptions, headerFile): headerGarbage = '' if MAL_ALL in malformedOptions or MAL_HEAD in malformedOptions: - if headerFile == None: + if headerFile is None: if self.garbageHeader == '': headerGarbage = 'MZ'+'_'*100 else: headerGarbage = self.garbageHeader else: - headerGarbage = open(headerFile,'rb').read() + headerGarbage = open(headerFile, 'rb').read() headerGarbage += newLine if MAL_ALL in malformedOptions or MAL_BAD_HEAD in malformedOptions: output = headerGarbage + '%PDF-1.\0' + newLine @@ -6399,7 +6449,7 @@ def headerToFile(self, malformedOptions, headerFile): self.binaryChars = '\xC0\xFF\xEE\xFA\xBA\xDA' output += '%' + self.binaryChars + newLine return output - + def isEncrypted(self): return self.encrypted @@ -6416,57 +6466,57 @@ def makePDF(self, pdfType, content): offset = 16 else: offset = 10 - + # Body body = PDFBody() - xrefEntries.append(PDFCrossRefEntry(0,65535,'f')) + xrefEntries.append(PDFCrossRefEntry(0, 65535, 'f')) # Catalog (1) - catalogElements = {'/Type':PDFName('Catalog'),'/Pages':PDFReference('2')} + catalogElements = {'/Type': PDFName('Catalog'), '/Pages': PDFReference('2')} if pdfType == 'open_action_js': catalogElements['/OpenAction'] = PDFReference('4') catalogDictionary = PDFDictionary(elements=catalogElements) catalogSize = staticIndirectObjectSize + len(catalogDictionary.getRawValue()) - body.setObject(object = catalogDictionary, offset = offset) - xrefEntries.append(PDFCrossRefEntry(offset,0,'n')) + body.setObject(object=catalogDictionary, offset=offset) + xrefEntries.append(PDFCrossRefEntry(offset, 0, 'n')) offset += catalogSize # Pages root node (2) - pagesDictionary = PDFDictionary(elements={'/Type':PDFName('Pages'),'/Kids':PDFArray(elements=[PDFReference('3')]),'/Count':PDFNum('1')}) + pagesDictionary = PDFDictionary(elements={'/Type': PDFName('Pages'), '/Kids': PDFArray(elements=[PDFReference('3')]), 
'/Count': PDFNum('1')}) pagesSize = len(pagesDictionary.getRawValue())+staticIndirectObjectSize - body.setObject(object = pagesDictionary, offset = offset) - xrefEntries.append(PDFCrossRefEntry(offset,0,'n')) + body.setObject(object=pagesDictionary, offset=offset) + xrefEntries.append(PDFCrossRefEntry(offset, 0, 'n')) offset += pagesSize # Page node (3) - mediaBoxArray = PDFArray(elements=[PDFNum('0'),PDFNum('0'),PDFNum('600'),PDFNum('800')]) - pageDictionary = PDFDictionary(elements={'/Type':PDFName('Page'),'/Parent':PDFReference('2'),'/MediaBox':mediaBoxArray,'/Resources':PDFDictionary()}) + mediaBoxArray = PDFArray(elements=[PDFNum('0'), PDFNum('0'), PDFNum('600'), PDFNum('800')]) + pageDictionary = PDFDictionary(elements={'/Type': PDFName('Page'), '/Parent': PDFReference('2'), '/MediaBox': mediaBoxArray, '/Resources': PDFDictionary()}) pageSize = len(pageDictionary.getRawValue())+staticIndirectObjectSize - body.setObject(object = pageDictionary, offset = offset) - xrefEntries.append(PDFCrossRefEntry(offset,0,'n')) + body.setObject(object=pageDictionary, offset=offset) + xrefEntries.append(PDFCrossRefEntry(offset, 0, 'n')) offset += pageSize if pdfType == 'open_action_js': # Action object (4) - actionDictionary = PDFDictionary(elements={'/Type':PDFName('Action'),'/S':PDFName('JavaScript'),'/JS':PDFReference('5')}) + actionDictionary = PDFDictionary(elements={'/Type': PDFName('Action'), '/S': PDFName('JavaScript'), '/JS': PDFReference('5')}) actionSize = len(actionDictionary.getRawValue())+staticIndirectObjectSize - body.setObject(object = actionDictionary, offset = offset) - xrefEntries.append(PDFCrossRefEntry(offset,0,'n')) + body.setObject(object=actionDictionary, offset=offset) + xrefEntries.append(PDFCrossRefEntry(offset, 0, 'n')) offset += actionSize # JS stream (5) try: - jsStream = PDFStream(rawStream = content, elements = {'/Length':PDFNum(str(len(content)))}) + jsStream = PDFStream(rawStream=content, elements={'/Length': PDFNum(str(len(content)))}) 
except Exception as e: errorMessage = 'Error creating PDFStream' if e.message != '': errorMessage += ': '+e.message return (-1, errorMessage) - ret = jsStream.setElement('/Filter',PDFName('FlateDecode')) + ret = jsStream.setElement('/Filter', PDFName('FlateDecode')) if ret[0] == -1: self.addError(ret[1]) return ret jsSize = len(jsStream.getRawValue())+staticIndirectObjectSize - ret = body.setObject(object = jsStream, offset = offset) + ret = body.setObject(object=jsStream, offset=offset) if ret[0] == -1: self.addError(ret[1]) return ret - xrefEntries.append(PDFCrossRefEntry(offset,0,'n')) + xrefEntries.append(PDFCrossRefEntry(offset, 0, 'n')) offset += jsSize numObjects = 5 body.setNextOffset(offset) @@ -6475,9 +6525,9 @@ def makePDF(self, pdfType, content): self.addNumStreams(body.getNumStreams()) self.addNumEncodedStreams(body.getNumEncodedStreams()) self.addNumDecodingErrors(body.getNumDecodingErrors()) - + # xref table - subsection = PDFCrossRefSubSection(0,numObjects+1,xrefEntries) + subsection = PDFCrossRefSubSection(0, numObjects+1, xrefEntries) xrefSection = PDFCrossRefSection() xrefSection.addSubsection(subsection) xrefSection.setOffset(offset) @@ -6485,28 +6535,28 @@ def makePDF(self, pdfType, content): xrefSectionSize = len(xrefEntries)*20+10 xrefSection.setSize(xrefSectionSize) offset += xrefSectionSize - self.addCrossRefTableSection([xrefSection,None]) - + self.addCrossRefTableSection([xrefSection, None]) + # Trailer - trailerDictionary = PDFDictionary(elements={'/Size':PDFNum(str(numObjects+1)),'/Root':PDFReference('1')}) + trailerDictionary = PDFDictionary(elements={'/Size': PDFNum(str(numObjects+1)), '/Root': PDFReference('1')}) trailerSize = len(trailerDictionary.getRawValue())+25 - trailer = PDFTrailer(trailerDictionary,str(xrefOffset)) + trailer = PDFTrailer(trailerDictionary, str(xrefOffset)) trailer.setOffset(offset) trailer.setSize(trailerSize) trailer.setEOFOffset(offset+trailerSize) - self.addTrailer([trailer,None]) + 
self.addTrailer([trailer, None]) self.setSize(offset+trailerSize+5) self.updateStats() - return (0,'') + return (0, '') def replace(self, string1, string2): errorMessage = '' stringFound = False for i in range(self.updates + 1): - objects = self.getObjectsByString(string1,i) + objects = self.getObjectsByString(string1, i) for id in objects: object = self.getObject(id, i) - if object != None: + if object is not None: ret = object.replace(string1, string2) if ret[0] == -1 and not stringFound: errorMessage = ret[1] @@ -6516,28 +6566,28 @@ def replace(self, string1, string2): if ret[0] == -1: errorMessage = ret[1] if not stringFound: - return (-1,'String not found') + return (-1, 'String not found') if errorMessage != '': return (-1, errorMessage) else: - return (0,'') + return (0, '') - def removeError(self, errorMessage = '', errorType = None): + def removeError(self, errorMessage='', errorType=None): ''' Removes the error message from the errors array. If an errorType is given, then all the error messages belonging to this type are removed. 
- + @param errorMessage: The error message to be removed (string) - @param errorType: All the error messages of this type will be removed (string) + @param errorType: All the error messages of this type will be removed (string) ''' if errorMessage in self.errors: self.errors.remove(errorMessage) - if errorType != None: + if errorType is not None: lenErrorType = len(errorType) for error in self.errors: if error[:lenErrorType] == errorType: self.errors.remove(error) - - def save(self, filename, version = None, malformedOptions = [], headerFile = None): + + def save(self, filename, version=None, malformedOptions=[], headerFile=None): maxId = 0 offset = 0 lastXrefSectionOffset = 0 @@ -6547,9 +6597,9 @@ def save(self, filename, version = None, malformedOptions = [], headerFile = Non xrefStreamObjectId = None xrefStreamObject = None try: - if version == None: + if version is None: version = self.updates - outputFileContent = self.headerToFile(malformedOptions,headerFile) + outputFileContent = self.headerToFile(malformedOptions, headerFile) offset = len(outputFileContent) for v in range(version+1): xrefStreamObjectId = None @@ -6558,52 +6608,52 @@ def save(self, filename, version = None, malformedOptions = [], headerFile = Non indirectObjects = self.body[v].getObjects() section, streamSection = self.crossRefTable[v] trailer, streamTrailer = self.trailer[v] - if section != None: + if section is not None: numSubSectionsInXref = section.getSubsectionsNumber() else: numSubSectionsInXref = 0 - if streamSection != None: + if streamSection is not None: numSubSectionsInXrefStream = streamSection.getSubsectionsNumber() else: numSubSectionsInXrefStream = 0 - if streamSection != None: + if streamSection is not None: xrefStreamObjectId = streamSection.getXrefStreamObject() - if indirectObjects.has_key(xrefStreamObjectId): + if xrefStreamObjectId in indirectObjects: xrefStreamObject = indirectObjects[xrefStreamObjectId] sortedObjectsIds.remove(xrefStreamObjectId) for id in 
sortedObjectsIds: if id > maxId: maxId = id indirectObject = indirectObjects[id] - if indirectObject != None: + if indirectObject is not None: object = indirectObject.getObject() - if object != None: + if object is not None: objectType = object.getType() if not object.isCompressed(): indirectObject.setOffset(offset) if numSubSectionsInXref != 0: ret = section.updateOffset(id, offset) if ret[0] == -1: - ret = section.addEntry(id,PDFCrossRefEntry(offset,0,'n')) + ret = section.addEntry(id, PDFCrossRefEntry(offset, 0, 'n')) if ret[0] == -1: self.addError(ret[1]) if numSubSectionsInXrefStream != 0: ret = streamSection.updateOffset(id, offset) if ret[0] == -1: - ret = streamSection.addEntry(id,PDFCrossRefEntry(offset,0,'n')) + ret = streamSection.addEntry(id, PDFCrossRefEntry(offset, 0, 'n')) if ret[0] == -1: self.addError(ret[1]) objectFileOutput = indirectObject.toFile() if objectType == 'stream' and MAL_ESTREAM in malformedOptions: - objectFileOutput = objectFileOutput.replace(newLine+'endstream','') + objectFileOutput = objectFileOutput.replace(newLine+'endstream', '') elif MAL_ALL in malformedOptions or MAL_EOBJ in malformedOptions: - objectFileOutput = objectFileOutput.replace(newLine+'endobj','') + objectFileOutput = objectFileOutput.replace(newLine+'endobj', '') outputFileContent += objectFileOutput offset = len(outputFileContent) indirectObject.setSize(offset-indirectObject.getOffset()) indirectObjects[id] = indirectObject - - if xrefStreamObject != None: + + if xrefStreamObject is not None: if numSubSectionsInXref != 0: ret = section.updateOffset(xrefStreamObjectId, offset) if ret[0] == -1: @@ -6615,7 +6665,7 @@ def save(self, filename, version = None, malformedOptions = [], headerFile = Non if xrefStreamObjectId > maxId: maxId = xrefStreamObjectId streamSection.setSize(maxId+1) - if streamTrailer != None: + if streamTrailer is not None: streamTrailer.setNumObjects(maxId+1) if prevXrefStreamOffset != 0: 
streamTrailer.setPrevCrossRefSection(prevXrefStreamOffset) @@ -6623,32 +6673,32 @@ def save(self, filename, version = None, malformedOptions = [], headerFile = Non self.crossRefTable[v][1] = streamSection ret = self.createXrefStream(v, xrefStreamObjectId) if ret[0] == -1: - return (-1,ret[1]) - xrefStreamObjectId,newXrefStream = ret[1] + return (-1, ret[1]) + xrefStreamObjectId, newXrefStream = ret[1] xrefStreamObject.setObject(newXrefStream) objectFileOutput = xrefStreamObject.toFile() if MAL_ALL in malformedOptions or MAL_ESTREAM in malformedOptions: - objectFileOutput = objectFileOutput.replace(newLine+'endstream','') + objectFileOutput = objectFileOutput.replace(newLine+'endstream', '') outputFileContent += objectFileOutput prevXrefStreamOffset = offset lastXrefSectionOffset = offset offset = len(outputFileContent) xrefStreamObject.setSize(offset-xrefStreamObject.getOffset()) indirectObjects[xrefStreamObjectId] = xrefStreamObject - self.body[v].setNextOffset(offset) - - if section != None and MAL_ALL not in malformedOptions and MAL_XREF not in malformedOptions: + self.body[v].setNextOffset(offset) + + if section is not None and MAL_ALL not in malformedOptions and MAL_XREF not in malformedOptions: section.setOffset(offset) lastXrefSectionOffset = offset outputFileContent += section.toFile() offset = len(outputFileContent) section.setSize(offset-section.getOffset()) self.crossRefTable[v][0] = section - - if trailer != None: + + if trailer is not None: trailer.setLastCrossRefSection(lastXrefSectionOffset) trailer.setOffset(offset) - if trailer.getCatalogId() != None and trailer.getSize() != 0: + if trailer.getCatalogId() is not None and trailer.getSize() != 0: trailer.setNumObjects(maxId+1) if prevXrefSectionOffset != 0: trailer.setPrevCrossRefSection(prevXrefSectionOffset) @@ -6659,42 +6709,42 @@ def save(self, filename, version = None, malformedOptions = [], headerFile = Non prevXrefSectionOffset = lastXrefSectionOffset self.body[v].setObjects(indirectObjects) 
offset = len(outputFileContent) - open(filename,'wb').write(outputFileContent) + open(filename, 'wb').write(outputFileContent) self.setMD5(hashlib.md5(outputFileContent).hexdigest()) self.setSize(len(outputFileContent)) self.path = os.path.realpath(filename) self.fileName = filename except: - return (-1,'Unspecified error') - return (0,'') + return (-1, 'Unspecified error') + return (0, '') def setDetectionRate(self, newRate): self.detectionRate = newRate def setDetectionReport(self, detectionReportLink): self.detectionReport = detectionReportLink - + def setEncryptDict(self, dict): self.encryptDict = dict def setEncrypted(self, status): - self.encrypted = status + self.encrypted = status def setEncryptionAlgorithms(self, encryptionAlgorithms): self.encryptionAlgorithms = encryptionAlgorithms def setEncryptionKey(self, key): - self.encryptionKey = key + self.encryptionKey = key def setEncryptionKeyLength(self, length): self.encryptionKeyLength = length - + def setEndLine(self, eol): - self.endLine = eol + self.endLine = eol def setFileId(self, fid): self.fileId = fid - + def setFileName(self, name): self.fileName = name @@ -6710,17 +6760,17 @@ def setLinearized(self, status): def setMaxObjectId(self, id): if int(id) > self.maxObjectId: self.maxObjectId = int(id) - + def setMD5(self, md5): self.md5 = md5 - - def setObject (self, id, object, version = None, mod = False): + + def setObject(self, id, object, version=None, mod=False): errorMessage = '' - if object == None: - return (-1,'Object is None') - if version == None: - for i in range(self.updates,-1,-1): - ret = self.body[i].setObject(id, object, modification = mod) + if object is None: + return (-1, 'Object is None') + if version is None: + for i in range(self.updates, -1, -1): + ret = self.body[i].setObject(id, object, modification=mod) if ret[0] == -1: errorMessage = ret[1] else: @@ -6732,11 +6782,11 @@ def setObject (self, id, object, version = None, mod = False): return (-1, errorMessage) else: if version > 
self.updates or version < 0: - return (-1,'Bad file version') - ret = self.body[version].setObject(id, object, modification = mod) + return (-1, 'Bad file version') + ret = self.body[version].setObject(id, object, modification=mod) if ret[0] == -1: self.addError(ret[1]) - return (-1,ret[1]) + return (-1, ret[1]) else: objectType = object.getType() if objectType == 'dictionary' and object.hasElement('/Linearized'): @@ -6744,8 +6794,8 @@ def setObject (self, id, object, version = None, mod = False): return ret def setOwnerPass(self, password): - self.ownerPass = password - + self.ownerPass = password + def setPath(self, path): self.path = path @@ -6757,11 +6807,11 @@ def setSHA256(self, sha256): def setSize(self, size): self.size = size - - def setTrailer(self, trailerArray, version = None): + + def setTrailer(self, trailerArray, version=None): errorMessage = '' - if version == None: - for i in range(self.updates,-1,-1): + if version is None: + for i in range(self.updates, -1, -1): if len(self.trailer) > i: self.trailer[i] = trailerArray else: @@ -6769,72 +6819,80 @@ def setTrailer(self, trailerArray, version = None): self.addError(errorMessage) else: if version > self.updates or version < 0: - return (-1,'Bad file version') + return (-1, 'Bad file version') self.trailer[version] = trailerArray if errorMessage != '': return (-1, errorMessage) - return (0,'') + return (0, '') def setUpdates(self, num): - self.updates = num + self.updates = num def setUserPass(self, password): self.userPass = password def setVersion(self, version): self.version = version - - def updateStats(self, recursiveUpdate = False): + + def updateStats(self, recursiveUpdate=False): self.numObjects = 0 self.numStreams = 0 self.numEncodedStreams = 0 self.numDecodingErrors = 0 self.encrypted = False - + for v in range(self.updates+1): if recursiveUpdate: - #TODO + # TODO self.updateBody(v) self.updateCrossRefTable(v) self.updateTrailer(v) - - #body.updateObjects() + + # body.updateObjects() 
self.addNumObjects(self.body[v].getNumObjects()) self.addNumStreams(self.body[v].getNumStreams()) self.addNumEncodedStreams(self.body[v].getNumEncodedStreams()) self.addNumDecodingErrors(self.body[v].getNumDecodingErrors()) self.addNumURIs(self.body[v].getNumURIs()) trailer, streamTrailer = self.trailer[v] - if trailer != None: - if trailer.getDictEntry('/Encrypt') != None: + if trailer is not None: + if trailer.getDictEntry('/Encrypt') is not None: self.setEncrypted(True) - if streamTrailer != None: - if streamTrailer.getDictEntry('/Encrypt') != None: + if streamTrailer is not None: + if streamTrailer.getDictEntry('/Encrypt') is not None: self.setEncrypted(True) - return (0,'') + return (0, '') - def updateBody (self, version) : - #TODO + def updateBody(self, version): + # TODO pass - - def updateCrossRefTable (self, version) : - #TODO + + def updateCrossRefTable(self, version): + # TODO pass - - def updateTrailer (self, version) : - #TODO + + def updateTrailer(self, version): + # TODO pass -class PDFParser : - def __init__(self) : +class PDFParser: + def __init__(self): self.commentChar = '%' self.comments = [] - self.delimiters = [('<<','>>','dictionary'),('(',')','string'),('<','>','hexadecimal'),('[',']','array'),('{','}',''),('/','','name'),('%','','comment')] + self.delimiters = [ + ('<<', '>>', 'dictionary'), + ('(', ')', 'string'), + ('<', '>', 'hexadecimal'), + ('[', ']', 'array'), + ('{', '}', ''), + ('/', '', 'name'), + ('%', '', 'comment'), + ] self.fileParts = [] - self.charCounter = 0 - - def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis = False) : + self.charCounter = 0 + + def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False): ''' Main method to parse a PDF document @param fileName The name of the file to be parsed @@ -6855,10 +6913,11 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis pdfFile.setFileName(os.path.basename(fileName)) isForceMode = forceMode 
isManualAnalysis = manualAnalysis - + # Reading the file header - file = open(fileName,'rb') + file = open(fileName, 'rb') for line in file: + line = line.decode('latin-1') if versionLine == '': pdfHeaderIndex = line.find('%PDF-') psHeaderIndex = line.find('%!PS-Adobe-') @@ -6883,11 +6942,11 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis break headerOffset += len(line) file.close() - + # Getting the specification version - versionLine = versionLine.replace('\r','') - versionLine = versionLine.replace('\n','') - matchVersion = re.findall('%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})',versionLine) + versionLine = versionLine.replace('\r', '') + versionLine = versionLine.replace('\n', '') + matchVersion = re.findall('%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})', versionLine) if matchVersion == []: if forceMode: pdfFile.setVersion(versionLine) @@ -6899,7 +6958,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis pdfFile.setVersion(matchVersion[0][1]) if garbageHeader != '': pdfFile.setGarbageHeader(garbageHeader) - + # Getting the end of line if len(binaryLine) > 3: if binaryLine[-2:] == '\r\n': @@ -6911,28 +6970,33 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis pdfFile.setEndLine('\n') else: pdfFile.setEndLine('\n') - + # Does it contain binary characters?? 
if binaryLine[0] == '%' and ord(binaryLine[1]) >= 128 and ord(binaryLine[2]) >= 128 and ord(binaryLine[3]) >= 128 and ord(binaryLine[4]) >= 128: pdfFile.binary = True pdfFile.binaryChars = binaryLine[1:5] else: pdfFile.binary = False - + # Reading the rest of the file - fileContent = open(fileName,'rb').read() + fileContent = open(fileName, 'rb').read() pdfFile.setSize(len(fileContent)) pdfFile.setMD5(hashlib.md5(fileContent).hexdigest()) pdfFile.setSHA1(hashlib.sha1(fileContent).hexdigest()) pdfFile.setSHA256(hashlib.sha256(fileContent).hexdigest()) - + # Getting the number of updates in the file - while fileContent.find('%%EOF') != -1: - self.readUntilSymbol(fileContent, '%%EOF') - self.readUntilEndOfLine(fileContent) - self.fileParts.append(fileContent[:self.charCounter]) - fileContent = fileContent[self.charCounter:] + while fileContent.find(b'%%EOF') != -1: self.charCounter = 0 + self.readUntilSymbol(fileContent, b'%%EOF') + if six.PY3: + self.readUntilEndOfLine(fileContent.decode('latin-1')) + self.fileParts.append(fileContent[:self.charCounter].decode('latin-1')) + fileContent = fileContent[self.charCounter:] + else: + self.readUntilEndOfLine(fileContent) + self.fileParts.append(fileContent[:self.charCounter]) + fileContent = fileContent[self.charCounter:] else: if self.fileParts == []: errorMessage = '%%EOF not found' @@ -6941,22 +7005,22 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis self.fileParts.append(fileContent) else: sys.exit(errorMessage) + + if six.PY3: + fileContent = fileContent.decode('latin-1') + pdfFile.setUpdates(len(self.fileParts) - 1) - # Getting the body, cross reference table and trailer of each part of the file for i in range(len(self.fileParts)): bodyOffset = 0 xrefOffset = 0 trailerOffset = 0 - eofOffset = 0 xrefObject = None xrefContent = None xrefSection = None xrefStreamSection = None - xrefFound = False streamTrailer = None trailer = None - trailerFound = False pdfIndirectObject = None if not 
pdfFile.isEncrypted(): encryptDict = None @@ -6968,23 +7032,23 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis bodyOffset = 0 else: bodyOffset = len(self.fileParts[i-1]) - + # Getting the content for each section - bodyContent,xrefContent,trailerContent = self.parsePDFSections(content,forceMode,looseMode) - if xrefContent != None: + if six.PY3 and isinstance(content, bytes): + content = content.decode('latin-1') + bodyContent, xrefContent, trailerContent = self.parsePDFSections(content, forceMode, looseMode) + if xrefContent is not None: xrefOffset = bodyOffset + len(bodyContent) trailerOffset = xrefOffset + len(xrefContent) bodyContent = bodyContent.strip('\r\n') xrefContent = xrefContent.strip('\r\n') trailerContent = trailerContent.strip('\r\n') - trailerFound = True - xrefFound = True else: - if trailerContent != None: + if trailerContent is not None: xrefOffset = -1 trailerOffset = bodyOffset + len(bodyContent) bodyContent = bodyContent.strip('\r\n') - trailerContent = trailerContent.strip('\r\n') + trailerContent = trailerContent.strip('\r\n') else: errorMessage = 'PDF sections not found' if forceMode: @@ -6992,14 +7056,14 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis else: sys.exit('Error: '+errorMessage+'!!') - # Converting the body content in PDFObjects body = PDFBody() rawIndirectObjects = self.getIndirectObjects(bodyContent, looseMode) if rawIndirectObjects != []: for j in range(len(rawIndirectObjects)): relativeOffset = 0 - auxContent = str(bodyContent) + + auxContent = bodyContent rawObject = rawIndirectObjects[j][0] objectHeader = rawIndirectObjects[j][1] while True: @@ -7009,7 +7073,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis break relativeOffset += index checkHeader = bodyContent[relativeOffset-1:relativeOffset+len(objectHeader)] - if not re.match('\d{1,10}'+objectHeader,checkHeader): + if not re.match('\d{1,10}'+objectHeader, 
checkHeader): break else: auxContent = auxContent[index+len(objectHeader):] @@ -7017,7 +7081,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis ret = self.createPDFIndirectObject(rawObject, forceMode, looseMode) if ret[0] != -1: pdfIndirectObject = ret[1] - if pdfIndirectObject != None: + if pdfIndirectObject is not None: if relativeOffset == -1: pdfIndirectObject.setOffset(relativeOffset) else: @@ -7027,7 +7091,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis pdfFile.addError(ret[1]) type = ret[1] pdfObject = pdfIndirectObject.getObject() - if pdfObject != None: + if pdfObject is not None: objectType = pdfObject.getType() if objectType == 'dictionary': if isFirstBody and not linearizedFound: @@ -7038,17 +7102,17 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis xrefObject = pdfIndirectObject ret = self.createPDFCrossRefSectionFromStream(pdfIndirectObject) if ret[0] != -1: - xrefStreamSection = ret[1] + xrefStreamSection = ret[1] else: if not forceMode: sys.exit('Error: An error has occurred while parsing an indirect object!!') else: - pdfFile.addError('Object is None') + pdfFile.addError('Object is None') else: if not forceMode: sys.exit('Error: Bad indirect object!!') else: - pdfFile.addError('Indirect object is None') + pdfFile.addError('Indirect object is None') else: if not forceMode: sys.exit('Error: An error has occurred while parsing an indirect object!!') @@ -7056,7 +7120,7 @@ def parse (self, fileName, forceMode = False, looseMode = False, manualAnalysis pdfFile.addError('Error parsing object: '+str(objectHeader)+' ('+str(ret[1])+')') else: pdfFile.addError('No indirect objects found in the body') - if pdfIndirectObject != None: + if pdfIndirectObject is not None: body.setNextOffset(pdfIndirectObject.getOffset()) ret = body.updateObjects() if ret[0] == -1: @@ -7068,75 +7132,75 @@ def parse (self, fileName, forceMode = False, looseMode = False, 
manualAnalysis pdfFile.addNumEncodedStreams(body.getNumEncodedStreams()) pdfFile.addNumDecodingErrors(body.getNumDecodingErrors()) isFirstBody = False - + # Converting the cross reference table content in PDFObjects - if xrefContent != None: - ret = self.createPDFCrossRefSection(xrefContent,xrefOffset) + if xrefContent is not None: + ret = self.createPDFCrossRefSection(xrefContent, xrefOffset) if ret[0] != -1: xrefSection = ret[1] pdfFile.addCrossRefTableSection([xrefSection, xrefStreamSection]) - + # Converting the trailer content in PDFObjects if body.containsXrefStreams(): - ret = self.createPDFTrailerFromStream(xrefObject,trailerContent) + ret = self.createPDFTrailerFromStream(xrefObject, trailerContent) if ret[0] != -1: streamTrailer = ret[1] - ret = self.createPDFTrailer(trailerContent, trailerOffset, streamPresent = True) + ret = self.createPDFTrailer(trailerContent, trailerOffset, streamPresent=True) if ret[0] != -1: trailer = ret[1] - if streamTrailer != None and not pdfFile.isEncrypted(): + if streamTrailer is not None and not pdfFile.isEncrypted(): encryptDict = streamTrailer.getDictEntry('/Encrypt') - if encryptDict != None: + if encryptDict is not None: pdfFile.setEncrypted(True) - elif trailer != None: + elif trailer is not None: encryptDict = trailer.getDictEntry('/Encrypt') - if encryptDict != None: + if encryptDict is not None: pdfFile.setEncrypted(True) - if trailer != None: + if trailer is not None: fileId = trailer.getDictEntry('/ID') - if fileId == None: + if fileId is None: fileId = streamTrailer.getDictEntry('/ID') else: ret = self.createPDFTrailer(trailerContent, trailerOffset) if ret[0] != -1 and not pdfFile.isEncrypted(): trailer = ret[1] encryptDict = trailer.getDictEntry('/Encrypt') - if encryptDict != None: + if encryptDict is not None: pdfFile.setEncrypted(True) fileId = trailer.getDictEntry('/ID') - if pdfFile.getEncryptDict() == None and encryptDict != None: + if pdfFile.getEncryptDict() is None and encryptDict is not None: 
objectType = encryptDict.getType() if objectType == 'reference': encryptDictId = encryptDict.getId() - encryptObject = pdfFile.getObject(encryptDictId,i) - if encryptObject != None: + encryptObject = pdfFile.getObject(encryptDictId, i) + if encryptObject is not None: objectType = encryptObject.getType() encryptDict = encryptObject else: if i == pdfFile.updates: pdfFile.addError('/Encrypt dictionary not found') if objectType == 'dictionary': - pdfFile.setEncryptDict([encryptDictId,encryptDict.getElements()]) + pdfFile.setEncryptDict([encryptDictId, encryptDict.getElements()]) - if fileId != None and pdfFile.getFileId() == '': + if fileId is not None and pdfFile.getFileId() == '': objectType = fileId.getType() if objectType == 'array': fileIdElements = fileId.getElements() - if fileIdElements != None and fileIdElements != []: - if fileIdElements[0] != None: + if fileIdElements is not None and fileIdElements != []: + if fileIdElements[0] is not None: fileId = fileIdElements[0].getValue() pdfFile.setFileId(fileId) - elif fileIdElements[1] != None: + elif fileIdElements[1] is not None: fileId = fileIdElements[1].getValue() pdfFile.setFileId(fileId) pdfFile.addTrailer([trailer, streamTrailer]) - if pdfFile.isEncrypted() and pdfFile.getEncryptDict() != None: + if pdfFile.isEncrypted() and pdfFile.getEncryptDict() is not None: ret = pdfFile.decrypt() if ret[0] == -1: pdfFile.addError(ret[1]) - return (0,pdfFile) + return (0, pdfFile) - def parsePDFSections(self, content, forceMode = False, looseMode = False): + def parsePDFSections(self, content, forceMode=False, looseMode=False): ''' Method to parse the different sections of a version of a PDF document. @param content The raw content of the version of the PDF document. @@ -7144,12 +7208,10 @@ def parsePDFSections(self, content, forceMode = False, looseMode = False): @param looseMode Boolean to set the loose mode when parsing objects. Default value: False. 
@return An array with the different sections found: body, trailer and cross reference table ''' - threeSections = False bodyContent = None xrefContent = None trailerContent = None - - global pdfFile + indexTrailer = content.find('trailer') if indexTrailer != -1: restContent = content[:indexTrailer] @@ -7167,8 +7229,8 @@ def parsePDFSections(self, content, forceMode = False, looseMode = False): bodyContent = restContent if forceMode: pdfFile.addError('Xref section not found') - return [bodyContent,xrefContent,trailerContent] - + return [bodyContent, xrefContent, trailerContent] + indexTrailer = content.find('startxref') if indexTrailer != -1: restContent = content[:indexTrailer] @@ -7179,11 +7241,11 @@ def parsePDFSections(self, content, forceMode = False, looseMode = False): else: trailerContent = auxTrailer[:indexEOF+5] bodyContent = restContent - return [bodyContent,xrefContent,trailerContent] - - return [content,xrefContent,trailerContent] - - def createPDFIndirectObject (self, rawIndirectObject, forceMode = False, looseMode = False) : + return [bodyContent, xrefContent, trailerContent] + + return [content, xrefContent, trailerContent] + + def createPDFIndirectObject(self, rawIndirectObject, forceMode=False, looseMode=False): ''' Create a PDFIndirectObject instance from the raw content of the PDF file @param rawIndirectObject string with the raw content of the PDF body. @@ -7191,19 +7253,18 @@ def createPDFIndirectObject (self, rawIndirectObject, forceMode = False, looseMo @param looseMode specifies if the parsing process should search for the endobj tag or not (boolean). 
@return A tuple (status,statusContent), where statusContent is the PDFIndirectObject in case status = 0 or an error in case status = -1 ''' - global pdfFile try: self.charCounter = 0 pdfIndirectObject = PDFIndirectObject() - ret,id = self.readUntilNotRegularChar(rawIndirectObject) + ret, id = self.readUntilNotRegularChar(rawIndirectObject) pdfIndirectObject.setId(int(id)) - ret,genNum = self.readUntilNotRegularChar(rawIndirectObject) + ret, genNum = self.readUntilNotRegularChar(rawIndirectObject) pdfIndirectObject.setGenerationNumber(int(genNum)) ret = self.readSymbol(rawIndirectObject, 'obj') if ret[0] == -1: return ret rawObject = rawIndirectObject[self.charCounter:] - ret = self.readObject(rawObject, forceMode = forceMode, looseMode = looseMode) + ret = self.readObject(rawObject, forceMode=forceMode, looseMode=looseMode) if ret[0] == -1: return ret object = ret[1] @@ -7215,7 +7276,7 @@ def createPDFIndirectObject (self, rawIndirectObject, forceMode = False, looseMo pdfFile.addError(errorMessage) return (-1, errorMessage) pdfFile.setMaxObjectId(id) - return (0,pdfIndirectObject) + return (0, pdfIndirectObject) def createPDFArray(self, rawContent): ''' @@ -7223,7 +7284,6 @@ def createPDFArray(self, rawContent): @param rawContent string with the raw content of the PDF body. 
@return A tuple (status,statusContent), where statusContent is the PDFArray in case status = 0 or an error in case status = -1 ''' - global pdfFile realCounter = self.charCounter self.charCounter = 0 elements = [] @@ -7239,7 +7299,7 @@ def createPDFArray(self, rawContent): pdfObject = None else: pdfObject = ret[1] - while pdfObject != None: + while pdfObject is not None: elements.append(pdfObject) ret = self.readObject(rawContent[self.charCounter:]) if ret[0] == -1: @@ -7261,8 +7321,8 @@ def createPDFArray(self, rawContent): errorMessage += ': '+e.message return (-1, errorMessage) self.charCounter = realCounter - return (0,pdfArray) - + return (0, pdfArray) + def createPDFDictionary(self, rawContent): ''' Create a PDFDictionary instance from the raw content of the PDF file @@ -7285,7 +7345,7 @@ def createPDFDictionary(self, rawContent): name = None else: name = ret[1] - while name != None: + while name is not None: key = name.getValue() rawNames[key] = name rawValue = rawContent[self.charCounter:] @@ -7300,10 +7360,13 @@ def createPDFDictionary(self, rawContent): elements[key] = PDFString(ret[1]) self.readSpaces(rawContent) else: - return (-1,'Bad object for '+str(key)+' key') + return (-1, 'Bad object for '+str(key)+' key') else: value = ret[1] - elements[key] = value + if value.value == "<< >>": + elements[key] = PDFString(rawValue) + else: + elements[key] = value ret = self.readObject(rawContent[self.charCounter:], 'name') if ret[0] == -1: if ret[1] != 'Empty content reading object': @@ -7316,7 +7379,7 @@ def createPDFDictionary(self, rawContent): name = None else: name = ret[1] - if name != None and name.getType() != 'name': + if name is not None and name.getType() != 'name': errorMessage = 'Name object not found in dictionary key' if isForceMode: pdfFile.addError(errorMessage) @@ -7331,7 +7394,7 @@ def createPDFDictionary(self, rawContent): errorMessage += ': '+e.message return (-1, errorMessage) self.charCounter = realCounter - return (0,pdfDictionary) + 
return (0, pdfDictionary) def createPDFStream(self, dict, stream): ''' @@ -7355,8 +7418,8 @@ def createPDFStream(self, dict, stream): else: name = None else: - name = ret[1] - while name != None: + name = ret[1] + while name is not None: key = name.getValue() rawNames[key] = name ret = self.readObject(dict[self.charCounter:]) @@ -7384,7 +7447,7 @@ def createPDFStream(self, dict, stream): name = None else: name = ret[1] - if elements.has_key('/Type') and elements['/Type'].getValue() == '/ObjStm': + if "/Type" in elements and elements['/Type'].getValue() == '/ObjStm': try: pdfStream = PDFObjectStream(dict, stream, elements, rawNames, {}) except Exception as e: @@ -7401,18 +7464,17 @@ def createPDFStream(self, dict, stream): errorMessage += ': '+e.message return (-1, errorMessage) self.charCounter = realCounter - return (0,pdfStream) + return (0, pdfStream) - def createPDFCrossRefSection (self, rawContent, offset): + def createPDFCrossRefSection(self, rawContent, offset): ''' Create a PDFCrossRefSection instance from the raw content of the PDF file @param rawContent String with the raw content of the PDF body (string) @param offset Offset of the cross reference section in the PDF file (int) @return A tuple (status,statusContent), where statusContent is the PDFCrossRefSection in case status = 0 or an error in case status = -1 ''' - global isForceMode, pdfFile if not isinstance(rawContent, str): - return (-1,'Empty xref content') + return (-1, 'Empty xref content') entries = [] auxOffset = 0 subSectionSize = 0 @@ -7430,15 +7492,15 @@ def createPDFCrossRefSection (self, rawContent, offset): lines = self.getLines(rawContent[self.charCounter:]) if lines == []: if isForceMode: - pdfCrossRefSubSection = PDFCrossRefSubSection(0, offset = -1) + pdfCrossRefSubSection = PDFCrossRefSubSection(0, offset=-1) pdfFile.addError('No entries in xref section') else: - return (-1,'Error: No entries in xref section!!') + return (-1, 'Error: No entries in xref section!!') else: for line in 
lines: match = re.findall(beginSubSectionRE, line) if match != []: - if pdfCrossRefSubSection != None: + if pdfCrossRefSubSection is not None: pdfCrossRefSubSection.setSize(subSectionSize) pdfCrossRefSection.addSubsection(pdfCrossRefSubSection) pdfCrossRefSubSection.setEntries(entries) @@ -7447,25 +7509,25 @@ def createPDFCrossRefSection (self, rawContent, offset): try: pdfCrossRefSubSection = PDFCrossRefSubSection(match[0][0], match[0][1], offset=auxOffset) except: - return (-1,'Error creating PDFCrossRefSubSection') + return (-1, 'Error creating PDFCrossRefSubSection') else: - match = re.findall(entryRE,line) + match = re.findall(entryRE, line) if match != []: try: pdfCrossRefEntry = PDFCrossRefEntry(match[0][0], match[0][1], match[0][2], offset=auxOffset) except: - return (-1,'Error creating PDFCrossRefEntry') + return (-1, 'Error creating PDFCrossRefEntry') entries.append(pdfCrossRefEntry) else: - #TODO: comments in line or spaces/\n\r...? + # TODO: comments in line or spaces/\n\r...? if isForceMode: - if pdfCrossRefSubSection != None: + if pdfCrossRefSubSection is not None: pdfCrossRefSubSection.addError('Bad format for cross reference entry: '+line) else: pdfCrossRefSubSection = PDFCrossRefSubSection(0, offset=-1) pdfFile.addError('Bad xref section') else: - return (-1,'Bad format for cross reference entry') + return (-1, 'Bad format for cross reference entry') auxOffset += len(line) subSectionSize += len(line) else: @@ -7478,9 +7540,9 @@ def createPDFCrossRefSection (self, rawContent, offset): pdfCrossRefSubSection.setSize(subSectionSize) pdfCrossRefSection.addSubsection(pdfCrossRefSubSection) pdfCrossRefSubSection.setEntries(entries) - return (0,pdfCrossRefSection) + return (0, pdfCrossRefSection) - def createPDFCrossRefSectionFromStream (self, objectStream): + def createPDFCrossRefSectionFromStream(self, objectStream): ''' Create a PDFCrossRefSection instance from the raw content of the PDF file @param objectStream Object stream object (PDFIndirectObject). 
@@ -7491,19 +7553,19 @@ def createPDFCrossRefSectionFromStream (self, objectStream): entries = [] numObjects = 0 numSubsections = 1 - bytesPerField = [1,2,1] + bytesPerField = [1, 2, 1] entrySize = 4 subsectionIndexes = [] - if objectStream != None: + if objectStream is not None: pdfCrossRefSection = PDFCrossRefSection() pdfCrossRefSection.setXrefStreamObject(objectStream.getId()) xrefObject = objectStream.getObject() - if xrefObject != None: + if xrefObject is not None: if xrefObject.hasElement('/Size'): sizeObject = xrefObject.getElementByName('/Size') - if sizeObject != None and sizeObject.getType() == 'integer': + if sizeObject is not None and sizeObject.getType() == 'integer': numObjects = sizeObject.getRawValue() - subsectionIndexes = [0,numObjects] + subsectionIndexes = [0, numObjects] else: errorMessage = 'Bad object type for /Size element' if isForceMode: @@ -7516,7 +7578,7 @@ def createPDFCrossRefSectionFromStream (self, objectStream): pdfCrossRefSection.addError(errorMessage) else: return (-1, errorMessage) - + if xrefObject.hasElement('/W'): bytesPerFieldObject = xrefObject.getElementByName('/W') if bytesPerFieldObject.getType() == 'array': @@ -7543,7 +7605,7 @@ def createPDFCrossRefSectionFromStream (self, objectStream): pdfCrossRefSection.addError(errorMessage) else: return (-1, errorMessage) - + if xrefObject.hasElement('/Index'): subsectionIndexesObject = xrefObject.getElementByName('/Index') if subsectionIndexesObject.getType() == 'array': @@ -7562,24 +7624,24 @@ def createPDFCrossRefSectionFromStream (self, objectStream): pdfCrossRefSection.addError(errorMessage) else: return (-1, errorMessage) - + pdfCrossRefSection.setBytesPerField(bytesPerField) stream = xrefObject.getStream() - for i in range(0,len(stream),entrySize): + for i in range(0, len(stream), entrySize): entryBytes = stream[i:i+entrySize] try: if bytesPerField[0] == 0: f1 = 1 else: - f1 = int(entryBytes[:bytesPerField[0]].encode('hex'),16) + f1 = 
int(entryBytes[:bytesPerField[0]].encode('hex'), 16) if bytesPerField[1] == 0: f2 = 0 else: - f2 = int(entryBytes[bytesPerField[0]:bytesPerField[0]+bytesPerField[1]].encode('hex'),16) + f2 = int(entryBytes[bytesPerField[0]:bytesPerField[0]+bytesPerField[1]].encode('hex'), 16) if bytesPerField[2] == 0: f3 = 0 else: - f3 = int(entryBytes[bytesPerField[0]+bytesPerField[1]:].encode('hex'),16) + f3 = int(entryBytes[bytesPerField[0]+bytesPerField[1]:].encode('hex'), 16) except: errorMessage = 'Error in hexadecimal conversion' if isForceMode: @@ -7587,19 +7649,19 @@ def createPDFCrossRefSectionFromStream (self, objectStream): else: return (-1, errorMessage) try: - pdfCrossRefEntry = PDFCrossRefEntry(f2,f3,f1) + pdfCrossRefEntry = PDFCrossRefEntry(f2, f3, f1) + entries.append(pdfCrossRefEntry) except: errorMessage = 'Error creating PDFCrossRefEntry' if isForceMode: pdfCrossRefSection.addError(errorMessage) else: return (-1, errorMessage) - entries.append(pdfCrossRefEntry) - for i in range(numSubsections): + for i in range(int(numSubsections)): firstObject = subsectionIndexes[index] numObjectsInSubsection = subsectionIndexes[index+1] try: - pdfCrossRefSubSection = PDFCrossRefSubSection(firstObject,numObjectsInSubsection) + pdfCrossRefSubSection = PDFCrossRefSubSection(firstObject, numObjectsInSubsection) except: errorMessage = 'Error creating PDFCrossRefSubSection' if isForceMode: @@ -7608,15 +7670,14 @@ def createPDFCrossRefSectionFromStream (self, objectStream): return (-1, errorMessage) pdfCrossRefSubSection.setEntries(entries[firstEntry:firstEntry+numObjectsInSubsection]) pdfCrossRefSection.addSubsection(pdfCrossRefSubSection) - firstentry = numObjectsInSubsection index += 2 - return (0,pdfCrossRefSection) + return (0, pdfCrossRefSection) else: - return (-1,'The object stream is None') + return (-1, 'The object stream is None') else: - return (-1,'The indirect object stream is None') + return (-1, 'The indirect object stream is None') - def createPDFTrailer (self, 
rawContent, offset, streamPresent = False) : + def createPDFTrailer(self, rawContent, offset, streamPresent=False): ''' Create a PDFTrailer instance from the raw content of the PDF file @param rawContent String with the raw content of the PDF body (string) @@ -7624,13 +7685,12 @@ def createPDFTrailer (self, rawContent, offset, streamPresent = False) : @param streamPresent It specifies if an object stream exists in the PDF body @return A tuple (status,statusContent), where statusContent is the PDFTrailer in case status = 0 or an error in case status = -1 ''' - global pdfFile,isForceMode trailer = None self.charCounter = 0 - if not isinstance(rawContent,str): - return (-1,'Empty trailer content') - self.readSymbol(rawContent, 'trailer') - ret = self.readObject(rawContent[self.charCounter:],'dictionary') + if not isinstance(rawContent, str): + return (-1, 'Empty trailer content') + self.readSymbol(rawContent, 'trailer') + ret = self.readObject(rawContent[self.charCounter:], 'dictionary') if ret[0] == -1: dict = PDFDictionary('') dict.addError('Error creating the trailer dictionary') @@ -7639,7 +7699,7 @@ def createPDFTrailer (self, rawContent, offset, streamPresent = False) : ret = self.readSymbol(rawContent, 'startxref') if ret[0] == -1: try: - trailer = PDFTrailer(dict, streamPresent = streamPresent) + trailer = PDFTrailer(dict, streamPresent=streamPresent) except Exception as e: errorMessage = 'Error creating PDFTrailer' if e.message != '': @@ -7652,11 +7712,11 @@ def createPDFTrailer (self, rawContent, offset, streamPresent = False) : lastXrefSection = -1 pdfFile.addError('EOL not found while looking for the last cross reference section') else: - return (-1,'EOL not found while looking for the last cross reference section') + return (-1, 'EOL not found while looking for the last cross reference section') else: lastXrefSection = ret[1] try: - trailer = PDFTrailer(dict, lastXrefSection, streamPresent = streamPresent) + trailer = PDFTrailer(dict, lastXrefSection, 
streamPresent=streamPresent) except Exception as e: errorMessage = 'Error creating PDFTrailer' if e.message != '': @@ -7670,9 +7730,9 @@ def createPDFTrailer (self, rawContent, offset, streamPresent = False) : else: trailer.setEOFOffset(offset+eofOffset) trailer.setSize(eofOffset) - return (0,trailer) - - def createPDFTrailerFromStream (self, indirectObject, rawContent) : + return (0, trailer) + + def createPDFTrailerFromStream(self, indirectObject, rawContent): ''' Create a PDFTrailer instance from the raw content of the PDF file @param indirectObject Object stream object (PDFIndirectObject). @@ -7681,16 +7741,16 @@ def createPDFTrailerFromStream (self, indirectObject, rawContent) : ''' trailer = None self.charCounter = 0 - trailerElements = ['/Size','/Prev','/Root','/Encrypt','/Info','/ID'] + trailerElements = ['/Size', '/Prev', '/Root', '/Encrypt', '/Info', '/ID'] dict = {} - if indirectObject != None: + if indirectObject is not None: xrefStreamObject = indirectObject.getObject() - if xrefStreamObject != None: + if xrefStreamObject is not None: for element in trailerElements: if xrefStreamObject.hasElement(element): dict[element] = xrefStreamObject.getElementByName(element) try: - dict = PDFDictionary('',dict) + dict = PDFDictionary('', dict) except Exception as e: if isForceMode: dict = None @@ -7699,11 +7759,11 @@ def createPDFTrailerFromStream (self, indirectObject, rawContent) : if e.message != '': errorMessage += ': '+e.message return (-1, errorMessage) - if not isinstance(rawContent,str): + if not isinstance(rawContent, str): if isForceMode: lastXrefSection = -1 else: - return (-1,'Empty trailer content') + return (-1, 'Empty trailer content') else: ret = self.readUntilSymbol(rawContent, 'startxref') if ret[0] == -1 and not isForceMode: @@ -7727,42 +7787,40 @@ def createPDFTrailerFromStream (self, indirectObject, rawContent) : return (-1, errorMessage) trailer.setXrefStreamObject(indirectObject.getId()) else: - return (-1,'Object stream is None') + return 
(-1, 'Object stream is None') else: - return (-1,'Indirect object stream is None') - return (0,trailer) + return (-1, 'Indirect object stream is None') + return (0, trailer) - def getIndirectObjects(self, content, looseMode = False): + def getIndirectObjects(self, content, looseMode=False): ''' This function returns an array of raw indirect objects of the PDF file given the raw body. @param content: string with the raw content of the PDF body. @param looseMode: boolean specifies if the parsing process should search for the endobj tag or not. - @return matchingObjects: array of tuples (object_content,object_header). + @return matchingObjects: array of tuples (object_header+object_content,object_header). ''' - global pdfFile matchingObjects = [] - if not isinstance(content,str): + if not isinstance(content, str): return matchingObjects if not looseMode: - regExp = re.compile('((\d{1,10}\s\d{1,10}\sobj).*?endobj)',re.DOTALL) + regExp = re.compile('((\d{1,10}\s\d{1,10}\sobj).*?endobj)', re.DOTALL) matchingObjects = regExp.findall(content) else: - regExp = re.compile('((\d{1,10}\s\d{1,10}\sobj).*?)\s\d{1,10}\s\d{1,10}\sobj',re.DOTALL) - matchingObjectsAux = regExp.findall(content) - while matchingObjectsAux != []: - if matchingObjectsAux[0] != []: - objectBody = matchingObjectsAux[0][0] - matchingObjects.append(matchingObjectsAux[0]) - content = content[content.find(objectBody)+len(objectBody):] - matchingObjectsAux = regExp.findall(content) - else: - matchingObjectsAux = [] - lastObject = re.findall('(\d{1,5}\s\d{1,5}\sobj)',content,re.DOTALL) - if lastObject != []: - content = content[content.find(lastObject[0]):] - matchingObjects.append((content,lastObject[0])) + regExp = re.compile('\d{1,10}\s\d{1,10}\sobj') + match = regExp.search(content) + lastidx, lastobjbody, lasthead = 0, None, None + while match: + if lastobjbody: + matchingObjects.append((lasthead + lastobjbody, lasthead)) + lasthead = match.group(0) + lastidx += match.end() + match = 
regExp.search(content[lastidx:]) + if match: + lastobjbody = content[lastidx:lastidx+match.start()] + if lasthead: + matchingObjects.append((lasthead + content[lastidx:], lasthead)) return matchingObjects - + def getLines(self, content): ''' Simple function to return the lines separated by end of line characters @@ -7786,8 +7844,8 @@ def getLines(self, content): if i > 0: lines.append(content) return lines - - def readObject(self, content, objectType = None, forceMode = False, looseMode = False): + + def readObject(self, content, objectType=None, forceMode=False, looseMode=False): ''' Method to parse the raw body of the PDF file and obtain PDFObject instances @param content @@ -7796,13 +7854,17 @@ def readObject(self, content, objectType = None, forceMode = False, looseMode = @param looseMode @return A tuple (status,statusContent), where statusContent is a PDFObject instance in case status = 0 or an error in case status = -1 ''' - global pdfFile if len(content) == 0 or content[:6] == 'endobj': - return (-1,'Empty content reading object') + return (-1, 'Empty content reading object') pdfObject = None oldCounter = self.charCounter self.charCounter = 0 - if objectType != None: + # skip leading whitespace in case of sloppy reference offsets + self.readSpaces(content) + if self.charCounter > 0: + content = content[self.charCounter:] + self.charCounter = 0 + if objectType is not None: objectsTypeArray = [self.delimiters[i][2] for i in range(len(self.delimiters))] index = objectsTypeArray.index(objectType) if index != -1: @@ -7810,13 +7872,13 @@ def readObject(self, content, objectType = None, forceMode = False, looseMode = else: if isForceMode: pdfFile.addError('Unknown object type while parsing object') - return (-1,'Unknown object type') + return (-1, 'Unknown object type') else: sys.exit('Error: Unknown object type!!') else: delimiters = self.delimiters for delim in delimiters: - ret = self.readSymbol(content, delim[0]) + ret = self.readSymbol(content, delim[0], False 
if delim[0] == '(' else True) if ret[0] != -1: if delim[2] == 'dictionary': ret = self.readUntilClosingDelim(content, delim) @@ -7830,7 +7892,6 @@ def readObject(self, content, objectType = None, forceMode = False, looseMode = ret = self.readUntilSymbol(content, 'stream') if ret[0] == -1: return ret - auxDict = ret[1] self.readSymbol(content, 'stream', False) self.readUntilEndOfLine(content) self.readSymbol(content, '\r', False) @@ -7891,58 +7952,59 @@ def readObject(self, content, objectType = None, forceMode = False, looseMode = pdfObject.addError('Closing delimiter not found in array object') break elif delim[2] == 'name': - ret,raw = self.readUntilNotRegularChar(content) - pdfObject = PDFName(raw) + ret, raw = self.readUntilNotRegularChar(content) + if raw: + pdfObject = PDFName(raw) break elif delim[2] == 'comment': ret = self.readUntilEndOfLine(content) if ret[0] == 0: self.comments.append(ret[1]) self.readSpaces(content) - pdfObject = self.readObject(content[self.charCounter:],objectType) + pdfObject = self.readObject(content[self.charCounter:], objectType) else: return ret break else: if content[0] == 't' or content[0] == 'f': - ret,raw = self.readUntilNotRegularChar(content) + ret, raw = self.readUntilNotRegularChar(content) pdfObject = PDFBool(raw) elif content[0] == 'n': - ret,raw = self.readUntilNotRegularChar(content) + ret, raw = self.readUntilNotRegularChar(content) pdfObject = PDFNull(raw) elif re.findall('^(\d{1,10}\s{1,3}\d{1,10}\s{1,3}R)', content, re.DOTALL) != []: - ret,id = self.readUntilNotRegularChar(content) - ret,genNumber = self.readUntilNotRegularChar(content) + ret, id = self.readUntilNotRegularChar(content) + ret, genNumber = self.readUntilNotRegularChar(content) ret = self.readSymbol(content, 'R') if ret[0] == -1: return ret pdfObject = PDFReference(id, genNumber) elif re.findall('^([-+]?\.?\d{1,15}\.?\d{0,15})', content, re.DOTALL) != []: - ret,num = self.readUntilNotRegularChar(content) + ret, num = 
self.readUntilNotRegularChar(content) pdfObject = PDFNum(num) else: self.charCounter += oldCounter - return (-1,'Object not found') + return (-1, 'Object not found') self.charCounter += oldCounter - return (0,pdfObject) + return (0, pdfObject) def readSpaces(self, string): ''' Reads characters until all spaces chars have been read - @param string + @param string @return A tuple (status,statusContent), where statusContent is the number of characters read in case status = 0 or an error in case status = -1 ''' - if not isinstance(string,str): - return (-1,'Bad string') + if not isinstance(string, str): + return (-1, 'Bad string') spacesCounter = self.charCounter - for i in range(self.charCounter,len(string)): + for i in range(self.charCounter, len(string)): if string[i] not in spacesChars: break self.charCounter += 1 spacesCounter -= self.charCounter - return (0,spacesCounter) + return (0, spacesCounter) - def readSymbol(self, string, symbol, deleteSpaces = True): + def readSymbol(self, string, symbol, deleteSpaces=True): ''' Reads a given symbol from the string, removing comments and spaces (if specified) @param string @@ -7950,9 +8012,8 @@ def readSymbol(self, string, symbol, deleteSpaces = True): @param deleteSpaces @return A tuple (status,statusContent), where statusContent is the number of characters read in case status = 0 or an error in case status = -1 ''' - global pdfFile - if not isinstance(string,str): - return (-1,'Bad string') + if not isinstance(string, str): + return (-1, 'Bad string') oldCharCounter = self.charCounter if self.charCounter > len(string)-1: errorMessage = 'EOF while looking for symbol "'+symbol+'"' @@ -7967,12 +8028,12 @@ def readSymbol(self, string, symbol, deleteSpaces = True): symbolToRead = string[self.charCounter:self.charCounter+len(symbol)] if symbolToRead != symbol: errorMessage = 'Symbol "'+symbol+'" not found while parsing' - #pdfFile.addError(errorMessage) + # pdfFile.addError(errorMessage) return (-1, errorMessage) 
self.charCounter += len(symbol) if deleteSpaces: self.readSpaces(string) - return (0,self.charCounter - oldCharCounter) + return (0, self.charCounter - oldCharCounter) def readUntilClosingDelim(self, content, delim): ''' @@ -7981,12 +8042,10 @@ def readUntilClosingDelim(self, content, delim): @param delim @return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1 ''' - global pdfFile output = '' - if not isinstance(content,str): - return (-1,'Bad string') + if not isinstance(content, str): + return (-1, 'Bad string') newContent = content[self.charCounter:] - numOpeningDelims = newContent.count(delim[0]) + 1 numClosingDelims = newContent.count(delim[1]) if numClosingDelims == 0: errorMessage = 'No closing delimiter found' @@ -7995,7 +8054,7 @@ def readUntilClosingDelim(self, content, delim): elif numClosingDelims == 1: index = newContent.rfind(delim[1]) self.charCounter += index - return (0,newContent[:index]) + return (0, newContent[:index]) else: indexChar = 0 prevChar = '' @@ -8007,12 +8066,12 @@ def readUntilClosingDelim(self, content, delim): nextChar = newContent[indexChar+1] if char == delim[1] or (char + nextChar) == delim[1]: if char != ')' or indexChar == 0 or newContent[indexChar-1] != '\\': - return (0,output) + return (0, output) else: output += char indexChar += 1 self.charCounter += 1 - elif (char == '(' and prevChar != '\\') or (char in ['[','<'] and delim[0] != '('): + elif (char == '(' and prevChar != '\\') or (char in ['[', '<'] and delim[0] != '('): if (char + nextChar) != '<<': delimIndex = delimiterChars.index(char) self.charCounter += 1 @@ -8040,27 +8099,26 @@ def readUntilClosingDelim(self, content, delim): indexChar += 1 self.charCounter += 1 output += char - prevChar = char + prevChar = char else: errorMessage = 'No closing delimiter found' pdfFile.addError(errorMessage) return (-1, errorMessage) - + def readUntilEndOfLine(self, content): ''' This function reads 
characters until the end of line @param content @return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1 ''' - global pdfFile - if not isinstance(content,str): - return (-1,'Bad string') + if not isinstance(content, str): + return (-1, 'Bad string') errorMessage = [] oldCharCounter = self.charCounter tmpContent = content[self.charCounter:] for char in tmpContent: if char == '\r' or char == '\n': - return (0,content[oldCharCounter:self.charCounter]) + return (0, content[oldCharCounter:self.charCounter]) self.charCounter += 1 else: errorMessage = 'EOL not found' @@ -8074,9 +8132,8 @@ def readUntilLastSymbol(self, string, symbol): @param symbol @return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1 ''' - global pdfFile - if not isinstance(string,str): - return (-1,'Bad string') + if not isinstance(string, str): + return (-1, 'Bad string') newString = string[self.charCounter:] index = newString.rfind(symbol) if index == -1: @@ -8084,26 +8141,26 @@ def readUntilLastSymbol(self, string, symbol): pdfFile.addError(errorMessage) return (-1, errorMessage) self.charCounter += index - return (0,newString[:index]) - + return (0, newString[:index]) + def readUntilNotRegularChar(self, string): ''' Reads the regular chars of the string until it reachs a non-regular char. Then it removes spaces chars. 
- @param string + @param string @return A tuple (status,statusContent), where statusContent is the number of characters read in case status = 0 or an error in case status = -1 ''' readChars = '' - if not isinstance(string,str): - return (-1,'Bad string') + if not isinstance(string, str): + return (-1, 'Bad string') notRegChars = spacesChars + delimiterChars - for i in range(self.charCounter,len(string)): + for i in range(self.charCounter, len(string)): if string[i] in notRegChars: self.readSpaces(string) break readChars += string[i] self.charCounter += 1 - return (0,readChars) - + return (0, readChars) + def readUntilSymbol(self, string, symbol): ''' Method that reads characters until it finds the first appearance of 'symbol' @@ -8111,13 +8168,14 @@ def readUntilSymbol(self, string, symbol): @param symbol @return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1 ''' - global pdfFile - if not isinstance(string,str): - return (-1,'Bad string') + if not ((isinstance(string, bytes) and isinstance(symbol, bytes)) or (isinstance(string, str) and isinstance(symbol, str))): + return (-1, 'Bad string') + newString = string[self.charCounter:] + index = newString.find(symbol) if index == -1: errorMessage = 'Symbol "'+symbol+'" not found' return (-1, errorMessage) self.charCounter += index - return (0,newString[:index]) + return (0, newString[:index]) diff --git a/PDFCrypto.py b/peepdf/PDFCrypto.py similarity index 85% rename from PDFCrypto.py rename to peepdf/PDFCrypto.py index e16e345..ff95803 100644 --- a/PDFCrypto.py +++ b/peepdf/PDFCrypto.py @@ -21,21 +21,28 @@ # along with peepdf. If not, see . 
# -''' +''' Module to manage cryptographic operations with PDF files -''' +''' + +import hashlib +import itertools +import struct +import random +import warnings +import sys +import peepdf.aes +import six -import hashlib,struct,random,warnings,aes,sys -from itertools import cycle, izip warnings.filterwarnings("ignore") -paddingString = '\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A' +paddingString = b'\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A' -def computeEncryptionKey(password, dictOwnerPass, dictUserPass, dictOE, dictUE, fileID, pElement, dictKeyLength = 128, revision = 3, encryptMetadata = False, passwordType = None): +def computeEncryptionKey(password, dictOwnerPass, dictUserPass, dictOE, dictUE, fileID, pElement, dictKeyLength=128, revision=3, encryptMetadata=False, passwordType=None): ''' Compute an encryption key to encrypt/decrypt the PDF file - + @param password: The password entered by the user @param dictOwnerPass: The owner password from the standard security handler dictionary @param dictUserPass: The user password from the standard security handler dictionary @@ -57,7 +64,7 @@ def computeEncryptionKey(password, dictOwnerPass, dictUserPass, dictOE, dictUE, password = password[:32] elif lenPass < 32: password += paddingString[:32-lenPass] - md5input = password + dictOwnerPass + struct.pack(' 3 and not encryptMetadata: md5input += '\xFF'*4 key = hashlib.md5(md5input).digest() @@ -75,32 +82,32 @@ def computeEncryptionKey(password, dictOwnerPass, dictUserPass, dictOE, dictUE, password = password.encode('utf-8')[:127] kSalt = dictUserPass[40:48] intermediateKey = hashlib.sha256(password + kSalt).digest() - ret = aes.decryptData('\0'*16+dictUE, intermediateKey) + ret = peepdf.aes.decryptData('\0'*16+dictUE, intermediateKey) elif passwordType == 'OWNER': password = 
password.encode('utf-8')[:127] kSalt = dictOwnerPass[40:48] intermediateKey = hashlib.sha256(password + kSalt + dictUserPass).digest() - ret = aes.decryptData('\0'*16+dictOE, intermediateKey) + ret = peepdf.aes.decryptData('\0'*16+dictOE, intermediateKey) return ret except: - return (-1, 'ComputeEncryptionKey error: %s %s' % (str(sys.exc_info()[0]),str(sys.exc_info()[1]))) + return (-1, 'ComputeEncryptionKey error: %s %s' % (str(sys.exc_info()[0]), str(sys.exc_info()[1]))) -def computeObjectKey(id, generationNum, encryptionKey, keyLengthBytes, algorithm = 'RC4'): +def computeObjectKey(id, generationNum, encryptionKey, keyLengthBytes, algorithm='RC4'): ''' Compute the key necessary to encrypt each object, depending on the id and generation number. Only necessary with /V < 5. - + @param id: The object id @param generationNum: The generation number of the object @param encryptionKey: The encryption key @param keyLengthBytes: The length of the encryption key in bytes @param algorithm: The algorithm used in the encryption/decryption process @return A tuple (status,statusContent), where statusContent is the computed key in case status = 0 or an error message in case status = -1 - ''' + ''' try: - key = encryptionKey + struct.pack(' 2: counter = 1 while counter <= 19: newKey = '' for i in range(len(rc4Key)): newKey += chr(ord(rc4Key[i]) ^ counter) - ownerPass = RC4(ownerPass,newKey) + ownerPass = RC4(ownerPass, newKey) counter += 1 return (0, ownerPass) except: - return (-1, 'ComputeOwnerPass error: %s %s' % (str(sys.exc_info()[0]),str(sys.exc_info()[1]))) + return (-1, 'ComputeOwnerPass error: %s %s' % (str(sys.exc_info()[0]), str(sys.exc_info()[1]))) -def computeUserPass(userPassString, dictO, fileID, pElement, keyLength = 128, revision = 3, encryptMetadata = False): +def computeUserPass(userPassString, dictO, fileID, pElement, keyLength=128, revision=3, encryptMetadata=False): ''' Compute the user password of the PDF file - + @param userPassString: The user password 
entered by the user @param ownerPass: The computed owner password @param fileID: The /ID element in the trailer dictionary of the PDF file @@ -172,7 +179,7 @@ def computeUserPass(userPassString, dictO, fileID, pElement, keyLength = 128, re # TODO: revision 5 userPass = '' dictU = '' - dictOE = '' + dictOE = '' dictUE = '' ret = computeEncryptionKey(userPassString, dictO, dictU, dictOE, dictUE, fileID, pElement, keyLength, revision, encryptMetadata) if ret[0] != -1: @@ -181,34 +188,34 @@ def computeUserPass(userPassString, dictO, fileID, pElement, keyLength = 128, re return ret try: if revision == 2: - userPass = RC4(paddingString,rc4Key) + userPass = RC4(paddingString, rc4Key) elif revision > 2: counter = 1 md5Input = paddingString + fileID hashResult = hashlib.md5(md5Input).digest() - userPass = RC4(hashResult,rc4Key) + userPass = RC4(hashResult, rc4Key) while counter <= 19: newKey = '' for i in range(len(rc4Key)): newKey += chr(ord(rc4Key[i]) ^ counter) - userPass = RC4(userPass,newKey) + userPass = RC4(userPass, newKey) counter += 1 counter = 0 while counter < 16: - userPass += chr(random.randint(32,255)) + userPass += chr(random.randint(32, 255)) counter += 1 else: # This should not be possible or the PDF specification does not say anything about it return (-1, 'ComputeUserPass error: revision number is < 2 (%d)' % revision) return (0, userPass) except: - return (-1, 'ComputeUserPass error: %s %s' % (str(sys.exc_info()[0]),str(sys.exc_info()[1]))) - + return (-1, 'ComputeUserPass error: %s %s' % (str(sys.exc_info()[0]), str(sys.exc_info()[1]))) + def isUserPass(password, computedUserPass, dictU, revision): ''' Checks if the given password is the User password of the file - + @param password: The given password or the empty password @param computedUserPass: The computed user password of the file @param dictU: The /U element of the /Encrypt dictionary @@ -221,7 +228,7 @@ def isUserPass(password, computedUserPass, dictU, revision): if inputHash == dictU[:32]: 
return True else: - return False + return False elif revision == 3 or revision == 4: if computedUserPass[:16] == dictU[:16]: return True @@ -236,7 +243,7 @@ def isUserPass(password, computedUserPass, dictU, revision): def isOwnerPass(password, dictO, dictU, computedUserPass, keyLength, revision): ''' Checks if the given password is the owner password of the file - + @param password: The given password or the empty password @param dictO: The /O element of the /Encrypt dictionary @param dictU: The /U element of the /Encrypt dictionary @@ -251,7 +258,7 @@ def isOwnerPass(password, dictO, dictU, computedUserPass, keyLength, revision): if inputHash == dictO[:32]: return True else: - return False + return False else: keyLength = keyLength/8 lenPass = len(password) @@ -274,7 +281,7 @@ def isOwnerPass(password, dictO, dictU, computedUserPass, keyLength, revision): newKey = '' for i in range(len(rc4Key)): newKey += chr(ord(rc4Key[i]) ^ counter) - dictO = RC4(dictO,newKey) + dictO = RC4(dictO, newKey) counter -= 1 userPass = dictO else: @@ -286,37 +293,43 @@ def isOwnerPass(password, dictO, dictU, computedUserPass, keyLength, revision): def RC4(data, key): ''' RC4 implementation - + @param data: Bytes to be encrypyed/decrypted @param key: Key used for the algorithm @return: The encrypted/decrypted bytes - ''' + ''' y = 0 hash = {} box = {} ret = '' - keyLength = len(key) + keyLength = len(key) dataLength = len(data) - - #Initialization + + # Initialization for x in range(256): - hash[x] = ord(key[x % keyLength]) + if six.PY3: + hash[x] = ord(chr(key[x % keyLength])) + else: + hash[x] = ord(key[x % keyLength]) box[x] = x for x in range(256): y = (y + int(box[x]) + int(hash[x])) % 256 tmp = box[x] box[x] = box[y] - box[y] = tmp + box[y] = tmp z = y = 0 for x in range(0, dataLength): - z = (z + 1) % 256 + z = (z + 1) % 256 y = (y + box[z]) % 256 tmp = box[z] box[z] = box[y] box[y] = tmp k = box[((box[z] + box[y]) % 256)] - ret += chr(ord(data[x]) ^ k) + if six.PY3: + ret += 
chr(data[x] ^ k) + else: + ret += chr(ord(data[x]) ^ k) return ret @@ -326,10 +339,10 @@ def RC4(data, key): def xor(bytes, key): ''' Simple XOR implementation - + @param bytes: Bytes to be xored @param key: Key used for the operation, it's cycled. @return: The xored bytes ''' - key = cycle(key) - return ''.join(chr(ord(x) ^ ord(y)) for (x,y) in izip(bytes, key)) \ No newline at end of file + key = itertools.cycle(key) + return ''.join(chr(ord(x) ^ ord(y)) for (x, y) in zip(bytes, key)) diff --git a/PDFFilters.py b/peepdf/PDFFilters.py similarity index 90% rename from PDFFilters.py rename to peepdf/PDFFilters.py index 72c3d7b..3a7cfdc 100644 --- a/PDFFilters.py +++ b/peepdf/PDFFilters.py @@ -56,15 +56,20 @@ Module to manage encoding/decoding in PDF files ''' -import sys, zlib, lzw, struct -from PDFUtils import getNumsFromBytes, getBytesFromBits, getBitsFromNum -from ccitt import CCITTFax +import zlib +import struct +import six + +import peepdf.lzw + +from peepdf.PDFUtils import getNumsFromBytes, getBytesFromBits, getBitsFromNum +from peepdf.ccitt import CCITTFax def decodeStream(stream, filter, parameters={}): ''' Decode the given stream - + @param stream: Stream to be decoded (string) @param filter: Filter to apply to decode the stream @param parameters: List of PDFObjects containing the parameters for the filter @@ -98,7 +103,7 @@ def decodeStream(stream, filter, parameters={}): def encodeStream(stream, filter, parameters={}): ''' Encode the given stream - + @param stream: Stream to be decoded (string) @param filter: Filter to apply to decode the stream @param parameters: List of PDFObjects containing the parameters for the filter @@ -138,20 +143,20 @@ def encodeStream(stream, filter, parameters={}): The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ In ASCII85 encoding, every four bytes are encoded with five ASCII letters, using 85 different types of characters (as 256**4 < 85**5). When the length of the original bytes is not a multiple of 4, a special rule is used for round up. - + The Adobe's ASCII85 implementation is slightly different from its original in handling the last characters. 
- + The sample string is taken from: http://en.wikipedia.org/w/index.php?title=Ascii85 - + >>> ascii85decode('9jqo^BlbD-BleB1DJ+*+F(f,q') 'Man is distinguished' >>> ascii85decode('E,9)oF*2M7/c~>') @@ -164,7 +169,7 @@ def encodeStream(stream, filter, parameters={}): def ascii85Decode(stream): ''' Method to decode streams using ASCII85 - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' @@ -195,18 +200,17 @@ def ascii85Decode(stream): def ascii85Encode(stream): ''' Method to encode streams using ASCII85 (NOT SUPPORTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' return (-1, 'Ascii85Encode not supported yet') def asciiHexDecode(stream): ''' Method to decode streams using hexadecimal encoding - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' @@ -241,7 +245,7 @@ def asciiHexDecode(stream): def asciiHexEncode(stream): ''' Method to encode streams using hexadecimal encoding - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' @@ -255,43 +259,48 @@ def asciiHexEncode(stream): def flateDecode(stream, parameters): ''' Method to decode streams using the Flate algorithm - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' decodedStream = '' try: - decodedStream = zlib.decompress(stream) - except: + if six.PY3: + decodedStream = zlib.decompress(stream.encode('latin-1')) + decodedStream = decodedStream.decode('latin-1') + + else: + decodedStream = 
zlib.decompress(stream) + except Exception as a: return (-1, 'Error decompressing string') - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: return (0, decodedStream) else: - if parameters.has_key('/Predictor'): + if "/Predictor" in parameters: predictor = parameters['/Predictor'].getRawValue() else: predictor = 1 # Columns = number of samples per row - if parameters.has_key('/Columns'): + if "/Columns" in parameters: columns = parameters['/Columns'].getRawValue() else: columns = 1 # Colors = number of components per sample - if parameters.has_key('/Colors'): + if "/Colors" in parameters: colors = parameters['/Colors'].getRawValue() if colors < 1: colors = 1 else: colors = 1 # BitsPerComponent: number of bits per color component - if parameters.has_key('/BitsPerComponent'): + if "/BitsPerComponent" in parameters: bits = parameters['/BitsPerComponent'].getRawValue() if bits not in [1, 2, 4, 8, 16]: bits = 8 else: bits = 8 - if predictor != None and predictor != 1: + if predictor is not None and predictor != 1: ret = post_prediction(decodedStream, predictor, columns, colors, bits) return ret else: @@ -301,41 +310,40 @@ def flateDecode(stream, parameters): def flateEncode(stream, parameters): ''' Method to encode streams using the Flate algorithm - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: try: return (0, zlib.compress(stream)) except: return (-1, 'Error compressing string') else: - if parameters.has_key('/Predictor'): + if "/Predicator" in parameters: predictor = parameters['/Predictor'].getRawValue() else: predictor = 1 # Columns = number of samples per row - if parameters.has_key('/Columns'): + if "/Columns" in parameters: columns = parameters['/Columns'].getRawValue() else: columns = 1 # 
Colors = number of components per sample - if parameters.has_key('/Colors'): + if "/Colors" in parameters: colors = parameters['/Colors'].getRawValue() if colors < 1: colors = 1 else: colors = 1 # BitsPerComponent: number of bits per color component - if parameters.has_key('/BitsPerComponent'): + if "/BitsPerComponent" in parameters: bits = parameters['/BitsPerComponent'].getRawValue() if bits not in [1, 2, 4, 8, 16]: bits = 8 else: bits = 8 - if predictor != None and predictor != 1: + if predictor is not None and predictor != 1: ret = pre_prediction(stream, predictor, columns, colors, bits) if ret[0] == -1: return ret @@ -351,47 +359,43 @@ def flateEncode(stream, parameters): def lzwDecode(stream, parameters): ''' Method to decode streams using the LZW algorithm - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' decodedStream = '' try: - decodedStream = lzw.lzwdecode(stream) + decodedStream = peepdf.lzw.lzwdecode(stream) except: return (-1, 'Error decompressing string') - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: return (0, decodedStream) else: - if parameters.has_key('/Predictor'): + if "/Predictor" in parameters: predictor = parameters['/Predictor'].getRawValue() else: predictor = 1 # Columns = number of samples per row - if parameters.has_key('/Columns'): + if "/Columns" in parameters: columns = parameters['/Columns'].getRawValue() else: columns = 1 # Colors = number of components per sample - if parameters.has_key('/Colors'): + if "/Colors" in parameters: colors = parameters['/Colors'].getRawValue() if colors < 1: colors = 1 else: colors = 1 # BitsPerComponent: number of bits per color component - if parameters.has_key('/BitsPerComponent'): + if "/BitsPerComponent" in parameters: bits = parameters['/BitsPerComponent'].getRawValue() if bits not in [1, 2, 4, 8, 16]: bits = 8 else: bits = 8 - if 
parameters.has_key('/EarlyChange'): - earlyChange = parameters['/EarlyChange'].getRawValue() - else: - earlyChange = 1 - if predictor != None and predictor != 1: + if predictor is not None and predictor != 1: ret = post_prediction(decodedStream, predictor, columns, colors, bits) return ret else: @@ -401,48 +405,44 @@ def lzwDecode(stream, parameters): def lzwEncode(stream, parameters): ''' Method to encode streams using the LZW algorithm - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' encodedStream = '' - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: try: - generator = lzw.compress(stream) + generator = peepdf.lzw.compress(stream) for c in generator: encodedStream += c return (0, encodedStream) except: return (-1, 'Error compressing string') else: - if parameters.has_key('/Predictor'): + if "/Predictor" in parameters: predictor = parameters['/Predictor'].getRawValue() else: predictor = 1 # Columns = number of samples per row - if parameters.has_key('/Columns'): + if "/Columns" in parameters: columns = parameters['/Columns'].getRawValue() else: columns = 1 # Colors = number of components per sample - if parameters.has_key('/Colors'): + if "/Colors" in parameters: colors = parameters['/Colors'].getRawValue() if colors < 1: colors = 1 else: colors = 1 # BitsPerComponent: number of bits per color component - if parameters.has_key('/BitsPerComponent'): + if "/BitsPerComponent" in parameters: bits = parameters['/BitsPerComponent'].getRawValue() if bits not in [1, 2, 4, 8, 16]: bits = 8 else: bits = 8 - if parameters.has_key('/EarlyChange'): - earlyChange = parameters['/EarlyChange'].getRawValue() - else: - earlyChange = 1 - if predictor != None and predictor != 1: + if predictor is not None and predictor != 1: ret = pre_prediction(stream, predictor, columns, colors, bits) if ret[0] == -1: return ret @@ 
-450,7 +450,7 @@ def lzwEncode(stream, parameters): else: output = stream try: - generator = lzw.compress(output) + generator = peepdf.lzw.compress(output) for c in generator: encodedStream += c return (0, encodedStream) @@ -461,7 +461,7 @@ def lzwEncode(stream, parameters): def pre_prediction(stream, predictor, columns, colors, bits): ''' Predictor function to make the stream more predictable and improve compression (PDF Specification) - + @param stream: The stream to be modified @param predictor: The type of predictor to apply @param columns: Number of samples per row @@ -471,12 +471,12 @@ def pre_prediction(stream, predictor, columns, colors, bits): ''' output = '' - #TODO: TIFF and more PNG predictions + # TODO: TIFF and more PNG predictions # PNG prediction if predictor >= 10 and predictor <= 15: # PNG prediction can vary from row to row - for row in xrange(len(stream) / columns): + for row in range(len(stream) / columns): rowdata = [ord(x) for x in stream[(row * columns):((row + 1) * columns)]] filterByte = predictor - 10 rowdata = [filterByte] + rowdata @@ -501,7 +501,7 @@ def pre_prediction(stream, predictor, columns, colors, bits): def post_prediction(decodedStream, predictor, columns, colors, bits): ''' Predictor function to obtain the real stream, removing the prediction (PDF Specification) - + @param decodedStream: The decoded stream to be modified @param predictor: The type of predictor to apply @param columns: Number of samples per row @@ -544,7 +544,7 @@ def post_prediction(decodedStream, predictor, columns, colors, bits): numSamplesPerRow = columns + 1 bytesPerSample = (colors * bits + 7) / 8 upRowdata = (0,) * numSamplesPerRow - for row in xrange(numRows): + for row in range(numRows): rowdata = [ord(x) for x in decodedStream[(row * bytesPerRow):((row + 1) * bytesPerRow)]] # PNG prediction can vary from row to row filterByte = rowdata[0] @@ -598,7 +598,7 @@ def post_prediction(decodedStream, predictor, columns, colors, bits): rowdata[i] = 
(rowdata[i] + nearest) % 256 else: # Optimum - 15 - #return (-1,'Unsupported predictor') + # return (-1,'Unsupported predictor') pass upRowdata = rowdata output += (''.join([chr(x) for x in rowdata[1:]])) @@ -610,7 +610,7 @@ def post_prediction(decodedStream, predictor, columns, colors, bits): def runLengthDecode(stream): ''' Method to decode streams using the Run-Length algorithm - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' @@ -635,24 +635,23 @@ def runLengthDecode(stream): def runLengthEncode(stream): ''' Method to encode streams using the Run-Length algorithm (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' return (-1, 'RunLengthEncode not supported yet') def ccittFaxDecode(stream, parameters): ''' Method to decode streams using the CCITT facsimile standard - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' decodedStream = '' - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: try: decodedStream = CCITTFax().decode(stream) return (0, decodedStream) @@ -660,7 +659,7 @@ def ccittFaxDecode(stream, parameters): return (-1, 'Error decompressing string') else: # K = A code identifying the encoding scheme used - if parameters.has_key('/K'): + if "/K" in parameters: k = parameters['/K'].getRawValue() if type(k) != int: k = 0 @@ -671,7 +670,7 @@ def ccittFaxDecode(stream, parameters): else: k = 0 # EndOfLine = A flag indicating whether end-of-line bit patterns are required to be present in the encoding. 
- if parameters.has_key('/EndOfLine'): + if "/EndOfLine" in parameters: eol = parameters['/EndOfLine'].getRawValue() if eol == 'true': eol = True @@ -680,7 +679,7 @@ def ccittFaxDecode(stream, parameters): else: eol = False # EncodedByteAlign = A flag indicating whether the filter expects extra 0 bits before each encoded line so that the line begins on a byte boundary - if parameters.has_key('/EncodedByteAlign'): + if "/EncodeByteAlign" in parameters: byteAlign = parameters['/EncodedByteAlign'].getRawValue() if byteAlign == 'true': byteAlign = True @@ -689,21 +688,21 @@ def ccittFaxDecode(stream, parameters): else: byteAlign = False # Columns = The width of the image in pixels. - if parameters.has_key('/Columns'): + if "/Columns" in parameters: columns = parameters['/Columns'].getRawValue() if type(columns) != int: columns = 1728 else: columns = 1728 # Rows = The height of the image in scan lines. - if parameters.has_key('/Rows'): + if "/Rows" in parameters: rows = parameters['/Rows'].getRawValue() if type(rows) != int: rows = 0 else: rows = 0 # EndOfBlock = number of samples per row - if parameters.has_key('/EndOfBlock'): + if "/EndOfBlock" in parameters: eob = parameters['/EndOfBlock'].getRawValue() if eob == 'false': eob = False @@ -712,7 +711,7 @@ def ccittFaxDecode(stream, parameters): else: eob = True # BlackIs1 = A flag indicating whether 1 bits are to be interpreted as black pixels and 0 bits as white pixels - if parameters.has_key('/BlackIs1'): + if "/BlackIs1" in parameters: blackIs1 = parameters['/BlackIs1'].getRawValue() if blackIs1 == 'true': blackIs1 = True @@ -721,7 +720,7 @@ def ccittFaxDecode(stream, parameters): else: blackIs1 = False # DamagedRowsBeforeError = The number of damaged rows of data to be tolerated before an error occurs - if parameters.has_key('/DamagedRowsBeforeError'): + if "/DamagedRowsBeforeError" in parameters: damagedRowsBeforeError = parameters['/DamagedRowsBeforeError'].getRawValue() else: damagedRowsBeforeError = 0 @@ 
-736,74 +735,71 @@ def ccittFaxDecode(stream, parameters): def ccittFaxEncode(stream, parameters): ''' Method to encode streams using the CCITT facsimile standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' return (-1, 'CcittFaxEncode not supported yet') def crypt(stream, parameters): ''' Method to encrypt streams using a PDF security handler (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encrypted PDF stream in case status = 0 or an error in case status = -1 ''' - decodedStream = '' - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: return (0, stream) else: - if not parameters.has_key('/Name') or parameters['/Name'] == None: + if "/Name" not in parameters or parameters['/Name'] is None: return (0, stream) else: cryptFilterName = parameters['/Name'].getValue() if cryptFilterName == 'Identity': return (0, stream) else: - #TODO: algorithm is cryptFilterName, specified in the /CF dictionary + # TODO: algorithm is cryptFilterName, specified in the /CF dictionary return (-1, 'Crypt not supported yet') def decrypt(stream, parameters): ''' Method to decrypt streams using a PDF security handler (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decrypted PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' - if parameters == None or parameters == {}: + if parameters is None or parameters == {}: return (0, stream) else: - if not parameters.has_key('/Name') or parameters['/Name'] == None: + if "/Name" not in parameters or parameters['/Name'] is None: return (0, stream) else: cryptFilterName = parameters['/Name'].getValue() if cryptFilterName == 'Identity': return (0, stream) else: - #TODO: 
algorithm is cryptFilterName, specified in the /CF dictionary + # TODO: algorithm is cryptFilterName, specified in the /CF dictionary return (-1, 'Decrypt not supported yet') def dctDecode(stream, parameters): ''' Method to decode streams using a DCT technique based on the JPEG standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' decodedStream = '' try: from PIL import Image - import StringIO + import io except: return (-1, 'Python Imaging Library (PIL) not installed') # Quick implementation, assuming the library can detect the parameters try: - im = Image.open(StringIO.StringIO(stream)) + im = Image.open(io.StringIO(stream)) decodedStream = im.tostring() return (0, decodedStream) except: @@ -813,53 +809,48 @@ def dctDecode(stream, parameters): def dctEncode(stream, parameters): ''' Method to encode streams using a DCT technique based on the JPEG standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' return (-1, 'DctEncode not supported yet') def jbig2Decode(stream, parameters): ''' Method to decode streams using the JBIG2 standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' - decodedStream = '' return (-1, 'Jbig2Decode not supported yet') def jbig2Encode(stream, parameters): ''' Method to encode streams using the JBIG2 standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' return (-1, 'Jbig2Encode not supported yet') def 
jpxDecode(stream): ''' Method to decode streams using the JPEG2000 standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the decoded PDF stream in case status = 0 or an error in case status = -1 ''' - decodedStream = '' return (-1, 'JpxDecode not supported yet') def jpxEncode(stream): ''' Method to encode streams using the JPEG2000 standard (NOT IMPLEMENTED YET) - + @param stream: A PDF stream @return: A tuple (status,statusContent), where statusContent is the encoded PDF stream in case status = 0 or an error in case status = -1 ''' - encodedStream = '' - return (-1, 'JpxEncode not supported yet') \ No newline at end of file + return (-1, 'JpxEncode not supported yet') diff --git a/peepdf/PDFUtils.py b/peepdf/PDFUtils.py new file mode 100644 index 0000000..8624f9e --- /dev/null +++ b/peepdf/PDFUtils.py @@ -0,0 +1,441 @@ +# +# peepdf is a tool to analyse and modify PDF files +# http://peepdf.eternal-todo.com +# By Jose Miguel Esparza +# +# Copyright (C) 2011-2017 Jose Miguel Esparza +# +# This file is part of peepdf. +# +# peepdf is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# peepdf is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with peepdf. If not, see . 
#

'''
    Module with some misc functions
'''

import html.entities
import json
import os
import re
import urllib.parse
import urllib.request

# Captured before any function parameter shadows the ``bytes`` builtin
_BYTES_LIKE = (bytes, bytearray)


def clearScreen():
    '''
        Simple method to clear the screen depending on the OS

        Does nothing when os.name is not one of 'nt', 'posix' or 'mac'.
    '''
    command = {'nt': 'cls', 'posix': 'reset', 'mac': 'clear'}.get(os.name)
    if command is not None:
        os.system(command)


def countArrayElements(array):
    '''
        Simple method to count the repetitions of elements in an array

        @param array: An array of elements (they only need to support ==, not hashing)
        @return: A tuple (elements,counters), where elements is a list with the distinct elements (in order of first appearance) and counters is the list with the number of times they appear in the array
    '''
    elements = []
    counters = []
    for element in array:
        try:
            position = elements.index(element)
        except ValueError:
            # First time this element is seen
            elements.append(element)
            counters.append(1)
        else:
            counters[position] += 1
    return elements, counters


def countNonPrintableChars(string):
    '''
        Simple method to return the non printable characters found in an string

        A character counts as non printable when its code point is <= 31 or > 127.

        @param string: A string
        @return: Number of non printable characters in the string
    '''
    return sum(1 for char in string if ord(char) <= 31 or ord(char) > 127)


def decodeName(name):
    '''
        Decode the given PDF name, replacing every #xx hexadecimal escape with its character

        The name is decoded in a single pass, so a character produced by one
        escape is never combined with the following text to form a new escape.

        @param name: A PDFName string to decode
        @return: A tuple (status,statusContent), where statusContent is the decoded PDF name in case status = 0 or an error in case status = -1
    '''
    try:
        decodedName = re.sub('#([0-9a-fA-F]{2})',
                             lambda match: chr(int(match.group(1), 16)),
                             name)
    except (TypeError, ValueError):
        return (-1, 'Error decoding name')
    return (0, decodedName)


def decodeString(string):
    '''
        Decode the given PDF string, replacing every octal escape (a backslash followed by one to three octal digits) with its character

        Only the low-order byte of the octal value is kept, so the result of
        each escape is always in the range 0-255.

        @param string: A PDFString to decode
        @return A tuple (status,statusContent), where statusContent is the decoded PDF string in case status = 0 or an error in case status = -1
    '''
    try:
        decodedString = re.sub(r'\\([0-7]{1,3})',
                               lambda match: chr(int(match.group(1), 8) & 0xFF),
                               string)
    except (TypeError, ValueError):
        return (-1, 'Error decoding string')
    return (0, decodedString)


def encodeName(name):
    '''
        Encode the given PDF name, writing every character as a two-digit #xx hexadecimal escape

        A leading '/' is dropped before encoding and added back to the result.
        NUL characters are copied unchanged instead of being escaped.

        @param name: A PDFName string to encode
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF name in case status = 0 or an error in case status = -1
    '''
    if name.startswith('/'):
        name = name[1:]
    pieces = []
    for char in name:
        if char == '\0':
            pieces.append(char)
            continue
        code = ord(char)
        if code > 0xFF:
            # Cannot be written as a single two-digit escape
            return (-1, 'Error encoding name')
        pieces.append('#%02x' % code)
    return (0, '/' + ''.join(pieces))


def encodeString(string):
    '''
        Encode the given PDF string, writing every character as a backslash followed by its octal code (zero padded to three digits)

        @param string: A PDFString to encode
        @return: A tuple (status,statusContent), where statusContent is the encoded PDF string in case status = 0 or an error in case status = -1
    '''
    try:
        encodedString = ''.join('\\' + ('%o' % ord(char)).rjust(3, '0') for char in string)
    except (TypeError, ValueError):
        return (-1, 'Error encoding string')
    return (0, encodedString)


def escapeRegExpString(string):
    '''
        Escape the given string to include it as a regular expression

        @param string: A regular expression to be escaped
        @return: Escaped string
    '''
    toEscapeChars = ('\\', '(', ')', '.', '|', '^', '$', '*', '+', '?', '[', ']')
    return ''.join('\\' + char if char in toEscapeChars else char for char in string)


def escapeString(string):
    '''
        Escape the given string

        Backslashes and parentheses get a backslash prefix unless they are
        already preceded by one; a backslash followed by an octal digit is
        kept as is. The control characters CR, LF, TAB, BS and FF are written
        as their two-character escape sequences.

        @param string: A string to be escaped
        @return: Escaped string
    '''
    toEscapeChars = ('\\', '(', ')')
    controlEscapes = {'\r': '\\r', '\n': '\\n', '\t': '\\t', '\b': '\\b', '\f': '\\f'}
    pieces = []
    for i, char in enumerate(string):
        precededByBackslash = i > 0 and string[i-1] == '\\'
        if char in toEscapeChars and not precededByBackslash:
            startsOctalEscape = (char == '\\' and i+1 < len(string)
                                 and string[i+1] in '01234567')
            pieces.append(char if startsOctalEscape else '\\' + char)
        elif char in controlEscapes:
            pieces.append(controlEscapes[char])
        else:
            pieces.append(char)
    return ''.join(pieces)


def getBitsFromNum(num, bitsPerComponent=8):
    '''
        Makes the conversion between number and bits

        The result always has exactly bitsPerComponent characters: it is zero
        padded on the left, and only the lowest bits are kept when the number
        does not fit.

        @param num: Number to be converted (non-negative integer)
        @param bitsPerComponent: Number of bits needed to represent a component (positive integer)
        @return: A tuple (status,statusContent), where statusContent is the string containing the resulting bits in case status = 0 or an error in case status = -1
    '''
    if not isinstance(num, int):
        return (-1, 'num must be an integer')
    if not isinstance(bitsPerComponent, int):
        return (-1, 'bitsPerComponent must be an integer')
    if num < 0 or bitsPerComponent <= 0:
        return (-1, 'Error in conversion from number to bits')
    bitsRepresentation = bin(num)[2:].rjust(bitsPerComponent, '0')
    return (0, bitsRepresentation[-bitsPerComponent:])


def getNumsFromBytes(bytes, bitsPerComponent=8):
    '''
        Makes the conversion between bytes and numbers, depending on the number of bits used per component.

        If the total number of bits is not a multiple of bitsPerComponent the
        last number is built from the remaining (fewer) bits.

        @param bytes: String (one character per byte) or bytes object representing the bytes to be converted
        @param bitsPerComponent: Number of bits needed to represent a component (positive integer)
        @return: A tuple (status,statusContent), where statusContent is a list of numbers in case status = 0 or an error in case status = -1
    '''
    if isinstance(bytes, str):
        byteValues = [ord(char) for char in bytes]
    elif isinstance(bytes, _BYTES_LIKE):
        byteValues = list(bytes)
    else:
        return (-1, 'bytes must be a string')
    if not isinstance(bitsPerComponent, int):
        return (-1, 'bitsPerComponent must be an integer')
    if bitsPerComponent <= 0:
        return (-1, 'bitsPerComponent must be a positive integer')
    if any(value > 0xFF for value in byteValues):
        # A character above 255 is not a byte and would misalign the stream
        return (-1, 'Error in conversion from bytes to bits')
    bitsStream = ''.join(format(value, '08b') for value in byteValues)
    outputComponents = [int(bitsStream[i:i+bitsPerComponent], 2)
                        for i in range(0, len(bitsStream), bitsPerComponent)]
    return (0, outputComponents)


def getBytesFromBits(bitsStream):
    '''
        Makes the conversion between bits and bytes.

        The stream is consumed in groups of 8 bits; a shorter final group is
        converted on its own.

        @param bitsStream: String representing a chain of bits
        @return: A tuple (status,statusContent), where statusContent is the string containing the resulting bytes in case status = 0 or an error in case status = -1
    '''
    if not isinstance(bitsStream, str):
        return (-1, 'The bitsStream must be a string')
    # \Z (not $) so that a trailing newline is rejected too
    if re.match(r'[01]*\Z', bitsStream) is None:
        return (-1, 'The format of the bit stream is not correct')
    try:
        outputBytes = ''.join(chr(int(bitsStream[i:i+8], 2))
                              for i in range(0, len(bitsStream), 8))
    except ValueError:
        return (-1, 'Error in conversion from bits to bytes')
    return (0, outputBytes)


def getBytesFromFile(filename, offset, numBytes):
    '''
        Returns the number of bytes specified from a file, starting from the offset specified

        Fewer bytes are returned when the file ends before numBytes bytes
        have been read.

        @param filename: Name of the file
        @param offset: Bytes offset
        @param numBytes: Number of bytes to retrieve
        @return: A tuple (status,statusContent), where statusContent is the bytes read in case status = 0 or an error in case status = -1
    '''
    if not isinstance(offset, int) or not isinstance(numBytes, int):
        return (-1, 'The offset and the number of bytes must be integers')
    if not os.path.exists(filename):
        return (-1, 'File does not exist')
    try:
        # The context manager closes the file even if seek/read fails
        with open(filename, 'rb') as bytesFile:
            bytesFile.seek(offset)
            data = bytesFile.read(numBytes)
    except (OSError, ValueError):
        return (-1, 'Error reading the file')
    return (0, data)


def hexToString(hexString):
    '''
        Simple method to convert an hexadecimal string to ascii string

        A leading '0' is added when the input has an odd number of digits.

        @param hexString: A string in hexadecimal format
        @return: A tuple (status,statusContent), where statusContent is an ascii string in case status = 0 or an error in case status = -1
    '''
    if len(hexString) % 2 != 0:
        hexString = '0' + hexString
    # int() would tolerate signs and whitespace, so validate the digits first
    if re.match(r'[0-9a-fA-F]*\Z', hexString) is None:
        return (-1, 'Error in hexadecimal conversion')
    string = ''.join(chr(int(hexString[i:i+2], 16))
                     for i in range(0, len(hexString), 2))
    return (0, string)


def numToHex(num, numBytes):
    '''
        Given a number returns its big-endian byte string with the specified length, adding NUL characters on the left if necessary

        The output is not truncated when the number needs more than numBytes bytes.

        @param num: A number (non-negative int)
        @param numBytes: Length of the output (int)
        @return: A tuple (status,statusContent), where statusContent is the number as a string of byte characters in case status = 0 or an error in case status = -1
    '''
    if not isinstance(num, int):
        return (-1, 'Bad number')
    if num < 0:
        return (-1, 'Error in hexadecimal conversion')
    try:
        hexNumber = hex(num)[2:]
        if len(hexNumber) % 2 != 0:
            hexNumber = '0' + hexNumber
        hexString = ''.join(chr(int(hexNumber[i:i+2], 16))
                            for i in range(0, len(hexNumber), 2))
        hexString = '\0' * (numBytes - len(hexString)) + hexString
    except (TypeError, ValueError):
        return (-1, 'Error in hexadecimal conversion')
    return (0, hexString)


def numToString(num, numDigits):
    '''
        Given a number returns its string format with the specified length, adding '0' if necessary

        @param num: A number (int)
        @param numDigits: Length of the output string (int)
        @return: A tuple (status,statusContent), where statusContent is a number in string format in case status = 0 or an error in case status = -1
    '''
    if not isinstance(num, int):
        return (-1, 'Bad number')
    strNum = str(num)
    if numDigits < len(strNum):
        return (-1, 'Bad digit number')
    return (0, strNum.rjust(numDigits, '0'))


def unescapeHTMLEntities(text):
    '''
        Removes HTML or XML character references and entities from a text string.

        References that cannot be resolved (unknown entity names, invalid or
        out of range code points) are left untouched.

        @param text The HTML (or XML) source text.
        @return The plain text, as a Unicode string, if necessary.

        Author: Fredrik Lundh
        Source: http://effbot.org/zone/re-sub.htm#unescape-html
    '''
    def fixup(m):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return chr(int(text[3:-1], 16))
                return chr(int(text[2:-1]))
            except (ValueError, OverflowError):
                pass
        else:
            # named entity
            try:
                text = chr(html.entities.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        return text  # leave as is
    return re.sub(r"&#?\w+;", fixup, text)


def unescapeString(string):
    '''
        Unescape the given string

        Removes the backslash in front of backslashes and parentheses and
        turns the two-character sequences for CR, LF, TAB, BS and FF back
        into the control characters. Any other backslash is kept.

        @param string: An escaped string
        @return: Unescaped string
    '''
    controlChars = {'r': '\r', 'n': '\n', 't': '\t', 'b': '\b', 'f': '\f'}
    pieces = []
    i = 0
    lastIndex = len(string) - 1
    while i < len(string):
        char = string[i]
        if char == '\\' and i != lastIndex:
            nextChar = string[i+1]
            if nextChar == '\\':
                pieces.append('\\')
                i += 1
            elif nextChar in ('(', ')'):
                # Drop the backslash; the parenthesis is added on the next iteration
                pass
            elif nextChar in controlChars:
                pieces.append(controlChars[nextChar])
                i += 1
            else:
                pieces.append(char)
        else:
            pieces.append(char)
        i += 1
    return ''.join(pieces)


def vtcheck(md5, vtKey):
    '''
        Function to check a hash on VirusTotal and get the report summary

        @param md5: The MD5 to check (hexdigest)
        @param vtKey: The VirusTotal API key needed to perform the request
        @return: A tuple (status,statusContent), where statusContent is a dictionary with the result of the request in case status = 0 or an error in case status = -1
    '''
    vtUrl = 'https://www.virustotal.com/vtapi/v2/file/report'
    parameters = {'resource': md5, 'apikey': vtKey}
    try:
        # The body of a POST request has to be bytes
        data = urllib.parse.urlencode(parameters).encode('ascii')
        req = urllib.request.Request(vtUrl, data)
        response = urllib.request.urlopen(req)
        try:
            jsonResponse = response.read()
        finally:
            response.close()
    except Exception:
        # Any network/protocol failure is reported through the status tuple
        return (-1, 'The request to VirusTotal has not been successful')
    try:
        jsonDict = json.loads(jsonResponse.decode('utf-8'))
    except ValueError:
        return (-1, 'An error has occurred while parsing the JSON response from VirusTotal')
    return (0, jsonDict)
import PDFConsole, PDFCore, PDFCrypto, PDFFilters, PDFUtils diff --git a/aes.py b/peepdf/aes.py similarity index 93% rename from aes.py rename to peepdf/aes.py index a034dcd..c51945c 100644 --- a/aes.py +++ b/peepdf/aes.py @@ -41,8 +41,8 @@ def decryptData(data, password = None, keyLength = None, mode = 'CBC'): if keyLength not in [128, 192, 256]: return (-1, 'Bad length key in AES decryption process') - iv = map(ord, data[:16]) - key = map(ord, password) + iv = list(map(ord, data[:16])) + key = list(map(ord, password)) data = data[16:] if len(data) % 16 != 0: data = data[:-(len(data)%16)] @@ -53,8 +53,8 @@ def decryptData(data, password = None, keyLength = None, mode = 'CBC'): aesMode = cbc_mode.CBCMode(aesCipher, 16) aesMode.set_iv(iv) for i in range(0,len(data),16): - ciphertext = map(ord,data[i:i+16]) + ciphertext = list(map(ord,data[i:i+16])) decryptedBytes = aesMode.decrypt_block(ciphertext) for byte in decryptedBytes: decryptedData += chr(byte) - return (0, decryptedData) \ No newline at end of file + return (0, decryptedData) diff --git a/ccitt.py b/peepdf/ccitt.py similarity index 94% rename from ccitt.py rename to peepdf/ccitt.py index 6bcb87b..a61ac23 100644 --- a/ccitt.py +++ b/peepdf/ccitt.py @@ -36,7 +36,7 @@ def write(self, data, length): """ """ if not ( length >= 0 and (1 << length) > data ): - raise BitWriterException, "Invalid data length" + raise BitWriterException("Invalid data length") if length == 8 and not self._last_byte and self._bit_ptr == 0: self._data += chr(data) @@ -108,7 +108,7 @@ def pos(self, bits): """ """ if bits > self.size: - raise BitReaderException, "Pointer position out of data" + raise BitReaderException("Pointer position out of data") pbyte = bits >> 3 pbit = bits - (pbyte <<3) @@ -118,9 +118,9 @@ def peek(self, length): """ """ if length <= 0: - raise BitReaderException, "Invalid read length" + raise BitReaderException("Invalid read length") elif ( self.pos + length ) > self.size: - raise BitReaderException, "Insufficient 
data" + raise BitReaderException("Insufficient data") n = 0 byte_ptr, bit_ptr = self._byte_ptr, self._bit_ptr @@ -228,7 +228,7 @@ class CCITTFax(object): 63 : codeword('00110100') } - WHITE_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in WHITE_TERMINAL_ENCODE_TABLE.iteritems() ) + WHITE_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in WHITE_TERMINAL_ENCODE_TABLE.items() ) BLACK_TERMINAL_ENCODE_TABLE = { 0 : codeword('0000110111'), @@ -297,7 +297,7 @@ class CCITTFax(object): 63 : codeword('000001100111') } - BLACK_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in BLACK_TERMINAL_ENCODE_TABLE.iteritems() ) + BLACK_TERMINAL_DECODE_TABLE = dict( (v, k) for k, v in BLACK_TERMINAL_ENCODE_TABLE.items() ) WHITE_CONFIGURATION_ENCODE_TABLE = { 64 : codeword('11011'), @@ -343,7 +343,7 @@ class CCITTFax(object): 2560 : codeword('000000011111') } - WHITE_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in WHITE_CONFIGURATION_ENCODE_TABLE.iteritems() ) + WHITE_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in WHITE_CONFIGURATION_ENCODE_TABLE.items() ) BLACK_CONFIGURATION_ENCODE_TABLE = { 64 : codeword('0000001111'), @@ -389,7 +389,7 @@ class CCITTFax(object): 2560 : codeword('000000011111') } - BLACK_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in BLACK_CONFIGURATION_ENCODE_TABLE.iteritems() ) + BLACK_CONFIGURATION_DECODE_TABLE = dict( (v, k) for k, v in BLACK_CONFIGURATION_ENCODE_TABLE.items() ) def __init__(self, ): """ @@ -422,7 +422,7 @@ def decode(self, stream, k = 0, eol = False, byteAlign = False, columns = 1728, if bitr.peek(self.EOL[1]) != self.EOL[0]: if eol: - raise Exception, "No end-of-line pattern found (at bit pos %d/%d)" % (bitr.pos, bitr.size) + raise Exception("No end-of-line pattern found (at bit pos %d/%d)" % (bitr.pos, bitr.size)) else: bitr.pos += self.EOL[1] @@ -433,11 +433,11 @@ def decode(self, stream, k = 0, eol = False, byteAlign = False, columns = 1728, else: bit_length = self.get_black_bits(bitr) if bit_length == None: - raise Exception, 
"Unfinished line (at bit pos %d/%d), %s" % (bitr.pos, bitr.size, bitw.data) + raise Exception("Unfinished line (at bit pos %d/%d), %s" % (bitr.pos, bitr.size, bitw.data)) line_length += bit_length if line_length > columns: - raise Exception, "Line is too long (at bit pos %d/%d)" % (bitr.pos, bitr.size) + raise Exception("Line is too long (at bit pos %d/%d)" % (bitr.pos, bitr.size)) bitw.write( (current_color << bit_length) - current_color, bit_length ) @@ -465,7 +465,7 @@ def get_color_bits(self, bitr, config_words, term_words): while check_conf: check_conf = False - for i in xrange(2, 14): + for i in range(2, 14): codeword = bitr.peek(i) config_value = config_words.get((codeword, i), None) @@ -476,7 +476,7 @@ def get_color_bits(self, bitr, config_words, term_words): check_conf = True break - for i in xrange(2, 14): + for i in range(2, 14): codeword = bitr.peek(i) term_value = term_words.get((codeword, i), None) diff --git a/jjdecode.py b/peepdf/jjdecode.py old mode 100755 new mode 100644 similarity index 100% rename from jjdecode.py rename to peepdf/jjdecode.py diff --git a/lzw.py b/peepdf/lzw.py similarity index 96% rename from lzw.py rename to peepdf/lzw.py index c825cbf..ac9ed99 100644 --- a/lzw.py +++ b/peepdf/lzw.py @@ -66,7 +66,7 @@ code points are stored with their MSB in the most significant bit available in the output character. ->>> import lzw +>>> import peepdf.lzw >>> >>> mybytes = lzw.readbytes("README.txt") >>> lessbytes = lzw.compress(mybytes) @@ -128,7 +128,7 @@ class ByteEncoder(object): with a L{BitPacker}. - >>> import lzw + >>> import peepdf.lzw >>> >>> enc = lzw.ByteEncoder(12) >>> bigstr = b"gabba gabba yo gabba gabba gabba yo gabba gabba gabba yo gabba gabba gabba yo" @@ -188,10 +188,10 @@ def decodefrombytes(self, bytesource): iterator over the uncompressed bytes. Dual of L{ByteEncoder.encodetobytes}. See L{ByteEncoder} for an example of use. 
- """ + """ codepoints = self._unpacker.unpack(bytesource) clearbytes = self._decoder.decode(codepoints) - + return clearbytes @@ -236,7 +236,7 @@ def pack(self, codepoints): and bytes following END_OF_INFO_CODE will be aligned to the next byte boundary. - >>> import lzw + >>> import peepdf.lzw >>> pkr = lzw.BitPacker(258) >>> [ b for b in pkr.pack([ 1, 257]) ] == [ chr(0), chr(0xC0), chr(0x40) ] True @@ -262,7 +262,7 @@ def pack(self, codepoints): if pt == END_OF_INFO_CODE: while len(tailbits) % 8: tailbits.append(0) - + if pt in [ CLEAR_CODE, END_OF_INFO_CODE ]: nextwidth = minwidth codesize = self._initial_code_size @@ -277,13 +277,13 @@ def pack(self, codepoints): tailbits = tailbits[8:] - + if tailbits: tail = bitstobytes(tailbits) for bt in tail: yield struct.pack("B", bt) - + class BitUnpacker(object): @@ -317,7 +317,7 @@ def unpack(self, bytesource): stop the generator, just reset the alignment and the width - >>> import lzw + >>> import peepdf.lzw >>> unpk = lzw.BitUnpacker(initial_code_size=258) >>> [ i for i in unpk.unpack([ chr(0), chr(0xC0), chr(0x40) ]) ] [1, 257] @@ -325,7 +325,7 @@ def unpack(self, bytesource): bits = [] offset = 0 ignore = 0 - + codesize = self._initial_code_size minwidth = 8 while (1 << minwidth) < codesize: @@ -403,7 +403,7 @@ def decode(self, codepoints): be handled by the upstream codepoint generator (see L{BitUnpacker}, for example) - >>> import lzw + >>> import peepdf.lzw >>> dec = lzw.Decoder() >>> ''.join(dec.decode([103, 97, 98, 98, 97, 32, 258, 260, 262, 121, 111, 263, 259, 261, 256])) 'gabba gabba yo gabba' @@ -424,7 +424,7 @@ def _decode_codepoint(self, codepoint): code. EOI codes should be handled by callers if they're present in our source stream. 
- >>> import lzw + >>> import peepdf.lzw >>> dec = lzw.Decoder() >>> beforesize = dec.code_size() >>> dec._decode_codepoint(0x80) @@ -484,7 +484,7 @@ def __init__(self, max_code_size=(2**DEFAULT_MAX_BITS)): self._max_code_size = max_code_size self._buffer = '' - self._clear_codes() + self._clear_codes() if max_code_size < self.code_size(): raise ValueError("Max code size too small, (must be at least {0})".format(self.code_size())) @@ -509,12 +509,12 @@ def flush(self): if self._buffer: yield self._prefixes[ self._buffer ] - self._buffer = '' + self._buffer = '' yield CLEAR_CODE self._clear_codes() - + def encode(self, bytesource): @@ -527,7 +527,7 @@ def encode(self, bytesource): >>> enc = lzw.Encoder() >>> [ cp for cp in enc.encode("gabba gabba yo gabba") ] [103, 97, 98, 98, 97, 32, 258, 260, 262, 121, 111, 263, 259, 261, 256] - + Modified by Jose Miguel Esparza to add support for PDF files encoding """ yield CLEAR_CODE @@ -551,7 +551,7 @@ def _encode_byte(self, byte): # want to call this. new_prefix = self._buffer - + if new_prefix + byte in self._prefixes: new_prefix = new_prefix + byte elif new_prefix: @@ -560,7 +560,7 @@ def _encode_byte(self, byte): new_prefix = byte yield encoded - + self._buffer = new_prefix @@ -602,9 +602,9 @@ def encodepages(self, pages): The dual of PagingDecoder.decodepages - >>> import lzw + >>> import peepdf.lzw >>> enc = lzw.PagingEncoder(257, 2**12) - >>> coded = enc.encodepages([ "say hammer yo hammer mc hammer go hammer", + >>> coded = enc.encodepages([ "say hammer yo hammer mc hammer go hammer", ... "and the rest can go and play", ... "can't touch this" ]) ... 
@@ -622,11 +622,11 @@ def encodepages(self, pages): packer = BitPacker(initial_code_size=encoder.code_size()) packed = packer.pack(codes_and_eoi) - for byte in packed: + for byte in packed: yield byte - + class PagingDecoder(object): """ @@ -646,7 +646,7 @@ def next_page(self, codepoints): try: while 1: - cp = codepoints.next() + cp = next(codepoints) if cp != END_OF_INFO_CODE: yield cp else: @@ -655,7 +655,7 @@ def next_page(self, codepoints): except StopIteration: pass - + def decodepages(self, bytesource): """ @@ -667,7 +667,7 @@ def decodepages(self, bytesource): BUG: Dangling trailing page on decompression. - >>> import lzw + >>> import peepdf.lzw >>> pgdec = lzw.PagingDecoder(initial_code_size=257) >>> pgdecoded = pgdec.decodepages( ... ''.join([ '\\x80\\x1c\\xcc\\'\\x91\\x01\\xa0\\xc2m6', @@ -734,7 +734,7 @@ def filebytes(fileobj, buffersize=1024): for byte in buff: yield byte buff = fileobj.read(buffersize) - + def readbytes(filename, buffersize=1024): """ Opens a file named by filename and iterates over the L{filebytes} @@ -765,7 +765,7 @@ def inttobits(anint, width=None): MSBs to the given width (but will NOT truncate overflowing results) - >>> import lzw + >>> import peepdf.lzw >>> lzw.inttobits(304, width=16) [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0] @@ -791,15 +791,15 @@ def intfrombits(bits): Given a list of boolean values, interprets them as a binary encoded, MSB-first unsigned integer (with True == 1 and False == 0) and returns the result. - - >>> import lzw + + >>> import peepdf.lzw >>> lzw.intfrombits([ 1, 0, 0, 1, 1, 0, 0, 0, 0 ]) 304 """ ret = 0 lsb_first = [ b for b in bits ] lsb_first.reverse() - + for bit_index in range(len(lsb_first)): if lsb_first[ bit_index ]: ret = ret | (1 << bit_index) @@ -811,8 +811,8 @@ def bytestobits(bytesource): """ Breaks a given iterable of bytes into an iterable of boolean values representing those bytes as unsigned integers. 
- - >>> import lzw + + >>> import peepdf.lzw >>> [ x for x in lzw.bytestobits(b"\\x01\\x30") ] [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0] """ @@ -836,7 +836,7 @@ def bitstobytes(bits): Does *NOT* pack the returned values into a bytearray or the like. - >>> import lzw + >>> import peepdf.lzw >>> bitstobytes([0, 0, 0, 0, 0, 0, 0, 0, "Yes, I'm True"]) == [ 0x00, 0x80 ] True >>> bitstobytes([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0]) == [ 0x01, 0x30 ] @@ -858,7 +858,7 @@ def bitstobytes(bits): if nextbit < 7: ret.append(nextbyte) return ret - + @@ -871,15 +871,12 @@ def bitstobytes(bits): The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
''' import sys -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from io import StringIO ## LZWDecoder ## @@ -923,7 +920,7 @@ def readbits(self, bits): def feed(self, code): x = '' if code == 256: - self.table = [ chr(c) for c in xrange(256) ] # 0-255 + self.table = [ chr(c) for c in range(256) ] # 0-255 self.table.append(None) # 256 self.table.append(None) # 257 self.prevbuf = '' @@ -958,8 +955,8 @@ def run(self): x = self.feed(code) yield x if self.debug: - print >>sys.stderr, ('nbits=%d, code=%d, output=%r, table=%r' % - (self.nbits, code, x, self.table[258:])) + x = 'nbits=%d, code=%d, output=%r, table=%r' % self.nbits, code, x, self.table[258:] + sys.stderr.write(x) return @@ -969,4 +966,4 @@ def lzwdecode(data): '\x2d\x2d\x2d\x2d\x2d\x41\x2d\x2d\x2d\x42' """ fp = StringIO(data) - return ''.join(LZWDecoder(fp).run()) \ No newline at end of file + return ''.join(LZWDecoder(fp).run()) diff --git a/peepdf/main.py b/peepdf/main.py new file mode 100644 index 0000000..66ea734 --- /dev/null +++ b/peepdf/main.py @@ -0,0 +1,679 @@ +#!/usr/bin/env python + +# +# peepdf is a tool to analyse and modify PDF files +# http://peepdf.eternal-todo.com +# By Jose Miguel Esparza +# +# Copyright (C) 2011-2017 Jose Miguel Esparza +# +# This file is part of peepdf. +# +# peepdf is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# peepdf is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with peepdf. If not, see . 
#

'''
    Initial script to launch the tool
'''

import sys
import os
import optparse
import traceback
import json
from datetime import datetime
from peepdf.PDFCore import PDFParser, vulnsDict
from peepdf.PDFUtils import vtcheck


# NOTE(review): API key committed in the source; consider reading it from the environment
VT_KEY = 'fc90df3f5ac749a94a94cb8bf87e05a681a2eb001aef34b6a0084b8c22c97a64'

try:
    import STPyV8 as PyV8
    JS_MODULE = True

    PyV8
except:
    JS_MODULE = False
try:
    import pylibemu
    EMU_MODULE = True

    pylibemu
except:
    EMU_MODULE = False
try:
    from colorama import init, Fore, Style
    COLORIZED_OUTPUT = True
except:
    COLORIZED_OUTPUT = False

try:
    from PIL import Image
    PIL_MODULE = True

    Image
except:
    PIL_MODULE = False


try:
    from lxml import etree
except:
    etree = None


def getPeepXML(statsDict, version, revision):
    '''
        Build the XML report of an analysed PDF document

        @param statsDict: The statistics dictionary of the document (main() passes the result of pdf.getStats())
        @param version: The peepdf version string
        @param revision: The peepdf revision string
        @return: The pretty-printed XML document, as returned by etree.tostring()
        @raise AssertionError: If lxml is not installed
    '''
    # Raised explicitly instead of through an assert statement: asserts are
    # removed under "python -O", which would turn the missing dependency into
    # an AttributeError on None a few lines below.
    if etree is None:
        raise AssertionError("lxml must be installed for --xml")

    def addTextNode(parent, tag, text):
        # Creates a child element holding only text
        node = etree.SubElement(parent, tag)
        node.text = text
        return node

    def boolText(flag):
        # XML attributes carry booleans as lowercase words
        return 'true' if flag else 'false'

    def addContainers(parent, objectIds):
        # One <container_object> per object id
        for objectId in objectIds:
            etree.SubElement(parent, 'container_object', id=str(objectId))

    def addCVEs(parent, name):
        # Known vulnerabilities are annotated with their CVE identifiers
        if name in vulnsDict:
            for vulnCVE in vulnsDict[name][1]:
                addTextNode(parent, 'cve', vulnCVE)

    root = etree.Element('peepdf_analysis', version=version + ' r' + revision, url='http://peepdf.eternal-todo.com',
                         author='Jose Miguel Esparza')
    addTextNode(root, 'date', datetime.today().strftime('%Y-%m-%d %H:%M'))

    # Basic info
    basicInfo = etree.SubElement(root, 'basic')
    addTextNode(basicInfo, 'filename', statsDict['File'])
    addTextNode(basicInfo, 'md5', statsDict['MD5'])
    addTextNode(basicInfo, 'sha1', statsDict['SHA1'])
    addTextNode(basicInfo, 'sha256', statsDict['SHA256'])
    addTextNode(basicInfo, 'size', statsDict['Size'])
    detection = etree.SubElement(basicInfo, 'detection')
    if statsDict['Detection']:
        addTextNode(detection, 'rate', '%d/%d' % (statsDict['Detection'][0], statsDict['Detection'][1]))
        addTextNode(detection, 'report_link', statsDict['Detection report'])
    addTextNode(basicInfo, 'pdf_version', statsDict['Version'])
    encrypted = etree.SubElement(basicInfo, 'encrypted', status=statsDict['Encrypted'].lower())
    if statsDict['Encryption Algorithms']:
        algorithms = etree.SubElement(encrypted, 'algorithms')
        for algorithmInfo in statsDict['Encryption Algorithms']:
            algorithm = etree.SubElement(algorithms, 'algorithm', bits=str(algorithmInfo[1]))
            algorithm.text = algorithmInfo[0]
    addTextNode(basicInfo, 'updates', statsDict['Updates'])
    addTextNode(basicInfo, 'num_objects', statsDict['Objects'])
    addTextNode(basicInfo, 'num_streams', statsDict['Streams'])
    addTextNode(basicInfo, 'comments', statsDict['Comments'])
    errorsNode = etree.SubElement(basicInfo, 'errors', num=str(len(statsDict['Errors'])))
    for error in statsDict['Errors']:
        addTextNode(errorsNode, 'error_message', error)

    # Advanced info: one <version> element per version of the document
    advancedInfo = etree.SubElement(root, 'advanced')
    for versionNumber, statsVersion in enumerate(statsDict['Versions']):
        versionType = 'original' if versionNumber == 0 else 'update'
        versionInfo = etree.SubElement(advancedInfo, 'version', num=str(versionNumber), type=versionType)
        catalog = etree.SubElement(versionInfo, 'catalog')
        if statsVersion['Catalog'] is not None:
            catalog.set('object_id', statsVersion['Catalog'])
        info = etree.SubElement(versionInfo, 'info')
        if statsVersion['Info'] is not None:
            info.set('object_id', statsVersion['Info'])

        objectsNode = etree.SubElement(versionInfo, 'objects', num=statsVersion['Objects'][0])
        for objectId in statsVersion['Objects'][1]:
            objectNode = etree.SubElement(objectsNode, 'object', id=str(objectId))
            if statsVersion['Compressed Objects'] is not None:
                objectNode.set('compressed', boolText(objectId in statsVersion['Compressed Objects'][1]))
            if statsVersion['Errors'] is not None:
                objectNode.set('errors', boolText(objectId in statsVersion['Errors'][1]))

        streamsNode = etree.SubElement(versionInfo, 'streams', num=statsVersion['Streams'][0])
        for streamId in statsVersion['Streams'][1]:
            streamNode = etree.SubElement(streamsNode, 'stream', id=str(streamId))
            if statsVersion['Xref Streams'] is not None:
                streamNode.set('xref_stream', boolText(streamId in statsVersion['Xref Streams'][1]))
            if statsVersion['Object Streams'] is not None:
                streamNode.set('object_stream', boolText(streamId in statsVersion['Object Streams'][1]))
            if statsVersion['Encoded'] is not None:
                if streamId in statsVersion['Encoded'][1]:
                    streamNode.set('encoded', 'true')
                    # Decoding errors are only reported for encoded streams
                    if statsVersion['Decoding Errors'] is not None:
                        streamNode.set('decoding_errors', boolText(streamId in statsVersion['Decoding Errors'][1]))
                else:
                    streamNode.set('encoded', 'false')

        jsObjects = etree.SubElement(versionInfo, 'js_objects')
        if statsVersion['Objects with JS code'] is not None:
            addContainers(jsObjects, statsVersion['Objects with JS code'][1])

        actions = statsVersion['Actions']
        events = statsVersion['Events']
        vulns = statsVersion['Vulns']
        elements = statsVersion['Elements']
        suspicious = etree.SubElement(versionInfo, 'suspicious_elements')
        if events:
            triggers = etree.SubElement(suspicious, 'triggers')
            for event in events:
                trigger = etree.SubElement(triggers, 'trigger', name=event)
                addContainers(trigger, events[event])
        if actions:
            actionsList = etree.SubElement(suspicious, 'actions')
            for action in actions:
                actionInfo = etree.SubElement(actionsList, 'action', name=action)
                addContainers(actionInfo, actions[action])
        if elements:
            elementsList = etree.SubElement(suspicious, 'elements')
            for element in elements:
                elementInfo = etree.SubElement(elementsList, 'element', name=element)
                addCVEs(elementInfo, element)
                addContainers(elementInfo, elements[element])
        if vulns:
            vulnsList = etree.SubElement(suspicious, 'js_vulns')
            for vuln in vulns:
                vulnInfo = etree.SubElement(vulnsList, 'vulnerable_function', name=vuln)
                addCVEs(vulnInfo, vuln)
                addContainers(vulnInfo, vulns[vuln])

        urls = statsVersion['URLs']
        suspiciousURLs = etree.SubElement(versionInfo, 'suspicious_urls')
        if urls is not None:
            for url in urls:
                addTextNode(suspiciousURLs, 'url', url)
    return etree.tostring(root, pretty_print=True)
def _statFlag(value):
    '''
        Convert a flag of the stats dictionary into a real boolean

        The flags arrive as strings (main() concatenates them and getPeepXML()
        lowercases them), so a plain bool() would turn 'False' into True.

        @param value: The flag, either a string such as 'True'/'False' or any other object
        @return: False for the strings 'false', 'no', 'none', '0' and '' (case-insensitive), True for any other string, bool(value) for non-strings
    '''
    if hasattr(value, 'lower'):
        return value.strip().lower() not in ('false', 'no', 'none', '0', '')
    return bool(value)


def getPeepJSON(statsDict, version, revision):
    '''
        Build the JSON report of an analysed PDF document

        @param statsDict: The statistics dictionary of the document (main() passes the result of pdf.getStats())
        @param version: The peepdf version string
        @param revision: The peepdf revision string
        @return: A JSON string, indented by 4 spaces and with sorted keys
    '''
    def listedIds(statsVersion, key):
        # The entries read here are None or a sequence whose second item is the list of ids
        entry = statsVersion[key]
        return entry[1] if entry is not None else []

    def describeFindings(findings):
        # Suspicious elements / vulnerabilities, annotated with name and CVEs when known
        described = []
        for name in findings or ():
            findingInfo = {'name': name}
            if name in vulnsDict:
                findingInfo['vuln_name'] = vulnsDict[name][0]
                findingInfo['vuln_cve_list'] = vulnsDict[name][1]
            findingInfo['objects'] = findings[name]
            described.append(findingInfo)
        return described

    # peepdf info
    peepdfDict = {'version': version,
                  'revision': revision,
                  'author': 'Jose Miguel Esparza',
                  'url': 'http://peepdf.eternal-todo.com'}
    # Basic info
    detectionDict = {}
    detectionRate = statsDict['Detection']
    if detectionRate != [] and detectionRate is not None:
        detectionDict['rate'] = '%d/%d' % (detectionRate[0], detectionRate[1])
        detectionDict['report_link'] = statsDict['Detection report']
    basicDict = {
        'filename': statsDict['File'],
        'md5': statsDict['MD5'],
        'sha1': statsDict['SHA1'],
        'sha256': statsDict['SHA256'],
        'size': int(statsDict['Size']),
        'detection': detectionDict,
        'pdf_version': statsDict['Version'],
        # These three are strings in statsDict, bool() alone would always give True
        'binary': _statFlag(statsDict['Binary']),
        'linearized': _statFlag(statsDict['Linearized']),
        'encrypted': _statFlag(statsDict['Encrypted']),
        'encryption_algorithms': [
            {'bits': algorithmInfo[1], 'algorithm': algorithmInfo[0]}
            for algorithmInfo in statsDict['Encryption Algorithms'] or ()
        ],
        'updates': int(statsDict['Updates']),
        'num_objects': int(statsDict['Objects']),
        'num_streams': int(statsDict['Streams']),
        'comments': int(statsDict['Comments']),
        'errors': list(statsDict['Errors']),
    }
    # Advanced info: one entry per version of the document
    advancedInfo = []
    for versionNumber, statsVersion in enumerate(statsDict['Versions']):
        versionInfo = {
            'version_number': versionNumber,
            'version_type': 'original' if versionNumber == 0 else 'update',
            'catalog': statsVersion['Catalog'],
            'info': statsVersion['Info'],
            'objects': listedIds(statsVersion, 'Objects'),
            'compressed_objects': listedIds(statsVersion, 'Compressed Objects'),
            'error_objects': listedIds(statsVersion, 'Errors'),
            'streams': listedIds(statsVersion, 'Streams'),
            'xref_streams': listedIds(statsVersion, 'Xref Streams'),
            'encoded_streams': listedIds(statsVersion, 'Encoded'),
            'js_objects': listedIds(statsVersion, 'Objects with JS code'),
        }
        # Decoding errors are only reported when there are encoded streams
        if versionInfo['encoded_streams']:
            versionInfo['decoding_error_streams'] = listedIds(statsVersion, 'Decoding Errors')
        else:
            versionInfo['decoding_error_streams'] = []
        versionInfo['suspicious_elements'] = {'triggers': statsVersion['Events'],
                                              'actions': statsVersion['Actions'],
                                              'elements': describeFindings(statsVersion['Elements']),
                                              'js_vulns': describeFindings(statsVersion['Vulns']),
                                              'urls': statsVersion['URLs']}
        advancedInfo.append({'version_info': versionInfo})
    jsonDict = {
        'peepdf_analysis': {
            'peepdf_info': peepdfDict,
            'date': datetime.today().strftime('%Y-%m-%d %H:%M'),
            'basic': basicDict,
            'advanced': advancedInfo,
        }
    }
    return json.dumps(jsonDict, indent=4, sort_keys=True)


author = 'Jose Miguel Esparza'
email = 'peepdf AT eternal-todo.com'
url = 'http://peepdf.eternal-todo.com'
twitter = 'http://twitter.com/EternalTodo'
peepTwitter = 'http://twitter.com/peepdf'
_version = '0.42'
revision = '420'
newLine = os.linesep
errorsFile = os.path.expanduser("~/.peepdf-error.txt")

versionHeader = 'Will\'s peepdf ' + _version + ' r' + revision
peepdfHeader = (
    versionHeader + newLine * 2 + url + newLine + peepTwitter + newLine +
    email + newLine * 2 + author + newLine + twitter + newLine
)
def _logTraceback():
    '''
        Append the traceback of the exception being handled to the errors file
    '''
    # The file is closed right away instead of leaking one handle per logged error
    with open(errorsFile, 'a') as errorLog:
        traceback.print_exc(file=errorLog)


def main():
    '''
        Command line entry point: parses the options, analyses the given PDF file
        and shows the result as text, XML or JSON, or starts the console
        (interactive, script file or single commands).

        Unexpected errors are logged to the errors file. The function exits
        through sys.exit() with a message whenever that file exists.
    '''
    global COLORIZED_OUTPUT

    argsParser = optparse.OptionParser(usage='Usage: peepdf.py [options] PDF_file', description=versionHeader)
    argsParser.add_option('-i', '--interactive', action='store_true', dest='isInteractive', default=False,
                          help='Sets console mode.')
    argsParser.add_option('-s', '--load-script', action='store', type='string', dest='scriptFile',
                          help='Loads the commands stored in the specified file and execute them.')
    argsParser.add_option('-c', '--check-vt', action='store_true', dest='checkOnVT', default=False,
                          help='Checks the hash of the PDF file on VirusTotal.')
    argsParser.add_option('-f', '--force-mode', action='store_true', dest='isForceMode', default=False,
                          help='Sets force parsing mode to ignore errors.')
    argsParser.add_option('-l', '--loose-mode', action='store_true', dest='isLooseMode', default=False,
                          help='Sets loose parsing mode to catch malformed objects.')
    argsParser.add_option('-m', '--manual-analysis', action='store_true', dest='isManualAnalysis', default=False,
                          help='Avoids automatic Javascript analysis. Useful with eternal loops like heap spraying.')
    argsParser.add_option('-g', '--grinch-mode', action='store_true', dest='avoidColors', default=False,
                          help='Avoids colorized output in the interactive console.')
    argsParser.add_option('-v', '--version', action='store_true', dest='version', default=False,
                          help='Shows program\'s version number.')
    argsParser.add_option('-x', '--xml', action='store_true', dest='xmlOutput', default=False,
                          help='Shows the document information in XML format.')
    argsParser.add_option('-j', '--json', action='store_true', dest='jsonOutput', default=False,
                          help='Shows the document information in JSON format.')
    argsParser.add_option('-C', '--command', action='append', type='string', dest='commands',
                          help='Specifies a command from the interactive console to be executed.')
    (options, args) = argsParser.parse_args()

    stats = ""
    pdf = None
    fileName = None
    statsDict = None
    vtJsonDict = None

    try:
        # Avoid colors in the output
        if not COLORIZED_OUTPUT or options.avoidColors:
            warningColor = ''
            errorColor = ''
            alertColor = ''
            staticColor = ''
            resetColor = ''
        else:
            warningColor = Fore.YELLOW
            errorColor = Fore.RED
            alertColor = Fore.RED
            staticColor = Fore.BLUE
            resetColor = Style.RESET_ALL

        if options.version:
            print(peepdfHeader)
        else:
            if len(args) == 1:
                fileName = args[0]
                if not os.path.exists(fileName):
                    sys.exit('Error: The file "' + fileName + '" does not exist!!')
            elif len(args) > 1 or (len(args) == 0 and not options.isInteractive):
                # print_help() returns None, so the exit status stays 0 as before
                argsParser.print_help()
                sys.exit()

            if options.scriptFile is not None:
                if not os.path.exists(options.scriptFile):
                    sys.exit('Error: The script file "' + options.scriptFile + '" does not exist!!')

            if fileName is not None:
                pdfParser = PDFParser()
                ret, pdf = pdfParser.parse(fileName, options.isForceMode, options.isLooseMode,
                                           options.isManualAnalysis)
                if options.checkOnVT:
                    # Checks the MD5 on VirusTotal
                    md5Hash = pdf.getMD5()
                    ret = vtcheck(md5Hash, VT_KEY)
                    if ret[0] == -1:
                        pdf.addError(ret[1])
                    else:
                        vtJsonDict = ret[1]
                        if "response_code" in vtJsonDict:
                            if vtJsonDict['response_code'] == 1:
                                if "positives" in vtJsonDict and "total" in vtJsonDict:
                                    pdf.setDetectionRate([vtJsonDict['positives'], vtJsonDict['total']])
                                else:
                                    pdf.addError('Missing elements in the response from VirusTotal!!')
                                if "permalink" in vtJsonDict:
                                    pdf.setDetectionReport(vtJsonDict['permalink'])
                            else:
                                pdf.setDetectionRate(None)
                        else:
                            pdf.addError('Bad response from VirusTotal!!')
                statsDict = pdf.getStats()

            if options.xmlOutput:
                try:
                    xml = getPeepXML(statsDict, _version, revision)
                    sys.stdout.write("".join(chr(x) for x in bytearray(xml)))
                except Exception:
                    errorMessage = '*** Error: Exception while generating the XML file!!'
                    _logTraceback()
                    raise Exception('PeepException', 'Send me an email ;)')
            elif options.jsonOutput and not options.commands:
                try:
                    jsonReport = getPeepJSON(statsDict, _version, revision)
                    sys.stdout.write(jsonReport)
                except Exception:
                    errorMessage = '*** Error: Exception while generating the JSON report!!'
                    _logTraceback()
                    raise Exception('PeepException', 'Send me an email ;)')
            else:
                if COLORIZED_OUTPUT and not options.avoidColors:
                    try:
                        init()
                    except Exception:
                        COLORIZED_OUTPUT = False
                if options.scriptFile is not None:
                    from peepdf.PDFConsole import PDFConsole

                    # The with block closes the script file on success too, not only on failure
                    with open(options.scriptFile, 'rb') as scriptFileObject:
                        console = PDFConsole(pdf, VT_KEY, options.avoidColors, stdin=scriptFileObject)
                        try:
                            console.cmdloop()
                        except:
                            # Deliberately broad: anything leaving the console is logged and reported
                            errorMessage = '*** Error: Exception not handled using the batch mode!!'
                            _logTraceback()
                            raise Exception('PeepException', 'Send me an email ;)')
                elif options.commands is not None:
                    # Absolute import, consistent with the other two console imports
                    from peepdf.PDFConsole import PDFConsole

                    console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                    try:
                        for command in options.commands:
                            console.onecmd(command)
                    except:
                        # Deliberately broad: anything leaving the console is logged and reported
                        errorMessage = '*** Error: Exception not handled using the batch commands!!'
                        _logTraceback()
                        raise Exception('PeepException', 'Send me an email ;)')
                else:
                    if statsDict is not None:
                        if COLORIZED_OUTPUT and not options.avoidColors:
                            beforeStaticLabel = staticColor
                        else:
                            beforeStaticLabel = ''

                        if not JS_MODULE:
                            warningMessage = 'Warning: PyV8 is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not EMU_MODULE:
                            warningMessage = 'Warning: pylibemu is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        if not PIL_MODULE:
                            warningMessage = 'Warning: Python Imaging Library (PIL) is not installed!!'
                            stats += warningColor + warningMessage + resetColor + newLine
                        errors = statsDict['Errors']
                        for error in errors:
                            if error.find('Decryption error') != -1:
                                stats += errorColor + error + resetColor + newLine
                        if stats != '':
                            stats += newLine
                        statsDict = pdf.getStats()

                        stats += beforeStaticLabel + 'File: ' + resetColor + statsDict['File'] + newLine
                        stats += beforeStaticLabel + 'MD5: ' + resetColor + statsDict['MD5'] + newLine
                        stats += beforeStaticLabel + 'SHA1: ' + resetColor + statsDict['SHA1'] + newLine
                        stats += beforeStaticLabel + 'SHA256: ' + resetColor + statsDict['SHA256'] + newLine
                        stats += beforeStaticLabel + 'Size: ' + resetColor + statsDict['Size'] + ' bytes' + newLine
                        if options.checkOnVT:
                            if statsDict['Detection'] != []:
                                detectionReportInfo = ''
                                if statsDict['Detection'] is not None:
                                    positives = statsDict['Detection'][0]
                                    total = statsDict['Detection'][1]
                                    detectionColor = ''
                                    if COLORIZED_OUTPUT and not options.avoidColors:
                                        # Integer arithmetic: with "/" Python 3 produces floats that
                                        # never match the thresholds below, and a total of 0 raised
                                        # ZeroDivisionError.
                                        third = total // 3
                                        if third > 0:
                                            detectionLevel = positives // third
                                            if detectionLevel == 0:
                                                detectionColor = alertColor
                                            elif detectionLevel == 1:
                                                detectionColor = warningColor
                                    detectionRate = '%s%d%s/%d' % (detectionColor, positives, resetColor, total)
                                    if statsDict['Detection report'] != '':
                                        detectionReportInfo = (
                                            beforeStaticLabel + 'Detection report: ' + resetColor +
                                            statsDict['Detection report'] + newLine
                                        )
                                else:
                                    detectionRate = 'File not found on VirusTotal'
                                stats += beforeStaticLabel + 'Detection: ' + resetColor + detectionRate + newLine
                                stats += detectionReportInfo
                        stats += beforeStaticLabel + 'Version: ' + resetColor + statsDict['Version'] + newLine
                        stats += beforeStaticLabel + 'Binary: ' + resetColor + statsDict['Binary'] + newLine
                        stats += beforeStaticLabel + 'Linearized: ' + resetColor + statsDict['Linearized'] + newLine
                        stats += beforeStaticLabel + 'Encrypted: ' + resetColor + statsDict['Encrypted']
                        if statsDict['Encryption Algorithms'] != []:
                            stats += ' (' + ', '.join(
                                algorithmInfo[0] + ' ' + str(algorithmInfo[1]) + ' bits'
                                for algorithmInfo in statsDict['Encryption Algorithms']
                            ) + ')'
                        stats += newLine
                        stats += beforeStaticLabel + 'Updates: ' + resetColor + statsDict['Updates'] + newLine
                        stats += beforeStaticLabel + 'Objects: ' + resetColor + statsDict['Objects'] + newLine
                        stats += beforeStaticLabel + 'Streams: ' + resetColor + statsDict['Streams'] + newLine
                        stats += beforeStaticLabel + 'URIs: ' + resetColor + statsDict['URIs'] + newLine
                        stats += beforeStaticLabel + 'Comments: ' + resetColor + statsDict['Comments'] + newLine
                        stats += (
                            beforeStaticLabel + 'Errors: ' + resetColor + str(len(statsDict['Errors'])) +
                            newLine * 2
                        )
                        for version, statsVersion in enumerate(statsDict['Versions']):
                            stats += beforeStaticLabel + 'Version ' + resetColor + str(version) + ':' + newLine
                            if statsVersion['Catalog'] is not None:
                                stats += (
                                    beforeStaticLabel + '\tCatalog: ' + resetColor + statsVersion['Catalog'] +
                                    newLine
                                )
                            else:
                                stats += beforeStaticLabel + '\tCatalog: ' + resetColor + 'No' + newLine
                            if statsVersion['Info'] is not None:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + statsVersion['Info'] + newLine
                            else:
                                stats += beforeStaticLabel + '\tInfo: ' + resetColor + 'No' + newLine
                            stats += (
                                beforeStaticLabel + '\tObjects (' + statsVersion['Objects'][0] + '): ' +
                                resetColor + str(statsVersion['Objects'][1]) + newLine
                            )
                            if statsVersion['Compressed Objects'] is not None:
                                stats += (
                                    beforeStaticLabel + '\tCompressed objects (' +
                                    statsVersion['Compressed Objects'][0] + '): ' + resetColor +
                                    str(statsVersion['Compressed Objects'][1]) + newLine
                                )
                            if statsVersion['Errors'] is not None:
                                stats += (
                                    beforeStaticLabel + '\t\tErrors (' + statsVersion['Errors'][0] + '): ' +
                                    resetColor + str(statsVersion['Errors'][1]) + newLine
                                )
                            stats += (
                                beforeStaticLabel + '\tStreams (' + statsVersion['Streams'][0] + '): ' +
                                resetColor + str(statsVersion['Streams'][1])
                            )
                            if statsVersion['Xref Streams'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\t\tXref streams (' +
                                    statsVersion['Xref Streams'][0] + '): ' + resetColor +
                                    str(statsVersion['Xref Streams'][1])
                                )
                            if statsVersion['Object Streams'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\t\tObject streams (' +
                                    statsVersion['Object Streams'][0] + '): ' + resetColor +
                                    str(statsVersion['Object Streams'][1])
                                )
                            if int(statsVersion['Streams'][0]) > 0:
                                stats += (
                                    newLine + beforeStaticLabel + '\t\tEncoded (' + statsVersion['Encoded'][0] +
                                    '): ' + resetColor + str(statsVersion['Encoded'][1])
                                )
                                if statsVersion['Decoding Errors'] is not None:
                                    stats += (
                                        newLine + beforeStaticLabel + '\t\tDecoding errors (' +
                                        statsVersion['Decoding Errors'][0] + '): ' + resetColor +
                                        str(statsVersion['Decoding Errors'][1])
                                    )
                            if statsVersion['URIs'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\tObjects with URIs (' +
                                    statsVersion['URIs'][0] + '): ' + resetColor +
                                    str(statsVersion['URIs'][1])
                                )
                            # Labels of the suspicious section use the warning color
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = warningColor
                            if statsVersion['Objects with JS code'] is not None:
                                stats += (
                                    newLine + beforeStaticLabel + '\tObjects with JS code (' +
                                    statsVersion['Objects with JS code'][0] + '): ' + resetColor +
                                    str(statsVersion['Objects with JS code'][1])
                                )
                            actions = statsVersion['Actions']
                            events = statsVersion['Events']
                            vulns = statsVersion['Vulns']
                            elements = statsVersion['Elements']
                            if events is not None or actions is not None or vulns is not None or elements is not None:
                                stats += newLine + beforeStaticLabel + '\tSuspicious elements:' + resetColor + newLine
                                if events is not None:
                                    for event in events:
                                        stats += (
                                            '\t\t' + beforeStaticLabel + event + ' (%d): ' % len(events[event]) +
                                            resetColor + str(events[event]) + newLine
                                        )
                                if actions is not None:
                                    for action in actions:
                                        stats += (
                                            '\t\t' + beforeStaticLabel + action + ' (%d): ' % len(actions[action]) +
                                            resetColor + str(actions[action]) + newLine
                                        )
                                if vulns is not None:
                                    for vuln in vulns:
                                        if vuln in vulnsDict:
                                            vulnName = vulnsDict[vuln][0]
                                            vulnCVEList = vulnsDict[vuln][1]
                                            # join() keeps the opening parenthesis even for an empty CVE list
                                            stats += (
                                                '\t\t' + beforeStaticLabel + vulnName + ' (' +
                                                ','.join(vulnCVEList) + ') (%d): ' % len(vulns[vuln]) +
                                                resetColor + str(vulns[vuln]) + newLine
                                            )
                                        else:
                                            stats += (
                                                '\t\t' + beforeStaticLabel + vuln + ' (%d): ' % len(vulns[vuln]) +
                                                resetColor + str(vulns[vuln]) + newLine
                                            )
                                if elements is not None:
                                    for element in elements:
                                        if element in vulnsDict:
                                            vulnName = vulnsDict[element][0]
                                            vulnCVEList = vulnsDict[element][1]
                                            stats += (
                                                '\t\t' + beforeStaticLabel + vulnName + ' (' +
                                                ','.join(vulnCVEList) + '): ' + resetColor +
                                                str(elements[element]) + newLine
                                            )
                                        else:
                                            stats += (
                                                '\t\t' + beforeStaticLabel + element + ': ' + resetColor +
                                                str(elements[element]) + newLine
                                            )
                            if COLORIZED_OUTPUT and not options.avoidColors:
                                beforeStaticLabel = staticColor
                            urls = statsVersion['URLs']
                            if urls is not None:
                                stats += newLine + beforeStaticLabel + '\tFound URLs:' + resetColor + newLine
                                for url in urls:
                                    stats += '\t\t' + url + newLine
                            stats += newLine * 2
                    if fileName is not None:
                        print(stats)
                    if options.isInteractive:
                        from peepdf.PDFConsole import PDFConsole

                        console = PDFConsole(pdf, VT_KEY, options.avoidColors)
                        while not console.leaving:
                            try:
                                console.cmdloop()
                            except KeyboardInterrupt:
                                sys.exit()
                            except:
                                # Deliberately broad: the console keeps running after any failure
                                errorMessage = (
                                    '*** Error: Exception not handled using the interactive console!! '
                                    'Please, report it to the author!!'
                                )
                                print(errorColor + errorMessage + resetColor + newLine)
                                _logTraceback()
    except Exception as e:
        # Errors already reported above arrive as Exception('PeepException', reason)
        excName = e.args[0] if len(e.args) == 2 else None
        if excName != 'PeepException':
            errorMessage = '*** Error: Exception not handled!!'
            _logTraceback()
        print(errorColor + errorMessage + resetColor + newLine)
    finally:
        if os.path.exists(errorsFile):
            message = newLine + 'Any errors are logged to file "%s"' % (errorsFile)
            message = errorColor + message + resetColor
            sys.exit(message)
0000000..eb94fd2 Binary files /dev/null and b/tests/files/worldreport.pdf differ diff --git a/tests/test_pee.py b/tests/test_pee.py new file mode 100644 index 0000000..7a1fc62 --- /dev/null +++ b/tests/test_pee.py @@ -0,0 +1,86 @@ +# peepdf is a tool to analyse and modify PDF files +# http://peepdf.eternal-todo.com +# By Jose Miguel Esparza +# +# Copyright (C) 2016 Jose Miguel Esparza +# +# This file is part of peepdf. +# +# peepdf is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# peepdf is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with peepdf. If not, see <http://www.gnu.org/licenses/>.
#

import mock
import pytest
import time

import peepdf
import peepdf.main


def _parse_sample(path):
    # Every test parses its sample with the same permissive settings
    return peepdf.PDFCore.PDFParser().parse(
        path, forceMode=True, looseMode=True, manualAnalysis=False
    )


def test_js_detect():
    # Only the stream holding "function docOpened()" may be flagged as Javascript
    status, pdfFile = _parse_sample("tests/files/js_in_pdf.js")
    assert not status

    for revisionIndex in range(pdfFile.updates + 1):
        for indirect in pdfFile.body[revisionIndex].objects.values():
            if not isinstance(indirect, peepdf.PDFCore.PDFIndirectObject):
                continue
            inner = indirect.getObject()
            if not isinstance(inner, peepdf.PDFCore.PDFStream):
                continue
            decoded = inner.decodedStream
            detected = peepdf.JSAnalysis.isJavascript(decoded)
            if "function docOpened()" in decoded:
                assert detected
            else:
                assert not detected


def test_whitespace_after_opening():
    # No stream of the second body may report a decompression error
    status, pdfFile = _parse_sample("tests/files/BB-1-Overview.pdf")
    assert not status

    decompressionFailure = ["Decoding error: Error decompressing string"]
    for indirect in pdfFile.body[1].objects.values():
        if indirect.object.type == "stream":
            assert indirect.object.errors != decompressionFailure


def test_lxml_missing():
    # Without lxml the XML report must fail with an explanatory AssertionError
    with mock.patch.dict(peepdf.main.__dict__, {"etree": None}):
        with pytest.raises(AssertionError) as excInfo:
            peepdf.main.getPeepXML(None, None, None)
        excInfo.match("lxml must be installed")


def test_quickish_isjs():
    started = time.time()
    _parse_sample("tests/files/phishing0.pdf")
    elapsed = time.time() - started
    # Should take no more than 2 seconds (in 0.3.5 this would take >5 seconds).
    assert elapsed < 2


def test_ignore_ghostscript():
    started = time.time()
    _parse_sample("tests/files/worldreport.pdf")
    elapsed = time.time() - started
    # Should take less than 20 seconds (in 0.4.1 this would take >1 minute).
    assert elapsed < 20