diff --git a/config/dpkg/changelog b/config/dpkg/changelog index bde0406897..77cf2ede2e 100644 --- a/config/dpkg/changelog +++ b/config/dpkg/changelog @@ -2,4 +2,4 @@ python-plaso (1.2.1-1) unstable; urgency=low * Auto-generated - -- Log2Timeline Tue, 02 Jun 2015 17:54:38 +0200 + -- Log2Timeline Wed, 03 Jun 2015 12:34:02 +0200 diff --git a/plaso/__init__.py b/plaso/__init__.py index f442cc017f..c423e93da3 100644 --- a/plaso/__init__.py +++ b/plaso/__init__.py @@ -3,7 +3,7 @@ __version__ = '1.2.1' VERSION_DEV = True -VERSION_DATE = '20150602' +VERSION_DATE = '20150603' def GetVersion(): diff --git a/plaso/parsers/firefox_cache.py b/plaso/parsers/firefox_cache.py index 8622d577af..169a9ea3fb 100644 --- a/plaso/parsers/firefox_cache.py +++ b/plaso/parsers/firefox_cache.py @@ -14,6 +14,7 @@ from plaso.parsers import interface from plaso.parsers import manager + __author__ = 'Petter Bjelland (petter.bjelland@gmail.com)' @@ -22,232 +23,249 @@ class FirefoxCacheEvent(time_events.PosixTimeEvent): DATA_TYPE = u'firefox:cache:record' - def __init__( - self, timestamp_type, timestamp, version, metadata, request_method, url, - response_code): - super(FirefoxCacheEvent, self).__init__(timestamp, timestamp_type) - - self.cache_version = version + def __init__(self, timestamp, timestamp_type, cache_record_values): + """Initializes the event object. - # Fields found in both cache 1 and 2. - self.last_modified = metadata.last_modified - self.fetch_count = metadata.fetch_count - self.request_size = metadata.request_size - self.request_method = request_method - self.url = url - self.response_code = response_code - - if not version == 1: - return + Args: + timestamp: The POSIX timestamp value. + timestamp_description: A description string for the timestamp value. + cache_record_values: A dictionary object containing the cache record + values. + """ + super(FirefoxCacheEvent, self).__init__(timestamp, timestamp_type) - # Fields found in cache 1 only. - self.major = metadata.major - self.minor = metadata.minor - self.location = metadata.location - self.info_size = metadata.info_size - self.data_size = metadata.data_size + for key, value in iter(cache_record_values.items()): + setattr(self, key, value) -class FirefoxCacheParser(interface.BaseParser): - """Extract cache version 2 records from Firefox >= 32.""" +class BaseFirefoxCacheParser(interface.SingleFileBaseParser): + """Parses Firefox cache files.""" - NAME = u'firefox_cache' + # pylint: disable=abstract-method DESCRIPTION = u'Parser for Firefox Cache files.' - CACHE_VERSION = 2 - - REQUEST_METHODS = frozenset([ - u'CONNECT', u'DELETE', u'GET', u'HEAD', u'OPTIONS', - u'PATCH', u'POST', u'PUT', u'TRACE']) - - MAX_URL_LENGTH = 65536 - - # Cache 2 filenames are SHA-1 hex digests. - CACHE_NAME = pyparsing.Word(pyparsing.hexnums, exact=40) - - # The last four bytes of a file gives the size of the cached content. - LENGTH = construct.Struct(u'length', construct.UBInt32(u'length')) - - CHUNK_SIZE = 512 * 1024 - - CACHE_RECORD_HEADER_STRUCT = construct.Struct( - u'record_header', - construct.UBInt32(u'major'), - construct.UBInt32(u'fetch_count'), - construct.UBInt32(u'last_fetched'), - construct.UBInt32(u'last_modified'), - construct.UBInt32(u'frecency'), - construct.UBInt32(u'expire_time'), - construct.UBInt32(u'request_size')) - - def _Accept(self, candidate): - """Determine whether the candidate is a valid cache record.""" + _MAXIMUM_URL_LENGTH = 65536 - return ( - candidate.request_size > 0 - and candidate.request_size < self.MAX_URL_LENGTH - and candidate.major == 1 and candidate.last_fetched > 0 - and candidate.fetch_count > 0) + _REQUEST_METHODS = frozenset([ + u'CONNECT', u'DELETE', u'GET', u'HEAD', u'OPTIONS', u'PATCH', u'POST', + u'PUT', u'TRACE']) - def _ParseHTTPHeaders(self, filename, offset, headers): - """Extract relevant information from HTTP header.""" + def _ParseHTTPHeaders(self, header_data, offset, display_name): + """Extract relevant information from HTTP header. + Args: + header_data: binary string containing the HTTP header data. + offset: the offset of the cache record. + display_name: the display name. + """ try: - http_header_start = headers.index(u'request-method') + http_header_start = header_data.index(b'request-method') except ValueError: - logging.debug(u'No request method in header: "{0:s}"'.format(headers)) + logging.debug(u'No request method in header: "{0:s}"'.format(header_data)) return None, None # HTTP request and response headers. - http_headers = headers[http_header_start::] + http_headers = header_data[http_header_start::] - header_parts = http_headers.split(u'\x00') + header_parts = http_headers.split(b'\x00') + # TODO: check len(header_parts). request_method = header_parts[1] - if request_method not in self.REQUEST_METHODS: - safe_headers = headers.decode(u'ascii', errors=u'replace') + if request_method not in self._REQUEST_METHODS: + safe_headers = header_data.decode(u'ascii', errors=u'replace') logging.debug(( u'[{0:s}] {1:s}:{2:d}: Unknown HTTP method \'{3:s}\'. Response ' u'headers: \'{4:s}\'').format( - self.NAME, filename, offset, request_method, safe_headers)) + self.NAME, display_name, offset, request_method, safe_headers)) try: - response_head_start = http_headers.index(u'response-head') + response_head_start = http_headers.index(b'response-head') except ValueError: - logging.debug(u'No response head in header: "{0:s}"'.format(headers)) + logging.debug(u'No response head in header: "{0:s}"'.format(header_data)) return request_method, None # HTTP response headers. response_head = http_headers[response_head_start::] - response_head_parts = response_head.split(u'\x00') + response_head_parts = response_head.split(b'\x00') # Response code, followed by other response header key-value pairs, # separated by newline. + # TODO: check len(response_head_parts). response_head_text = response_head_parts[1] - response_head_text_parts = response_head_text.split(u'\r\n') + response_head_text_parts = response_head_text.split(b'\r\n') # The first line contains response code. + # TODO: check len(response_head_text_parts). response_code = response_head_text_parts[0] - if not response_code.startswith(u'HTTP'): - safe_headers = headers.decode(u'ascii', errors=u'replace') + if not response_code.startswith(b'HTTP'): + safe_headers = header_data.decode(u'ascii', errors=u'replace') logging.debug(( u'[{0:s}] {1:s}:{2:d}: Could not determine HTTP response code. ' u'Response headers: \'{3:s}\'.').format( - self.NAME, filename, offset, safe_headers)) + self.NAME, display_name, offset, safe_headers)) return request_method, response_code + def _ValidateCacheRecordHeader(self, cache_record_header): + """Determines whether the cache record header is valid. + + Args: + cache_record_header: the cache record header (instance of + construct.Struct). + + Returns: + A boolean value indicating the cache record header is valid. + """ + return ( + cache_record_header.request_size > 0 and + cache_record_header.request_size < self._MAXIMUM_URL_LENGTH and + cache_record_header.major == 1 and + cache_record_header.last_fetched > 0 and + cache_record_header.fetch_count > 0) + + +class FirefoxCacheParser(BaseFirefoxCacheParser): + """Parses Firefox 32 or later cache files.""" + + NAME = u'firefox_cache' + + CACHE_VERSION = 2 + + # Cache 2 filenames are SHA-1 hex digests. + # TODO: change into regexp. + _CACHE_FILENAME = pyparsing.Word(pyparsing.hexnums, exact=40) + + # The last four bytes of a file gives the size of the cached content. + _LENGTH = construct.UBInt32(u'length') + + _CACHE_RECORD_HEADER_STRUCT = construct.Struct( + u'record_header', + construct.UBInt32(u'major'), + construct.UBInt32(u'fetch_count'), + construct.UBInt32(u'last_fetched'), + construct.UBInt32(u'last_modified'), + construct.UBInt32(u'frequency'), + construct.UBInt32(u'expire_time'), + construct.UBInt32(u'request_size')) + + _CHUNK_SIZE = 512 * 1024 + def _GetStartOfMetadata(self, file_object): """Determine the byte offset of the cache record metadata in cache file. - This method is inspired by the unlicensed work of EnCase instructor - James Habben: https://github.com/JamesHabben/FirefoxCache2 + This method is inspired by the work of James Habben: + https://github.com/JamesHabben/FirefoxCache2 Args: file_object: The file containing the cache record. """ - file_object.seek(-4, os.SEEK_END) try: - length = self.LENGTH.parse_stream(file_object).length + length = self._LENGTH.parse_stream(file_object) except (IOError, construct.FieldError): raise IOError(u'Could not find metadata offset in Firefox cache file.') # Firefox splits the content into chunks. - hash_chunks = length // self.CHUNK_SIZE - if length % self.CHUNK_SIZE != 0: + hash_chunks, remainder = divmod(length, self._CHUNK_SIZE) + if remainder != 0: hash_chunks += 1 # Each chunk in the cached record is padded with two bytes. - total_chunk_padding = hash_chunks * 2 - - return length + total_chunk_padding + return length + (hash_chunks * 2) - def Parse(self, parser_mediator, **kwargs): - """Extract records from a Firefox cache file. + def ParseFileObject(self, parser_mediator, file_object, **kwargs): + """Parses a Firefox cache file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). - """ + file_object: A file-like object. + Raises: + UnableToParseFile: when the file cannot be parsed. + """ file_entry = parser_mediator.GetFileEntry() try: # Match cache2 filename (SHA-1 hex of cache record key). - self.CACHE_NAME.parseString(file_entry.name) + self._CACHE_FILENAME.parseString(file_entry.name) except pyparsing.ParseException: raise errors.UnableToParseFile(u'Not a Firefox cache2 file.') - if file_entry.GetStat().size == 0: + if file_object.get_size() == 0: raise errors.UnableToParseFile(u'Empty file.') - file_object = parser_mediator.GetFileObject() - meta_start = self._GetStartOfMetadata(file_object) file_object.seek(meta_start, os.SEEK_SET) - # The first four bytes of metadata is a hash value of the cached content. - file_object.read(4) + # Skip the first 4 bytes of metadata which contains a hash value of + # the cached content. + file_object.seek(4, os.SEEK_CUR) try: - candidate = self.CACHE_RECORD_HEADER_STRUCT.parse_stream(file_object) + cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream( + file_object) except (IOError, construct.FieldError): - file_object.close() raise errors.UnableToParseFile(u'Not a Firefox cache2 file.') - if not self._Accept(candidate): - file_object.close() + if not self._ValidateCacheRecordHeader(cache_record_header): raise errors.UnableToParseFile(u'Not a valid Firefox cache2 record.') - url = file_object.read(candidate.request_size) + url = file_object.read(cache_record_header.request_size) - headers = file_object.read() + header_data = file_object.read() + display_name = parser_mediator.GetDisplayName() request_method, response_code = self._ParseHTTPHeaders( - file_entry.name, meta_start, headers) - - args = [self.CACHE_VERSION, candidate, request_method, url, response_code] - - parser_mediator.ProduceEvent(FirefoxCacheEvent( - eventdata.EventTimestamp.LAST_VISITED_TIME, - candidate.last_fetched, *args)) - - if candidate.last_modified: - parser_mediator.ProduceEvent(FirefoxCacheEvent( - eventdata.EventTimestamp.WRITTEN_TIME, candidate.last_modified, - *args)) - - if candidate.expire_time: - parser_mediator.ProduceEvent(FirefoxCacheEvent( - eventdata.EventTimestamp.EXPIRATION_TIME, candidate.expire_time, - *args)) - - file_object.close() - - -class FirefoxOldCacheParser(FirefoxCacheParser): - """Extract cached records from Firefox < 32.""" + header_data, meta_start, display_name) + + cache_record_values = { + u'fetch_count': cache_record_header.fetch_count, + u'frequency': cache_record_header.frequency, + u'major': cache_record_header.major, + u'request_method': request_method, + u'request_size': cache_record_header.request_size, + u'response_code': response_code, + u'version': self.CACHE_VERSION, + u'url': url} + + event_object = FirefoxCacheEvent( + cache_record_header.last_fetched, + eventdata.EventTimestamp.LAST_VISITED_TIME, cache_record_values) + parser_mediator.ProduceEvent(event_object) + + if cache_record_header.last_modified: + event_object = FirefoxCacheEvent( + cache_record_header.last_modified, + eventdata.EventTimestamp.WRITTEN_TIME, cache_record_values) + parser_mediator.ProduceEvent(event_object) + + if cache_record_header.expire_time: + event_object = FirefoxCacheEvent( + cache_record_header.expire_time, + eventdata.EventTimestamp.EXPIRATION_TIME, cache_record_values) + parser_mediator.ProduceEvent(event_object) + + +class FirefoxOldCacheParser(BaseFirefoxCacheParser): + """Parses Firefox 31 or earlier cache files.""" NAME = u'firefox_old_cache' CACHE_VERSION = 1 - # Number of bytes allocated to a cache record metadata. - RECORD_HEADER_SIZE = 36 - - # Initial size of Firefox >= 4 cache files. - INITIAL_CACHE_FILE_SIZE = 1024 * 1024 * 4 + # Initial size of Firefox 4 and later cache files. + _INITIAL_CACHE_FILE_SIZE = 4 * 1024 * 1024 # Smallest possible block size in Firefox cache files. - MIN_BLOCK_SIZE = 256 + _MINUMUM_BLOCK_SIZE = 256 - OLD_CACHE_RECORD_HEADER_STRUCT = construct.Struct( + _CACHE_RECORD_HEADER_STRUCT = construct.Struct( u'record_header', construct.UBInt16(u'major'), construct.UBInt16(u'minor'), @@ -260,22 +278,31 @@ class FirefoxOldCacheParser(FirefoxCacheParser): construct.UBInt32(u'request_size'), construct.UBInt32(u'info_size')) - ALTERNATIVE_CACHE_NAME = ( - pyparsing.Word(pyparsing.hexnums, exact=5) + pyparsing.Word(u'm', exact=1) - + pyparsing.Word(pyparsing.nums, exact=2)) + _CACHE_RECORD_HEADER_SIZE = _CACHE_RECORD_HEADER_STRUCT.sizeof() + + # TODO: change into regexp. + _CACHE_FILENAME = ( + pyparsing.Word(pyparsing.hexnums, exact=5) + + pyparsing.Word(u'm', exact=1) + + pyparsing.Word(pyparsing.nums, exact=2)) FIREFOX_CACHE_CONFIG = collections.namedtuple( u'firefox_cache_config', u'block_size first_record_offset') - def _GetFirefoxConfig(self, file_entry): - """Determine cache file block size. Raises exception if not found.""" + def _GetFirefoxConfig(self, file_object, display_name): + """Determine cache file block size. - file_object = file_entry.GetFileObject() + Args: + file_object: A file-like object. + display_name: the display name. - # There ought to be a valid record within the first 4MB. We use this + Raises: + UnableToParseFile: if no valid cache record could be found. + """ + # There ought to be a valid record within the first 4 MiB. We use this # limit to prevent reading large invalid files. - to_read = min(file_object.get_size(), self.INITIAL_CACHE_FILE_SIZE) + to_read = min(file_object.get_size(), self._INITIAL_CACHE_FILE_SIZE) while file_object.get_offset() < to_read: offset = file_object.get_offset() @@ -284,10 +311,11 @@ def _GetFirefoxConfig(self, file_entry): # We have not yet determined the block size, so we use the smallest # possible size. fetched, _, _ = self._NextRecord( - file_entry.name, file_object, self.MIN_BLOCK_SIZE) + file_object, display_name, self._MINUMUM_BLOCK_SIZE) record_size = ( - self.RECORD_HEADER_SIZE + fetched.request_size + fetched.info_size) + self._CACHE_RECORD_HEADER_SIZE + fetched.request_size + + fetched.info_size) if record_size >= 4096: # _CACHE_003_ @@ -303,89 +331,116 @@ def _GetFirefoxConfig(self, file_entry): except IOError: logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid record.'.format( - self.NAME, file_entry.name, offset)) + self.NAME, display_name, offset)) raise errors.UnableToParseFile( - u'Could not find a valid cache record. ' - u'Not a Firefox cache file.') + u'Could not find a valid cache record. Not a Firefox cache file.') - def _NextRecord(self, filename, file_object, block_size): - """Provide the next cache record.""" + def _NextRecord(self, file_object, display_name, block_size): + """Provide the next cache record. + + Args: + file_object: A file-like object. + display_name: the display name. + block_size: the block size. + Returns: + A tuple containing the fetched, modified and expire event objects + (instances of EventObject) or None. + """ offset = file_object.get_offset() try: - candidate = self.OLD_CACHE_RECORD_HEADER_STRUCT.parse_stream(file_object) + cache_record_header = self._CACHE_RECORD_HEADER_STRUCT.parse_stream( + file_object) except (IOError, construct.FieldError): raise IOError(u'Unable to parse stream.') - if not self._Accept(candidate): + if not self._ValidateCacheRecordHeader(cache_record_header): # Move reader to next candidate block. - file_object.seek(block_size - self.RECORD_HEADER_SIZE, os.SEEK_CUR) + file_offset = block_size - self._CACHE_RECORD_HEADER_SIZE + file_object.seek(file_offset, os.SEEK_CUR) raise IOError(u'Not a valid Firefox cache record.') # The last byte in a request is null. - url = file_object.read(candidate.request_size)[:-1] + url = file_object.read(cache_record_header.request_size)[:-1] # HTTP response header, even elements are keys, odd elements values. - headers = file_object.read(candidate.info_size) + header_data = file_object.read(cache_record_header.info_size) request_method, response_code = self._ParseHTTPHeaders( - filename, offset, headers) + header_data, offset, display_name) # A request can span multiple blocks, so we use modulo. - _, remainder = divmod(file_object.get_offset() - offset, block_size) + file_offset = file_object.get_offset() - offset + _, remainder = divmod(file_offset, block_size) # Move reader to next candidate block. Include the null-byte skipped above. file_object.seek(block_size - remainder, os.SEEK_CUR) - args = [self.CACHE_VERSION, candidate, request_method, url, response_code] + cache_record_values = { + u'data_size': cache_record_header.data_size, + u'fetch_count': cache_record_header.fetch_count, + u'info_size': cache_record_header.info_size, + u'location': cache_record_header.location, + u'major': cache_record_header.major, + u'minor': cache_record_header.minor, + u'request_method': request_method, + u'request_size': cache_record_header.request_size, + u'response_code': response_code, + u'version': self.CACHE_VERSION, + u'url': url} fetched = FirefoxCacheEvent( - eventdata.EventTimestamp.LAST_VISITED_TIME, - candidate.last_fetched, *args) - - modified = None - expire = None + cache_record_header.last_fetched, + eventdata.EventTimestamp.LAST_VISITED_TIME, cache_record_values) - if candidate.last_modified: + if cache_record_header.last_modified: modified = FirefoxCacheEvent( - eventdata.EventTimestamp.WRITTEN_TIME, candidate.last_modified, *args) + cache_record_header.last_modified, + eventdata.EventTimestamp.WRITTEN_TIME, cache_record_values) + else: + modified = None - if candidate.expire_time: + if cache_record_header.expire_time: expire = FirefoxCacheEvent( - eventdata.EventTimestamp.EXPIRATION_TIME, candidate.expire_time, - *args) + cache_record_header.expire_time, + eventdata.EventTimestamp.EXPIRATION_TIME, cache_record_values) + else: + expire = None return fetched, modified, expire - def Parse(self, parser_mediator, **kwargs): - """Extract records from a Firefox cache file. + def ParseFileObject(self, parser_mediator, file_object, **kwargs): + """Parses a Firefox cache file-like object. Args: parser_mediator: A parser mediator object (instance of ParserMediator). + file_object: A file-like object. + + Raises: + UnableToParseFile: when the file cannot be parsed. """ - file_object = parser_mediator.GetFileObject() file_entry = parser_mediator.GetFileEntry() + display_name = parser_mediator.GetDisplayName() try: - # Match alternative filename. Five hex characters + 'm' + two digit + # Match cache filename. Five hex characters + 'm' + two digit # number, e.g. '01ABCm02'. 'm' is for metadata. Cache files with 'd' # instead contain data only. - self.ALTERNATIVE_CACHE_NAME.parseString(file_entry.name) + self._CACHE_FILENAME.parseString(file_entry.name) except pyparsing.ParseException: if not file_entry.name.startswith(u'_CACHE_00'): - file_object.close() raise errors.UnableToParseFile(u'Not a Firefox cache1 file.') - firefox_config = self._GetFirefoxConfig(file_entry) + firefox_config = self._GetFirefoxConfig(file_object, display_name) file_object.seek(firefox_config.first_record_offset) while file_object.get_offset() < file_object.get_size(): try: fetched, modified, expire = self._NextRecord( - file_entry.name, file_object, firefox_config.block_size) + file_object, display_name, firefox_config.block_size) parser_mediator.ProduceEvent(fetched) if modified: @@ -394,11 +449,10 @@ def Parse(self, parser_mediator, **kwargs): if expire: parser_mediator.ProduceEvent(expire) except IOError: - logging.debug(u'[{0:s}] {1:s}:{2:d}: Invalid cache record.'.format( - self.NAME, file_entry.name, - file_object.get_offset() - self.MIN_BLOCK_SIZE)) - - file_object.close() + file_offset = file_object.get_offset() - self._MINUMUM_BLOCK_SIZE + logging.debug(( + u'[{0:s}] Invalid cache record in file: {1:s} at offset: ' + u'{2:d}.').format(self.NAME, display_name, file_offset)) manager.ParsersManager.RegisterParsers([