Skip to content

Commit

Permalink
Code review: 245190043: Timelib changes for issue #218
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Jun 10, 2015
1 parent a67963c commit f56b8eb
Show file tree
Hide file tree
Showing 16 changed files with 279 additions and 225 deletions.
2 changes: 1 addition & 1 deletion config/dpkg/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ python-plaso (1.3.0-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <[email protected]> Wed, 10 Jun 2015 18:31:29 +0200
-- Log2Timeline <[email protected]> Wed, 10 Jun 2015 18:39:55 +0200
1 change: 1 addition & 0 deletions plaso/frontend/psort.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def GetTimeSlice(self, event_time_string, duration=5, timezone=pytz.UTC):
A time slice object (instance of TimeSlice).
"""
if event_time_string:
# TODO: move this to a place where bad config can be raised.
event_timestamp = timelib.Timestamp.FromTimeString(
event_time_string, timezone=timezone)
else:
Expand Down
4 changes: 2 additions & 2 deletions plaso/lib/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ class SourceScannerError(Error):
"""Class that defines source scanner errors."""


class TimestampNotCorrectlyFormed(Error):
"""Raised when there is an error adding a timestamp to an EventObject."""
class TimestampError(Error):
"""Class that defines timestamp errors."""


class UnableToOpenFile(Error):
Expand Down
7 changes: 3 additions & 4 deletions plaso/lib/pfilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# TODO: Change this so it becomes an attribute instead of having the backend
# load a front-end library.
from plaso.frontend import presets

from plaso.lib import errors
from plaso.lib import limit
from plaso.lib import objectfilter
from plaso.lib import timelib
Expand Down Expand Up @@ -262,9 +262,8 @@ def __init__(self, data):
try:
self.data = timelib.Timestamp.FromTimeString(
utils.GetUnicodeString(data))
except ValueError as exception:
raise ValueError(u'Wrongly formatted date string: {0:s} - {1:s}'.format(
data, exception))
except (ValueError, errors.TimestampError):
raise ValueError(u'Wrongly formatted date string: {0:s}'.format(data))
elif isinstance(data, datetime.datetime):
self.data = timelib.Timestamp.FromPythonDatetime(data)
elif isinstance(DateCompareObject, data):
Expand Down
79 changes: 28 additions & 51 deletions plaso/lib/timelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import time
import pytz

from plaso.lib import errors


MONTH_DICT = {
'jan': 1,
Expand Down Expand Up @@ -623,15 +625,12 @@ def FromTimeParts(

@classmethod
def FromTimeString(
cls, time_string, timezone=pytz.UTC, dayfirst=False,
gmt_as_timezone=True):
cls, time_string, dayfirst=False, gmt_as_timezone=True,
timezone=pytz.UTC):
"""Converts a string containing a date and time value into a timestamp.
Args:
time_string: String that contains a date and time value.
timezone: Optional timezone object (instance of pytz.timezone) that
the date and time value in the string represents. This value
is used when the timezone cannot be determined from the string.
dayfirst: An optional boolean argument. If set to true then the
parser will change the precedence in which it parses timestamps
from MM-DD-YYYY to DD-MM-YYYY (and YYYY-MM-DD will be
Expand All @@ -641,13 +640,33 @@ def FromTimeString(
this is set to true, that is GMT can be interpreted
differently than UTC. If that is not the expected result
this attribute can be set to false.
timezone: Optional timezone object (instance of pytz.timezone) that
the date and time value in the string represents. This value
is used when the timezone cannot be determined from the string.
Returns:
An integer containing the timestamp or 0 on error.
An integer containing the timestamp.
Raises:
TimestampError: if the time string could not be parsed.
"""
datetime_object = StringToDatetime(
time_string, timezone=timezone, dayfirst=dayfirst,
gmt_as_timezone=gmt_as_timezone)
if not gmt_as_timezone and time_string.endswith(' GMT'):
time_string = u'{0:s}UTC'.format(time_string[:-3])

try:
# TODO: deprecate the use of dateutil parser.
datetime_object = dateutil.parser.parse(time_string, dayfirst=dayfirst)

except (TypeError, ValueError) as exception:
raise errors.TimestampError((
u'Unable to convert time string: {0:s} in to a datetime object '
u'with error: {1:s}').format(time_string, exception))

if datetime_object.tzinfo:
datetime_object = datetime_object.astimezone(pytz.UTC)
else:
datetime_object = timezone.localize(datetime_object)

return cls.FromPythonDatetime(datetime_object)

@classmethod
Expand Down Expand Up @@ -726,48 +745,6 @@ def RoundToSeconds(cls, timestamp):
return int(scrubbed + rounded * cls.MICRO_SECONDS_PER_SECOND)


# TODO: deprecate in favor of CopyFromString, which will remove the
# dependency on dateutil.parser.
def StringToDatetime(
time_string, timezone=pytz.UTC, dayfirst=False, gmt_as_timezone=True):
"""Converts a string representation of a timestamp into a datetime object.
Args:
time_string: String that contains a date and time value.
timezone: Optional timezone object (instance of pytz.timezone) that
the date and time value in the string represents. This value
is used when the timezone cannot be determined from the string.
dayfirst: An optional boolean argument. If set to true then the
parser will change the precedence in which it parses timestamps
from MM-DD-YYYY to DD-MM-YYYY (and YYYY-MM-DD will be YYYY-DD-MM,
etc).
gmt_as_timezone: Sometimes the dateutil parser will interpret GMT and UTC
the same way, that is, it makes no distinction between them.
By default this is set to true, meaning GMT can be interpreted
differently than UTC. If that is not the expected result,
this argument can be set to false.
Returns:
A datetime object.
"""
if not gmt_as_timezone and time_string.endswith(' GMT'):
time_string = u'{0:s}UTC'.format(time_string[:-3])

try:
datetime_object = dateutil.parser.parse(time_string, dayfirst=dayfirst)

except (TypeError, ValueError) as exception:
logging.error(
u'Unable to copy {0:s} to a datetime object with error: {1:s}'.format(
time_string, exception))
return datetime.datetime(1970, 1, 1, 0, 0, 0, 0, tzinfo=pytz.UTC)

if datetime_object.tzinfo:
return datetime_object.astimezone(pytz.UTC)

return timezone.localize(datetime_object)


def GetCurrentYear():
"""Determines the current year."""
datetime_object = datetime.datetime.now()
Expand Down
3 changes: 2 additions & 1 deletion plaso/parsers/filestat.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def __init__(self, timestamp, usage, allocated, size, fs_type):
"""Initializes the event.
Args:
timestamp: The timestamp value.
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
usage: The usage string describing the timestamp.
allocated: Boolean value to indicate the file entry is allocated.
size: The file size in bytes.
Expand Down
9 changes: 7 additions & 2 deletions plaso/parsers/java_idx.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,13 @@ def ParseFileObject(self, parser_mediator, file_object, **kwargs):
# information in the string itself. If that is not the case then
# there is no reliable method for plaso to determine the proper
# timezone, so the assumption is that it is UTC.
download_date = timelib.Timestamp.FromTimeString(
value.string, gmt_as_timezone=False)
try:
download_date = timelib.Timestamp.FromTimeString(
value.string, gmt_as_timezone=False)
except errors.TimestampError:
download_date = None
parser_mediator.ProduceParseError(
u'Unable to parse time value: {0:s}'.format(value.string))

if not url or not ip_address:
raise errors.UnableToParseFile(
Expand Down
51 changes: 30 additions & 21 deletions plaso/parsers/mcafeeav.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging

from plaso.events import text_events
from plaso.lib import errors
from plaso.lib import timelib
from plaso.parsers import manager
from plaso.parsers import text_parser
Expand Down Expand Up @@ -42,30 +43,39 @@ class McafeeAccessProtectionParser(text_parser.TextCSVParser):
COLUMNS = [u'date', u'time', u'status', u'username', u'filename',
u'trigger_location', u'rule', u'action']

def _GetTimestamp(self, date, time, timezone):
"""Return a 64-bit signed timestamp in microseconds since Epoch.
def _GetTimestamp(self, parser_mediator, date, time):
"""Determines a timestamp from the time string.
The timestamp is made up of two strings, the date and the time, separated
by a tab. The time is in local time. The month and day can be either 1 or 2
characters long. E.g.: 7/30/2013\t10:22:48 AM
The date and time are made up of two strings, the date and the time,
separated by a tab. The time is in local time. The month and day can
be either 1 or 2 characters long, e.g.: 7/30/2013\\t10:22:48 AM
Args:
date: The string representing the date.
time: The string representing the time.
timezone: The timezone object.
Args:
parser_mediator: A parser mediator object (instance of ParserMediator).
date: The string representing the date.
time: The string representing the time.
Returns:
A plaso timestamp value, microseconds since Epoch in UTC or None.
Returns:
The timestamp time value. The timestamp contains the number of
microseconds since Jan 1, 1970 00:00:00 UTC or None if the time string
could not be parsed.
"""

# TODO: check if this condition is correct; `not date or not time`
# is likely more accurate.
if not (date and time):
logging.warning(u'Unable to extract timestamp from McAfee AV logline.')
return

# TODO: Figure out how McAfee sets Day First and use that here.
# The in-file time format is '07/30/2013\t10:22:48 AM'.
return timelib.Timestamp.FromTimeString(
u'{0:s} {1:s}'.format(date, time), timezone=timezone)
time_string = u'{0:s} {1:s}'.format(date, time)
try:
return timelib.Timestamp.FromTimeString(
time_string, timezone=parser_mediator.timezone)

except errors.TimestampError:
parser_mediator.ProduceParseError(
u'Unable to parse time string: {0:s}'.format(time_string))

def VerifyRow(self, parser_mediator, row):
"""Verify that this is a McAfee AV Access Protection Log file.
Expand All @@ -88,9 +98,8 @@ def VerifyRow(self, parser_mediator, row):

# Check the date format!
# If it doesn't pass, then this isn't a McAfee AV Access Protection Log
try:
self._GetTimestamp(row[u'date'], row[u'time'], parser_mediator.timezone)
except (TypeError, ValueError):
timestamp = self._GetTimestamp(parser_mediator, row[u'date'], row[u'time'])
if timestamp is None:
return False

# Use the presence of these strings as a backup or in case of partial file.
Expand All @@ -109,10 +118,10 @@ def ParseRow(self, parser_mediator, row_offset, row):
row: A dictionary containing all the fields as denoted in the
COLUMNS class list.
"""
timestamp = self._GetTimestamp(
row[u'date'], row[u'time'], parser_mediator.timezone)
event_object = McafeeAVEvent(timestamp, row_offset, row)
parser_mediator.ProduceEvent(event_object)
timestamp = self._GetTimestamp(parser_mediator, row[u'date'], row[u'time'])
if timestamp is not None:
event_object = McafeeAVEvent(timestamp, row_offset, row)
parser_mediator.ProduceEvent(event_object)


manager.ParsersManager.RegisterParser(McafeeAccessProtectionParser)
29 changes: 16 additions & 13 deletions plaso/parsers/opera.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,42 +10,40 @@

from plaso.events import time_events
from plaso.lib import errors
from plaso.lib import event
from plaso.lib import eventdata
from plaso.lib import timelib
from plaso.lib import utils
from plaso.parsers import interface
from plaso.parsers import manager


class OperaTypedHistoryEvent(event.EventObject):
class OperaTypedHistoryEvent(time_events.TimestampEvent):
"""An EventObject for an Opera typed history entry."""

DATA_TYPE = u'opera:history:typed_entry'

def __init__(self, last_typed_time, url, entry_type):
def __init__(self, timestamp, url, entry_type):
"""A constructor for the typed history event.
Args:
last_typed_time: An ISO 8601 string denoting the last time
the URL was typed into a browser.
timestamp: The timestamp time value. The timestamp contains the
number of microseconds since Jan 1, 1970 00:00:00 UTC.
url: The url, or the typed hostname.
entry_type: A string indicating whether the URL was directly
typed in or the result of the user choosing from the
auto complete (based on prior history).
"""
super(OperaTypedHistoryEvent, self).__init__()
self.url = url
super(OperaTypedHistoryEvent, self).__init__(
timestamp, eventdata.EventTimestamp.LAST_VISITED_TIME)

self.entry_type = entry_type
self.url = url

if entry_type == u'selected':
self.entry_selection = u'Filled from autocomplete.'
elif entry_type == u'text':
self.entry_selection = u'Manually typed.'

self.timestamp = timelib.Timestamp.FromTimeString(last_typed_time)
self.timestamp_desc = eventdata.EventTimestamp.LAST_VISITED_TIME


class OperaGlobalHistoryEvent(time_events.PosixTimeEvent):
"""An EventObject for an Opera global history entry."""
Expand Down Expand Up @@ -119,14 +117,19 @@ def ParseFileObject(self, parser_mediator, file_object, **kwargs):

xml = ElementTree.parse(file_object)



for history_item in xml.iterfind(u'typed_history_item'):
content = history_item.get(u'content', u'')
last_typed = history_item.get(u'last_typed', u'')
entry_type = history_item.get(u'type', u'')

event_object = OperaTypedHistoryEvent(last_typed, content, entry_type)
try:
timestamp = timelib.Timestamp.FromTimeString(last_typed)
except errors.TimestampError:
parser_mediator.ProduceParseError(
u'Unable to parse time string: {0:s}'.format(last_typed))
continue

event_object = OperaTypedHistoryEvent(timestamp, content, entry_type)
parser_mediator.ProduceEvent(event_object)


Expand Down
Loading

0 comments on commit f56b8eb

Please sign in to comment.