Skip to content

Commit

Permalink
Code review: 312320043: Added attribute container identifiers #771
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Apr 3, 2017
1 parent a598478 commit d67f65b
Show file tree
Hide file tree
Showing 33 changed files with 1,069 additions and 1,121 deletions.
2 changes: 1 addition & 1 deletion config/dpkg/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ plaso (1.5.2-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <[email protected]> Mon, 03 Apr 2017 17:20:47 +0200
-- Log2Timeline <[email protected]> Mon, 03 Apr 2017 18:46:02 +0200
8 changes: 8 additions & 0 deletions docs/plaso.storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ plaso.storage.gzip_file module
:undoc-members:
:show-inheritance:

plaso.storage.identifiers module
--------------------------------

.. automodule:: plaso.storage.identifiers
:members:
:undoc-members:
:show-inheritance:

plaso.storage.interface module
------------------------------

Expand Down
11 changes: 7 additions & 4 deletions plaso/analysis/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
else:
import queue as Queue # pylint: disable=import-error

# pylint: disable=wrong-import-position
import requests

# Some distributions unvendor urllib3 from the requests module, and we need to
Expand Down Expand Up @@ -69,13 +70,15 @@ def _CreateEventTag(self, event, comment, labels):
comment (str): event tag comment.
labels (list[str]): event tag labels.
"""
event_uuid = getattr(event, u'uuid', None)
event_tag = events.EventTag(
comment=comment, event_uuid=event_uuid)
event_identifier = event.GetIdentifier()

event_tag = events.EventTag(comment=comment)
event_tag.SetEventIdentifier(event_identifier)
event_tag.AddLabels(labels)

event_identifier_string = event_identifier.CopyToString()
logging.debug(u'Created event tag: {0:s} for event: {1:s}'.format(
comment, event_uuid))
comment, event_identifier_string))

return event_tag

Expand Down
1 change: 1 addition & 0 deletions plaso/analysis/tagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def ExamineEvent(self, mediator, event):

labels = list(efilter_api.getvalues(matched_labels))
event_tag = self._CreateEventTag(event, self._EVENT_TAG_COMMENT, labels)

mediator.ProduceEventTag(event_tag)
self._number_of_event_tags += 1

Expand Down
2 changes: 0 additions & 2 deletions plaso/containers/event_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ class EventSource(interface.AttributeContainer):
data_type (str): attribute container type indicator.
file_entry_type (str): dfVFS file entry type.
path_spec (dfvfs.PathSpec): path specification.
storage_session (int): storage session number or 0 if not set.
"""
CONTAINER_TYPE = u'event_source'
DATA_TYPE = None
Expand All @@ -31,7 +30,6 @@ def __init__(self, path_spec=None):
self.data_type = self.DATA_TYPE
self.file_entry_type = None
self.path_spec = path_spec
self.storage_session = 0


class FileEntryEventSource(EventSource):
Expand Down
78 changes: 35 additions & 43 deletions plaso/containers/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,8 @@ def __eq__(self, event_object):
self.data_type != event_object.data_type):
return False

attribute_names = set(self.__dict__.keys())
if attribute_names != set(event_object.__dict__.keys()):
attribute_names = set(self.GetAttributeNames())
if attribute_names != set(event_object.GetAttributeNames()):
return False

# Here we have to deal with "near" duplicates, so not all attributes
Expand Down Expand Up @@ -171,7 +171,7 @@ def EqualityString(self):
str: string representation of the event object that can be used for
equality comparison.
"""
attribute_names = set(self.__dict__.keys())
attribute_names = set(self.GetAttributeNames())
fields = sorted(list(attribute_names.difference(self.COMPARE_EXCLUDE)))

# TODO: Review this (after 1.1.0 release). Is there a better/more clean
Expand Down Expand Up @@ -226,33 +226,31 @@ def GetAttributeNames(self):
list[str]: attribute names.
"""
attribute_names = []
for attribute_name in self.__dict__.keys():
attribute_value = getattr(self, attribute_name, None)
if attribute_value is not None:
attribute_names.append(attribute_name)
for attribute_name, attribute_value in self.GetAttributes():
if attribute_value is None:
continue

attribute_names.append(attribute_name)

return attribute_names


class EventTag(interface.AttributeContainer):
"""Class to represent an event tag attribute container.
The event tag either needs to have an event_uuid defined or both
the store_number and store_index to be valid. If both defined
the store_number and store_index is preferred.
Attributes:
comment (str): comments.
event_entry_index (int): serialized data stream entry index of the event,
this attribute is used by the ZIP and GZIP storage files to
uniquely identify the event linked to the tag.
event_stream_number (int): number of the serialized event stream, this
attribute is used by the ZIP and GZIP storage files to uniquely
identify the event linked to the tag.
event_uuid (str): event identifier (UUID).
labels (list[str]): labels, such as "malware", "application_execution".
store_index (int): store index of the corresponding event.
store_number (int): store number of the corresponding event.
"""
CONTAINER_TYPE = u'event_tag'

_ATTRIBUTE_NAMES = frozenset([
u'comment', u'event_uuid', u'labels', u'store_index', u'store_number'])

_INVALID_LABEL_CHARACTERS_REGEX = re.compile(r'[^A-Za-z0-9_]')

_VALID_LABEL_REGEX = re.compile(r'^[A-Za-z0-9_]+$')
Expand All @@ -265,20 +263,12 @@ def __init__(self, comment=None, event_uuid=None):
event_uuid (Optional[str]): event identifier (UUID).
"""
super(EventTag, self).__init__()
self._event_identifier = None
self.comment = comment
self.event_entry_index = None
self.event_stream_number = None
self.event_uuid = event_uuid
self.labels = []
# TODO: deprecate store number and index.
self.store_index = None
self.store_number = None

@property
def string_key(self):
"""str: string index key for this tag."""
if self.event_uuid is not None:
return self.event_uuid

return u'{0:d}:{1:d}'.format(self.store_number, self.store_index)

def AddComment(self, comment):
"""Adds a comment to the event tag.
Expand Down Expand Up @@ -342,13 +332,6 @@ def CopyToDict(self):
result_dict = {
u'labels': self.labels
}
if (self.store_number is not None and self.store_index is not None and
self.store_number > -1 and self.store_index > -1):
result_dict[u'store_number'] = self.store_number
result_dict[u'store_index'] = self.store_index
else:
result_dict[u'event_uuid'] = self.event_uuid

if self.comment:
result_dict[u'comment'] = self.comment

Expand All @@ -371,18 +354,27 @@ def CopyTextToLabel(cls, text, prefix=u''):
text = u'{0:s}{1:s}'.format(prefix, text)
return cls._INVALID_LABEL_CHARACTERS_REGEX.sub(u'_', text)

def GetAttributes(self):
"""Retrieves the attributes from the event tag object.
def GetEventIdentifier(self):
"""Retrieves the identifier of the event associated with the event tag.
Attributes that are set to None are ignored.
The event identifier is a storage specific value that should not
be serialized.
Returns:
AttributeContainerIdentifier: event identifier or None when not set.
"""
return self._event_identifier

Yields:
tuple[str, str]: event tag attribute name and value.
def SetEventIdentifier(self, event_identifier):
"""Sets the identifier of the event associated with the event tag.
The event identifier is a storage specific value that should not
be serialized.
Args:
event_identifier (AttributeContainerIdentifier): event identifier.
"""
for attribute_name in self._ATTRIBUTE_NAMES:
attribute_value = getattr(self, attribute_name, None)
if attribute_value is not None:
yield attribute_name, attribute_value
self._event_identifier = event_identifier


manager.AttributeContainersManager.RegisterAttributeContainers([
Expand Down
107 changes: 90 additions & 17 deletions plaso/containers/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,127 @@
from efilter.protocols import structured


class AttributeContainerIdentifier(object):
  """The attribute container identifier.

  The identifier is used to uniquely identify attribute containers.
  The value should be unique at runtime and in storage.
  """

  def __init__(self):
    """Initializes an attribute container identifier."""
    super(AttributeContainerIdentifier, self).__init__()
    # The identifier value defaults to id() of this object.
    # NOTE(review): id() is only unique among objects that are alive at the
    # same time; presumably a storage-specific identifier is expected to
    # replace this default before the container is persisted - confirm.
    self._identifier = id(self)

  def CopyToString(self):
    """Copies the identifier to a string representation.

    Returns:
      str: decimal string representation of the identifier.
    """
    return u'{0:d}'.format(self._identifier)


class AttributeContainer(object):
"""Class that defines the attribute container interface.
"""The attribute container interface.
This is the base class for those objects that exist primarily as
a container of attributes with basic accessors and mutators.
The CONTAINER_TYPE class attribute contains a string that identifies
the container type, e.g. the container type "event" identifies an event
object.
Attributes are public class members of a serializable type. Protected
and private class members are not to be serialized.
"""
CONTAINER_TYPE = None

def __init__(self):
"""Initializes an attribute container."""
super(AttributeContainer, self).__init__()
self._identifier = AttributeContainerIdentifier()
self._session_identifier = None

def CopyToDict(self):
"""Copies the attribute container to a dictionary.
Returns:
A dictionary containing the attribute container attributes.
dict[str, object]: attribute values per name.
"""
dictionary = {}
return {
attribute_name: attribute_value
for attribute_name, attribute_value in self.GetAttributes()}

def GetAttributeNames(self):
"""Retrieves the names of all attributes.
Returns:
list[str]: attribute names.
"""
attribute_names = []
for attribute_name in iter(self.__dict__.keys()):
attribute_value = getattr(self, attribute_name, None)
if attribute_value is not None:
dictionary[attribute_name] = attribute_value
if attribute_name.startswith(u'_'):
continue
attribute_names.append(attribute_name)

return dictionary
return attribute_names

def GetAttributes(self):
"""Retrieves the attribute names and values.
Attributes that are set to None are ignored.
Yields:
A tuple containing an attribute name and value.
tuple[str, object]: attribute name and value.
"""
for attribute_name in iter(self.__dict__.keys()):
attribute_value = getattr(self, attribute_name, None)
if attribute_value is not None:
yield attribute_name, attribute_value
for attribute_name, attribute_value in iter(self.__dict__.items()):
if attribute_name.startswith(u'_') or attribute_value is None:
continue

def GetAttributeNames(self):
"""Retrieves the names of all attributes.
yield attribute_name, attribute_value

Attributes that are set to None are ignored.
def GetIdentifier(self):
"""Retrieves the identifier.
The identifier is a storage specific value that should not be serialized.
Returns:
AttributeContainerIdentifier: a unique identifier for the container.
"""
return self._identifier

def GetSessionIdentifier(self):
"""Retrieves the session identifier.
The session identifier is a storage specific value that should not
be serialized.
Returns:
A list containing the attribute container attribute names.
str: session identifier.
"""
return self._session_identifier

def SetIdentifier(self, identifier):
"""Sets the identifier.
The identifier is a storage specific value that should not be serialized.
Args:
identifier (AttributeContainerIdentifier): identifier.
"""
self._identifier = identifier

def SetSessionIdentifier(self, session_identifier):
"""Sets the session identifier.
The session identifier is a storage specific value that should not
be serialized.
Args:
session_identifier (str): session identifier.
"""
return [name for name, _ in list(self.GetAttributes())]
self._session_identifier = session_identifier


# Efilter protocol definition to enable filtering of containers.
Expand Down
Loading

0 comments on commit d67f65b

Please sign in to comment.