Skip to content

Commit

Permalink
Changes to parser mediator for system configurations (#4633)
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz authored Apr 17, 2023
1 parent 84f5943 commit 388816b
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 55 deletions.
4 changes: 4 additions & 0 deletions plaso/containers/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ class SystemConfigurationArtifact(ArtifactAttributeContainer):
Attributes:
available_time_zones (list[TimeZone]): available time zones.
code_page (str): system code page.
environment_variables (list[EnvironmentVariableArtifact]): environment
variables.
hostname (HostnameArtifact): hostname.
keyboard_layout (str): keyboard layout.
language (str): system language.
Expand All @@ -471,6 +473,7 @@ class SystemConfigurationArtifact(ArtifactAttributeContainer):
# TODO: add SCHEMA
# SCHEMA = {
# 'code_page': 'str',
# 'environment_variables': 'list[EnvironmentVariableArtifact]',
# 'keyboard_layout': 'str',
# 'hostname': 'HostnameArtifact',
# 'language': 'str',
Expand All @@ -492,6 +495,7 @@ def __init__(self, code_page=None, language=None, time_zone=None):
# TODO: kept for backwards compatibility.
self.available_time_zones = []
self.code_page = code_page
self.environment_variables = []
self.hostname = None
self.keyboard_layout = None
self.language = language
Expand Down
4 changes: 4 additions & 0 deletions plaso/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ def PreprocessSource(

system_configuration = artifacts.SystemConfigurationArtifact(
code_page=mediator.code_page, language=mediator.language)
system_configuration.environment_variables = (
mediator.GetEnvironmentVariables())
system_configuration.hostname = mediator.hostname
system_configuration.keyboard_layout = mediator.GetValue(
'keyboard_layout')
Expand All @@ -371,6 +373,8 @@ def PreprocessSource(

system_configurations.append(system_configuration)

mediator.Reset()

if system_configurations:
# TODO: kept for backwards compatibility.
self.knowledge_base.ReadSystemConfigurationArtifact(
Expand Down
5 changes: 0 additions & 5 deletions plaso/multi_process/extraction_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,14 +757,9 @@ def _StartWorkerProcess(self, process_name):
port=self._task_queue_port,
timeout_seconds=self._TASK_QUEUE_TIMEOUT_SECONDS)

# Ensure environment_variables is a list otherwise pickle will fail
# on Windows when creating a new process.
environment_variables = list(self.knowledge_base.GetEnvironmentVariables())

process = extraction_process.ExtractionWorkerProcess(
task_queue, self._collection_filters_helper,
self._processing_configuration, self._system_configurations,
environment_variables,
enable_sigsegv_handler=self._enable_sigsegv_handler, name=process_name)

# Remove all possible log handlers to prevent a child process from logging
Expand Down
6 changes: 1 addition & 5 deletions plaso/multi_process/extraction_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ExtractionWorkerProcess(task_process.MultiProcessTaskProcess):

def __init__(
self, task_queue, collection_filters_helper, processing_configuration,
system_configurations, environment_variables, **kwargs):
system_configurations, **kwargs):
"""Initializes an extraction worker process.
Non-specified keyword arguments (kwargs) are directly passed to
Expand All @@ -37,8 +37,6 @@ def __init__(
configuration.
system_configurations (list[SystemConfigurationArtifact]): system
configurations.
environment_variables (list[EnvironmentVariableArtifact]): environment
variables
kwargs: keyword arguments to pass to multiprocessing.Process.
"""
super(ExtractionWorkerProcess, self).__init__(
Expand All @@ -47,7 +45,6 @@ def __init__(
self._collection_filters_helper = collection_filters_helper
self._buffer_size = 0
self._current_display_name = ''
self._environment_variables = environment_variables
self._extraction_worker = None
self._file_system_cache = []
self._number_of_consumed_sources = 0
Expand Down Expand Up @@ -100,7 +97,6 @@ def _CreateParserMediator(
"""
mediator = parsers_mediator.ParserMediator(
collection_filters_helper=self._collection_filters_helper,
environment_variables=self._environment_variables,
resolver_context=resolver_context,
system_configurations=system_configurations)

Expand Down
87 changes: 53 additions & 34 deletions plaso/parsers/mediator.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,23 +42,21 @@ class ParserMediator(object):
_INT64_MAX = (1 << 63) - 1

def __init__(
self, collection_filters_helper=None, environment_variables=None,
resolver_context=None, system_configurations=None):
self, collection_filters_helper=None, resolver_context=None,
system_configurations=None):
"""Initializes a parser mediator.
Args:
collection_filters_helper (Optional[CollectionFiltersHelper]): collection
filters helper.
environment_variables (Optional[list[EnvironmentVariableArtifact]]):
environment variables.
resolver_context (Optional[dfvfs.Context]): resolver context.
system_configurations (Optional[list[SystemConfigurationArtifact]]):
system configurations.
"""
super(ParserMediator, self).__init__()
self._abort = False
self._cached_parser_chain = None
self._environment_variables = environment_variables or []
self._environment_variables_per_path_spec = None
self._event_data_stream = None
self._event_data_stream_identifier = None
self._extract_winevt_resources = True
Expand All @@ -79,14 +77,15 @@ def __init__(
self._process_information = None
self._resolver_context = resolver_context
self._storage_writer = None
self._system_configurations = system_configurations or []
self._temporary_directory = None
self._windows_event_log_providers_per_path = None

self.collection_filters_helper = collection_filters_helper
self.last_activity_timestamp = 0.0
self.parsers_counter = collections.Counter()

self._CreateEnvironmentVariablesPerPathSpec(system_configurations)

@property
def abort(self):
"""bool: True if parsing should be aborted."""
Expand Down Expand Up @@ -122,6 +121,35 @@ def temporary_directory(self):
"""str: path of the directory for temporary files."""
return self._temporary_directory

def _CreateEnvironmentVariablesPerPathSpec(self, system_configurations):
  """Builds the lookup table of environment variables per path specification.

  The table is keyed on the parent of every path specification of a system
  configuration and maps to the environment variables of that configuration.
  System configurations without environment variables and path specifications
  without a parent are ignored.

  Args:
    system_configurations (list[SystemConfigurationArtifact]): system
        configurations, where None is treated as an empty list.
  """
  # The attribute is set before the table is filled so that it always refers
  # to the table that is being built.
  lookup_table = self._environment_variables_per_path_spec = {}

  for configuration in system_configurations or []:
    variables = configuration.environment_variables
    if not variables:
      continue

    for path_spec in configuration.path_specs:
      parent_path_spec = path_spec.parent
      if parent_path_spec:
        lookup_table[parent_path_spec] = variables

def _GetEnvironmentVariablesByPathSpec(self, path_spec):
  """Looks up the environment variables that apply to a path specification.

  The lookup uses the parent of the path specification as key in the
  environment variables per path specification lookup table.

  Args:
    path_spec (dfvfs.PathSpec): path specification.

  Returns:
    list[EnvironmentVariableArtifact]: environment variables or None if
        the path specification is not set, has no parent or no environment
        variables are known for its parent.
  """
  parent_path_spec = path_spec.parent if path_spec else None
  if parent_path_spec:
    return self._environment_variables_per_path_spec.get(parent_path_spec)

  return None

def AddYearLessLogHelper(self, year_less_log_helper):
"""Adds a year-less log helper.
Expand Down Expand Up @@ -184,23 +212,17 @@ def ExpandWindowsPath(self, path):
Returns:
str: expanded Windows path.
"""
return path_helper.PathHelper.ExpandWindowsPath(
path, self._environment_variables)
path_spec = getattr(self._file_entry, 'path_spec', None)
environment_variables = self._GetEnvironmentVariablesByPathSpec(path_spec)
return path_helper.PathHelper.ExpandWindowsPath(path, environment_variables)

def GetCodePage(self, file_entry=None):
def GetCodePage(self):
"""Retrieves the code page related to the file entry.
Args:
file_entry (Optional[dfvfs.FileEntry]): file entry object, where None
will use the active file entry.
Returns:
str: code page.
"""
if not file_entry:
file_entry = self._file_entry

path_spec = getattr(file_entry, 'path_spec', None)
path_spec = getattr(self._file_entry, 'path_spec', None)
if path_spec:
# TODO: determine code page from system_configurations.
pass
Expand Down Expand Up @@ -277,20 +299,13 @@ def GetFilename(self):

return self._file_entry.name

def GetLanguageTag(self, file_entry=None):
def GetLanguageTag(self):
"""Retrieves the language tag related to the file entry.
Args:
file_entry (Optional[dfvfs.FileEntry]): file entry object, where None
will use the active file entry.
Returns:
str: language tag.
"""
if not file_entry:
file_entry = self._file_entry

path_spec = getattr(file_entry, 'path_spec', None)
path_spec = getattr(self._file_entry, 'path_spec', None)
if path_spec:
# TODO: determine language tag from system_configurations.
pass
Expand All @@ -314,11 +329,11 @@ def GetRelativePath(self):
str: relative path of the current file entry or None if no current
file entry.
"""
if self._file_entry is None:
path_spec = getattr(self._file_entry, 'path_spec', None)
if not path_spec:
return None

return path_helper.PathHelper.GetRelativePathForPathSpec(
self._file_entry.path_spec)
return path_helper.PathHelper.GetRelativePathForPathSpec(path_spec)

def GetRelativePathForPathSpec(self, path_spec):
"""Retrieves the relative path for a path specification.
Expand All @@ -339,15 +354,19 @@ def GetWindowsEventLogMessageFile(self):
if no current file entry or no Windows EventLog message file was
found.
"""
path_spec = getattr(self._file_entry, 'path_spec', None)

if (self._windows_event_log_providers_per_path is None and
self._storage_writer):
environment_variables = self._GetEnvironmentVariablesByPathSpec(path_spec)

self._windows_event_log_providers_per_path = {}

for provider in self._storage_writer.GetAttributeContainers(
'windows_eventlog_provider'):
for windows_path in provider.event_message_files or []:
path, filename = path_helper.PathHelper.GetWindowsSystemPath(
windows_path, self._environment_variables)
windows_path, environment_variables)
path = path.lower()
filename = filename.lower()

Expand All @@ -360,13 +379,13 @@ def GetWindowsEventLogMessageFile(self):
self._windows_event_log_providers_per_path[path][filename] = provider

message_file = None
if self._file_entry:
if path_spec:
relative_path = path_helper.PathHelper.GetRelativePathForPathSpec(
self._file_entry.path_spec)
path_spec)
lookup_path = relative_path.lower()

path_segment_separator = path_helper.PathHelper.GetPathSegmentSeparator(
self._file_entry.path_spec)
path_spec)

lookup_path, _, lookup_filename = lookup_path.rpartition(
path_segment_separator)
Expand Down Expand Up @@ -588,9 +607,9 @@ def SetFileEntry(self, file_entry):
Args:
file_entry (dfvfs.FileEntry): file entry.
"""
self._file_entry = file_entry
self._event_data_stream = None
self._event_data_stream_identifier = None
self._file_entry = file_entry

def SetPreferredCodepage(self, code_page):
"""Sets the preferred code page.
Expand Down
14 changes: 14 additions & 0 deletions plaso/preprocessors/mediator.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,20 @@ def ProducePreprocessingWarning(self, plugin_name, message):

logger.debug('[{0:s}] {1:s}'.format(plugin_name, message))

def Reset(self):
  """Restores the mediator to a blank state.

  All lookup tables are replaced by empty dictionaries, the current file
  entry is cleared and the code page, hostname, language and time zone are
  set to None.
  """
  # Every lookup table gets its own, newly created dictionary.
  for attribute_name in (
      '_available_time_zones', '_environment_variables', '_values',
      '_windows_eventlog_providers',
      '_windows_eventlog_providers_by_identifier'):
    setattr(self, attribute_name, {})

  self._file_entry = None

  for attribute_name in ('code_page', 'hostname', 'language', 'time_zone'):
    setattr(self, attribute_name, None)

def SetCodePage(self, code_page):
"""Sets the code page.
Expand Down
1 change: 0 additions & 1 deletion plaso/single_process/extraction_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,6 @@ def _CreateParserMediator(

parser_mediator = parsers_mediator.ParserMediator(
collection_filters_helper=self._collection_filters_helper,
environment_variables=environment_variables,
resolver_context=resolver_context,
system_configurations=system_configurations)

Expand Down
15 changes: 12 additions & 3 deletions tests/containers/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,18 @@ def testGetAttributeNames(self):
attribute_container = artifacts.SystemConfigurationArtifact()

expected_attribute_names = [
'available_time_zones', 'code_page', 'hostname', 'keyboard_layout',
'language', 'operating_system', 'operating_system_product',
'operating_system_version', 'path_specs', 'time_zone', 'user_accounts']
'available_time_zones',
'code_page',
'environment_variables',
'hostname',
'keyboard_layout',
'language',
'operating_system',
'operating_system_product',
'operating_system_version',
'path_specs',
'time_zone',
'user_accounts']

attribute_names = sorted(attribute_container.GetAttributeNames())
self.assertEqual(attribute_names, expected_attribute_names)
Expand Down
Loading

0 comments on commit 388816b

Please sign in to comment.