diff --git a/plaso/cli/extraction_tool.py b/plaso/cli/extraction_tool.py index b13b294e53..34c1effb3a 100644 --- a/plaso/cli/extraction_tool.py +++ b/plaso/cli/extraction_tool.py @@ -183,6 +183,8 @@ def _CreateExtractionProcessingConfiguration(self): ProcessingConfiguration: extraction processing configuration. """ configuration = configurations.ProcessingConfiguration() + configuration.artifact_definitions_path = self._artifact_definitions_path + configuration.custom_artifacts_path = self._custom_artifacts_path configuration.data_location = self._data_location configuration.extraction.archive_types_string = self._archive_types_string configuration.artifact_filters = self._artifact_filters @@ -412,35 +414,6 @@ def _ParseProcessingOptions(self, options): dfvfs_definitions.PREFERRED_GPT_BACK_END = ( dfvfs_definitions.TYPE_INDICATOR_GPT) - def _PreprocessSource(self, extraction_engine, storage_writer): - """Preprocesses the source. - - Args: - extraction_engine (BaseEngine): extraction engine to preprocess - the sources. - storage_writer (StorageWriter): storage writer. - - Returns: - list[SystemConfigurationArtifact]: system configurations found in - the source. - """ - logger.debug('Starting preprocessing.') - - try: - system_configurations = extraction_engine.PreprocessSource( - self._artifact_definitions_path, self._custom_artifacts_path, - self._file_system_path_specs, storage_writer, - resolver_context=self._resolver_context) - - except IOError as exception: - system_configurations = [] - - logger.error('Unable to preprocess with error: {0!s}'.format(exception)) - - logger.debug('Preprocessing done.') - - return system_configurations - def _ProcessSource(self, session, storage_writer): """Processes the source and extract events. @@ -460,19 +433,32 @@ def _ProcessSource(self, session, storage_writer): extraction_engine = self._CreateExtractionEngine(single_process_mode) + extraction_engine.BuildArtifactsRegistry( + self._artifact_definitions_path, self._custom_artifacts_path) + source_configuration = artifacts.SourceConfigurationArtifact( path=self._source_path, source_type=self._source_type) # TODO: check if the source was processed previously. # TODO: add check for modification time of source. - if self._source_type not in self._SOURCE_TYPES_TO_PREPROCESS: - system_configurations = [] - else: - # If the source is a directory or a storage media image - # run pre-processing. - system_configurations = self._PreprocessSource( - extraction_engine, storage_writer) + # If the source is a directory or a storage media image run pre-processing. + + system_configurations = [] + if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS: + try: + logger.debug('Starting preprocessing.') + + system_configurations = extraction_engine.PreprocessSource( + self._file_system_path_specs, storage_writer, + resolver_context=self._resolver_context) + + logger.debug('Preprocessing done.') + + except IOError as exception: + system_configurations = [] + + logger.error('Unable to preprocess with error: {0!s}'.format(exception)) # TODO: check if the source was processed previously and if system # configuration differs. 
@@ -510,13 +496,15 @@ def _ProcessSource(self, session, storage_writer): self._extract_winevt_resources = False - configuration = self._CreateExtractionProcessingConfiguration() + processing_configuration = ( + self._CreateExtractionProcessingConfiguration()) + processing_configuration.force_parser = force_parser + environment_variables = ( extraction_engine.knowledge_base.GetEnvironmentVariables()) try: extraction_engine.BuildCollectionFilters( - self._artifact_definitions_path, self._custom_artifacts_path, environment_variables, artifact_filter_names=self._artifact_filters, filter_file_path=self._filter_file) except errors.InvalidFilter as exception: @@ -551,18 +539,17 @@ def _ProcessSource(self, session, storage_writer): logger.debug('Starting extraction in single process mode.') processing_status = extraction_engine.ProcessSource( - storage_writer, self._resolver_context, configuration, - system_configurations, self._file_system_path_specs, - force_parser=force_parser) + storage_writer, self._resolver_context, processing_configuration, + system_configurations, self._file_system_path_specs) else: logger.debug('Starting extraction in multi process mode.') - # The following overrides are needed because pylint 2.6.0 gets confused - # about which ProcessSource to check against. - # pylint: disable=no-value-for-parameter,unexpected-keyword-arg - processing_status = extraction_engine.ProcessSource( - storage_writer, session.identifier, configuration, + # The method is named ProcessSourceMulti because pylint 2.6.0 and + # later gets confused about keyword arguments when ProcessSource + # is used. + processing_status = extraction_engine.ProcessSourceMulti( + storage_writer, session.identifier, processing_configuration, system_configurations, self._file_system_path_specs, enable_sigsegv_handler=self._enable_sigsegv_handler, storage_file_path=self._storage_file_path) diff --git a/plaso/cli/image_export_tool.py b/plaso/cli/image_export_tool.py index 348140b800..acd8f8f3ef 100644 --- a/plaso/cli/image_export_tool.py +++ b/plaso/cli/image_export_tool.py @@ -314,20 +314,35 @@ def _Extract( """ extraction_engine = engine.BaseEngine() - # If the source is a directory or a storage media image - # run pre-processing. + extraction_engine.BuildArtifactsRegistry( + artifact_definitions_path, custom_artifacts_path) + + # If the source is a directory or a storage media image run pre-processing. + + system_configurations = [] if self._source_type in self._SOURCE_TYPES_TO_PREPROCESS: - system_configurations = self._PreprocessSource(extraction_engine) + try: + logger.debug('Starting preprocessing.') + + # Setting storage writer to None here since we do not want to store + # preprocessing information. 
+ system_configurations = extraction_engine.PreprocessSource( + self._file_system_path_specs, None, + resolver_context=self._resolver_context) + + logger.debug('Preprocessing done.') - # TODO: use system_configurations instead of knowledge base - _ = system_configurations + except IOError as exception: + logger.error('Unable to preprocess with error: {0!s}'.format(exception)) + + # TODO: use system_configurations instead of knowledge base + _ = system_configurations environment_variables = ( extraction_engine.knowledge_base.GetEnvironmentVariables()) try: extraction_engine.BuildCollectionFilters( - artifact_definitions_path, custom_artifacts_path, environment_variables, artifact_filter_names=artifact_filters, filter_file_path=filter_file) except errors.InvalidFilter as exception: @@ -335,13 +350,8 @@ def _Extract( 'Unable to build collection filters with error: {0!s}'.format( exception)) - filters_helper = extraction_engine.collection_filters_helper - - excluded_find_specs = None - included_find_specs = None - if filters_helper: - excluded_find_specs = filters_helper.excluded_file_system_find_specs - included_find_specs = filters_helper.included_file_system_find_specs + excluded_find_specs = extraction_engine.GetCollectionExcludedFindSpecs() + included_find_specs = extraction_engine.GetCollectionIncludedFindSpecs() output_writer.Write('Extracting file entries.\n') @@ -475,34 +485,6 @@ def _ParseSignatureIdentifiers(self, data_location, signature_identifiers): specification_store, signature_identifiers) self._filter_collection.AddFilter(file_entry_filter) - def _PreprocessSource(self, extraction_engine): - """Preprocesses the source. - - Args: - extraction_engine (BaseEngine): extraction engine to preprocess - the sources. - - Returns: - list[SystemConfigurationArtifact]: system configurations found in - the source. - """ - logger.debug('Starting preprocessing.') - - try: - # Setting storage writer to None here since we do not want to store - # preprocessing information. - system_configurations = extraction_engine.PreprocessSource( - self._artifact_definitions_path, self._custom_artifacts_path, - self._file_system_path_specs, None, - resolver_context=self._resolver_context) - - except IOError as exception: - logger.error('Unable to preprocess with error: {0!s}'.format(exception)) - - logger.debug('Preprocessing done.') - - return system_configurations - def _ReadSpecificationFile(self, path): """Reads the format specification file. diff --git a/plaso/engine/configurations.py b/plaso/engine/configurations.py index d6d8af005f..593c887e51 100644 --- a/plaso/engine/configurations.py +++ b/plaso/engine/configurations.py @@ -183,10 +183,14 @@ class ProcessingConfiguration(interface.AttributeContainer): """Configuration settings for processing. Attributes: + artifact_definitions_path (str): path to artifact definitions directory + or file. artifact_filters (Optional list[str]): names of artifact definitions that are used for filtering file system and Windows Registry key paths. credentials (list[CredentialConfiguration]): credential configurations. + custom_artifacts_path (str): path to custom artifact definitions + directory or file. data_location (str): path to the data files. debug_output (bool): True if debug output should be enabled. dynamic_time (bool): True if date and time values should be represented @@ -195,6 +199,8 @@ class ProcessingConfiguration(interface.AttributeContainer): configuration. extraction (ExtractionConfiguration): extraction configuration. 
filter_file (str): path to a file with find specifications. + force_parser (bool): True if a specified parser should be forced to be used + to extract events. log_filename (str): name of the log file. parser_filter_expression (str): parser filter expression, where None represents all parsers and plugins. @@ -215,14 +221,17 @@ class ProcessingConfiguration(interface.AttributeContainer): def __init__(self): """Initializes a process configuration object.""" super(ProcessingConfiguration, self).__init__() + self.artifact_definitions_path = None self.artifact_filters = None self.credentials = [] + self.custom_artifacts_path = None self.data_location = None self.debug_output = False self.dynamic_time = False self.event_extraction = EventExtractionConfiguration() self.extraction = ExtractionConfiguration() self.filter_file = None + self.force_parser = None self.log_filename = None self.parser_filter_expression = None self.preferred_codepage = None diff --git a/plaso/engine/engine.py b/plaso/engine/engine.py index 668fcc9f22..479cd0e947 100644 --- a/plaso/engine/engine.py +++ b/plaso/engine/engine.py @@ -30,8 +30,6 @@ class BaseEngine(object): """Processing engine interface. Attributes: - collection_filters_helper (CollectionFiltersHelper): collection filters - helper. knowledge_base (KnowledgeBase): knowledge base. """ @@ -43,6 +41,8 @@ def __init__(self): super(BaseEngine, self).__init__() self._abort = False self._analyzers_profiler = None + self._artifacts_registry = None + self._collection_filters_helper = None self._memory_profiler = None self._name = 'Main' self._processing_status = processing_status.ProcessingStatus() @@ -53,58 +53,8 @@ def __init__(self): self._storage_profiler = None self._task_queue_profiler = None - self.collection_filters_helper = None self.knowledge_base = knowledge_base.KnowledgeBase() - def _BuildArtifactsRegistry( - self, artifact_definitions_path, custom_artifacts_path): - """Build Find Specs from artifacts or filter file if available. - - Args: - artifact_definitions_path (str): path to artifact definitions directory - or file. - custom_artifacts_path (str): path to custom artifact definitions - directory or file. - - Returns: - artifacts.ArtifactDefinitionsRegistry: artifact definitions registry. - - Raises: - BadConfigOption: if artifact definitions cannot be read. - """ - if not artifact_definitions_path: - raise errors.BadConfigOption( - 'No such artifact definitions: {0:s}.'.format( - artifact_definitions_path)) - - registry = artifacts_registry.ArtifactDefinitionsRegistry() - reader = artifacts_reader.YamlArtifactsReader() - - try: - if os.path.isdir(artifact_definitions_path): - registry.ReadFromDirectory(reader, artifact_definitions_path) - else: - registry.ReadFromFile(reader, artifact_definitions_path) - - except (KeyError, artifacts_errors.FormatError) as exception: - raise errors.BadConfigOption(( - 'Unable to read artifact definitions from: {0:s} with error: ' - '{1!s}').format(artifact_definitions_path, exception)) - - if custom_artifacts_path: - try: - if os.path.isdir(custom_artifacts_path): - registry.ReadFromDirectory(reader, custom_artifacts_path) - else: - registry.ReadFromFile(reader, custom_artifacts_path) - - except (KeyError, artifacts_errors.FormatError) as exception: - raise errors.BadConfigOption(( - 'Unable to read custom artifact definitions from: {0:s} with ' - 'error: {1!s}').format(custom_artifacts_path, exception)) - - return registry - def _StartProfiling(self, configuration): """Starts profiling. 
@@ -173,6 +123,117 @@ def _StopProfiling(self):
       self._task_queue_profiler.Stop()
       self._task_queue_profiler = None
 
+  def BuildArtifactsRegistry(
+      self, artifact_definitions_path, custom_artifacts_path):
+    """Builds an artifact definitions registry.
+
+    Args:
+      artifact_definitions_path (str): path to artifact definitions directory
+          or file.
+      custom_artifacts_path (str): path to custom artifact definitions
+          directory or file.
+
+    Raises:
+      BadConfigOption: if artifact definitions cannot be read.
+    """
+    if not artifact_definitions_path:
+      raise errors.BadConfigOption('Missing artifact definitions path.')
+
+    registry = artifacts_registry.ArtifactDefinitionsRegistry()
+    reader = artifacts_reader.YamlArtifactsReader()
+
+    try:
+      if os.path.isdir(artifact_definitions_path):
+        registry.ReadFromDirectory(reader, artifact_definitions_path)
+      else:
+        registry.ReadFromFile(reader, artifact_definitions_path)
+
+    except (KeyError, artifacts_errors.FormatError) as exception:
+      raise errors.BadConfigOption((
+          'Unable to read artifact definitions from: {0:s} with error: '
+          '{1!s}').format(artifact_definitions_path, exception))
+
+    if custom_artifacts_path:
+      try:
+        if os.path.isdir(custom_artifacts_path):
+          registry.ReadFromDirectory(reader, custom_artifacts_path)
+        else:
+          registry.ReadFromFile(reader, custom_artifacts_path)
+
+      except (KeyError, artifacts_errors.FormatError) as exception:
+        raise errors.BadConfigOption((
+            'Unable to read custom artifact definitions from: {0:s} with '
+            'error: {1!s}').format(custom_artifacts_path, exception))
+
+    self._artifacts_registry = registry
+
+  def BuildCollectionFilters(
+      self, environment_variables, artifact_filter_names=None,
+      filter_file_path=None):
+    """Builds collection filters from artifacts or filter file if available.
+
+    Args:
+      environment_variables (list[EnvironmentVariableArtifact]):
+          environment variables.
+      artifact_filter_names (Optional[list[str]]): names of artifact
+          definitions that are used for filtering file system and Windows
+          Registry key paths.
+      filter_file_path (Optional[str]): path of filter file.
+
+    Raises:
+      InvalidFilter: if no valid file system find specifications are built.
+    """
+    filters_helper = None
+
+    if artifact_filter_names:
+      logger.debug(
+          'building find specification based on artifacts: {0:s}'.format(
+              ', '.join(artifact_filter_names)))
+
+      filters_helper = artifact_filters.ArtifactDefinitionsFiltersHelper(
+          self._artifacts_registry)
+      filters_helper.BuildFindSpecs(
+          artifact_filter_names, environment_variables=environment_variables)
+
+      # If the user selected Windows Registry artifacts we have to ensure
+      # the Windows Registry files are parsed.
+ if filters_helper.registry_find_specs: + filters_helper.BuildFindSpecs( + self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES, + environment_variables=environment_variables) + + if not filters_helper.included_file_system_find_specs: + raise errors.InvalidFilter( + 'No valid file system find specifications were built from ' + 'artifacts.') + + elif filter_file_path: + logger.debug( + 'building find specification based on filter file: {0:s}'.format( + filter_file_path)) + + filter_file_path_lower = filter_file_path.lower() + if (filter_file_path_lower.endswith('.yaml') or + filter_file_path_lower.endswith('.yml')): + filter_file_object = yaml_filter_file.YAMLFilterFile() + else: + filter_file_object = filter_file.FilterFile() + + filter_file_path_filters = filter_file_object.ReadFromFile( + filter_file_path) + + filters_helper = path_filters.PathCollectionFiltersHelper() + filters_helper.BuildFindSpecs( + filter_file_path_filters, environment_variables=environment_variables) + + if (not filters_helper.excluded_file_system_find_specs and + not filters_helper.included_file_system_find_specs): + raise errors.InvalidFilter(( + 'No valid file system find specifications were built from filter ' + 'file: {0:s}.').format(filter_file_path)) + + self._collection_filters_helper = filters_helper + # pylint: disable=too-many-arguments @classmethod def CreateSession( @@ -202,6 +263,26 @@ def CreateSession( return session + def GetCollectionExcludedFindSpecs(self): + """Retrieves find specifications to exclude from collection. + + Returns: + list[dfvfs.FindSpec]: find specifications to exclude from collection. + """ + return getattr( + self._collection_filters_helper, 'excluded_file_system_find_specs', + None) or [] + + def GetCollectionIncludedFindSpecs(self): + """Retrieves find specifications to include in collection. + + Returns: + list[dfvfs.FindSpec]: find specifications to include in collection. + """ + return getattr( + self._collection_filters_helper, 'included_file_system_find_specs', + None) or [] + def GetSourceFileSystem(self, file_system_path_spec, resolver_context=None): """Retrieves the file system of the source. @@ -237,15 +318,10 @@ def GetSourceFileSystem(self, file_system_path_spec, resolver_context=None): return file_system, mount_point def PreprocessSource( - self, artifact_definitions_path, custom_artifacts_path, - file_system_path_specs, storage_writer, resolver_context=None): + self, file_system_path_specs, storage_writer, resolver_context=None): """Preprocesses a source. Args: - artifact_definitions_path (str): path to artifact definitions directory - or file. - custom_artifacts_path (str): path to custom artifact definitions - directory or file. file_system_path_specs (list[dfvfs.PathSpec]): path specifications of the source file systems to process. storage_writer (StorageWriter): storage writer. @@ -255,9 +331,6 @@ def PreprocessSource( list[SystemConfigurationArtifact]: system configurations found in the source. 
""" - artifacts_registry_object = self._BuildArtifactsRegistry( - artifact_definitions_path, custom_artifacts_path) - mediator = preprocess_mediator.PreprocessMediator(storage_writer) detected_operating_systems = [] @@ -271,7 +344,7 @@ def PreprocessSource( continue preprocess_manager.PreprocessPluginsManager.RunPlugins( - artifacts_registry_object, file_system, mount_point, mediator) + self._artifacts_registry, file_system, mount_point, mediator) operating_system = mediator.GetValue('operating_system') if not operating_system: @@ -305,75 +378,6 @@ def PreprocessSource( return system_configurations - def BuildCollectionFilters( - self, artifact_definitions_path, custom_artifacts_path, - environment_variables, artifact_filter_names=None, filter_file_path=None): - """Builds collection filters from artifacts or filter file if available. - - Args: - artifact_definitions_path (str): path to artifact definitions file. - custom_artifacts_path (str): path to custom artifact definitions file. - environment_variables (list[EnvironmentVariableArtifact]): - environment variables. - artifact_filter_names (Optional[list[str]]): names of artifact - definitions that are used for filtering file system and Windows - Registry key paths. - filter_file_path (Optional[str]): path of filter file. - - Raises: - InvalidFilter: if no valid file system find specifications are built. - """ - if artifact_filter_names: - logger.debug( - 'building find specification based on artifacts: {0:s}'.format( - ', '.join(artifact_filter_names))) - - artifacts_registry_object = self._BuildArtifactsRegistry( - artifact_definitions_path, custom_artifacts_path) - self.collection_filters_helper = ( - artifact_filters.ArtifactDefinitionsFiltersHelper( - artifacts_registry_object)) - self.collection_filters_helper.BuildFindSpecs( - artifact_filter_names, environment_variables=environment_variables) - - # If the user selected Windows Registry artifacts we have to ensure - # the Windows Registry files are parsed. - if self.collection_filters_helper.registry_find_specs: - self.collection_filters_helper.BuildFindSpecs( - self._WINDOWS_REGISTRY_FILES_ARTIFACT_NAMES, - environment_variables=environment_variables) - - if not self.collection_filters_helper.included_file_system_find_specs: - raise errors.InvalidFilter( - 'No valid file system find specifications were built from ' - 'artifacts.') - - elif filter_file_path: - logger.debug( - 'building find specification based on filter file: {0:s}'.format( - filter_file_path)) - - filter_file_path_lower = filter_file_path.lower() - if (filter_file_path_lower.endswith('.yaml') or - filter_file_path_lower.endswith('.yml')): - filter_file_object = yaml_filter_file.YAMLFilterFile() - else: - filter_file_object = filter_file.FilterFile() - - filter_file_path_filters = filter_file_object.ReadFromFile( - filter_file_path) - - self.collection_filters_helper = ( - path_filters.PathCollectionFiltersHelper()) - self.collection_filters_helper.BuildFindSpecs( - filter_file_path_filters, environment_variables=environment_variables) - - if (not self.collection_filters_helper.excluded_file_system_find_specs and - not self.collection_filters_helper.included_file_system_find_specs): - raise errors.InvalidFilter(( - 'No valid file system find specifications were built from filter ' - 'file: {0:s}.').format(filter_file_path)) - def SetStatusUpdateInterval(self, status_update_interval): """Sets the status update interval. 
diff --git a/plaso/multi_process/extraction_engine.py b/plaso/multi_process/extraction_engine.py index e74a254925..ef54702170 100644 --- a/plaso/multi_process/extraction_engine.py +++ b/plaso/multi_process/extraction_engine.py @@ -554,17 +554,14 @@ def _ProcessSource( for parser_count in storage_writer.GetAttributeContainers( 'parser_count')}) - find_specs = None - if self.collection_filters_helper: - find_specs = ( - self.collection_filters_helper.included_file_system_find_specs) + included_find_specs = self.GetCollectionIncludedFindSpecs() for file_system_path_spec in file_system_path_specs: if self._abort: break path_spec_generator = self._path_spec_extractor.ExtractPathSpecs( - file_system_path_spec, find_specs=find_specs, + file_system_path_spec, find_specs=included_find_specs, recurse_file_system=False, resolver_context=self._resolver_context) for path_spec in path_spec_generator: if self._abort: @@ -765,7 +762,7 @@ def _StartWorkerProcess(self, process_name): environment_variables = list(self.knowledge_base.GetEnvironmentVariables()) process = extraction_process.ExtractionWorkerProcess( - task_queue, self.collection_filters_helper, + task_queue, self._collection_filters_helper, self._processing_configuration, self._system_configurations, environment_variables, enable_sigsegv_handler=self._enable_sigsegv_handler, name=process_name) @@ -946,7 +943,7 @@ def _UpdateStatus(self): if self._status_update_callback: self._status_update_callback(self._processing_status) - def ProcessSource( + def ProcessSourceMulti( self, storage_writer, session_identifier, processing_configuration, system_configurations, file_system_path_specs, enable_sigsegv_handler=False, storage_file_path=None): @@ -970,11 +967,31 @@ def ProcessSource( ProcessingStatus: processing status. Raises: - BadConfigOption: if the preferred time zone is invalid. + BadConfigOption: if an invalid collection filter was specified or if + the preferred time zone is invalid. """ self._enable_sigsegv_handler = enable_sigsegv_handler self._system_configurations = system_configurations + if not self._artifacts_registry: + # TODO: refactor. 
+ self.BuildArtifactsRegistry( + processing_configuration.artifact_definitions_path, + processing_configuration.custom_artifacts_path) + + # TODO: get environment_variables per system_configuration + environment_variables = self.knowledge_base.GetEnvironmentVariables() + + try: + self.BuildCollectionFilters( + environment_variables, + artifact_filter_names=processing_configuration.artifact_filters, + filter_file_path=processing_configuration.filter_file) + except errors.InvalidFilter as exception: + raise errors.BadConfigOption( + 'Unable to build collection filters with error: {0!s}'.format( + exception)) + time_zones_per_path_spec = {} for system_configuration in system_configurations: if system_configuration.time_zone: @@ -982,7 +999,6 @@ def ProcessSource( if path_spec.parent: time_zones_per_path_spec[path_spec.parent] = ( system_configuration.time_zone) - self._event_data_timeliner = timeliner.EventDataTimeliner( data_location=processing_configuration.data_location, preferred_year=processing_configuration.preferred_year, diff --git a/plaso/single_process/extraction_engine.py b/plaso/single_process/extraction_engine.py index 0876d8f3d7..c0f224fc3c 100644 --- a/plaso/single_process/extraction_engine.py +++ b/plaso/single_process/extraction_engine.py @@ -156,10 +156,7 @@ def _ProcessPathSpec(self, parser_mediator, path_spec): self._current_display_name = parser_mediator.GetDisplayNameForPathSpec( path_spec) - excluded_find_specs = None - if self.collection_filters_helper: - excluded_find_specs = ( - self.collection_filters_helper.excluded_file_system_find_specs) + excluded_find_specs = self.GetCollectionExcludedFindSpecs() try: self._CacheFileSystem(path_spec) @@ -219,10 +216,7 @@ def _ProcessSource(self, parser_mediator, file_system_path_specs): self._current_display_name = '' self._number_of_consumed_sources = 0 - included_find_specs = None - if self.collection_filters_helper: - included_find_specs = ( - self.collection_filters_helper.included_file_system_find_specs) + included_find_specs = self.GetCollectionIncludedFindSpecs() for file_system_path_spec in file_system_path_specs: if self._abort: @@ -335,13 +329,27 @@ def _CreateParserMediator( Returns: ParserMediator: parser mediator. + + Raises: + BadConfigOption: if an invalid collection filter was specified. """ + # TODO: get environment_variables per system_configuration environment_variables = None if self.knowledge_base: environment_variables = self.knowledge_base.GetEnvironmentVariables() + try: + self.BuildCollectionFilters( + environment_variables, + artifact_filter_names=processing_configuration.artifact_filters, + filter_file_path=processing_configuration.filter_file) + except errors.InvalidFilter as exception: + raise errors.BadConfigOption( + 'Unable to build collection filters with error: {0!s}'.format( + exception)) + parser_mediator = parsers_mediator.ParserMediator( - collection_filters_helper=self.collection_filters_helper, + collection_filters_helper=self._collection_filters_helper, environment_variables=environment_variables, resolver_context=resolver_context, system_configurations=system_configurations) @@ -359,7 +367,7 @@ def _CreateParserMediator( def ProcessSource( self, storage_writer, resolver_context, processing_configuration, - system_configurations, file_system_path_specs, force_parser=False): + system_configurations, file_system_path_specs): """Processes file systems within a source. Args: @@ -371,21 +379,27 @@ def ProcessSource( configurations. 
file_system_path_specs (list[dfvfs.PathSpec]): path specifications of the source file systems to process. - force_parser (Optional[bool]): True if a specified parser should be forced - to be used to extract events. Returns: ProcessingStatus: processing status. Raises: - BadConfigOption: if the preferred time zone is invalid. + BadConfigOption: if an invalid collection filter was specified or if + the preferred time zone is invalid. """ + if not self._artifacts_registry: + # TODO: refactor. + self.BuildArtifactsRegistry( + processing_configuration.artifact_definitions_path, + processing_configuration.custom_artifacts_path) + parser_mediator = self._CreateParserMediator( resolver_context, processing_configuration, system_configurations) parser_mediator.SetStorageWriter(storage_writer) self._extraction_worker = worker.EventExtractionWorker( - force_parser=force_parser, parser_filter_expression=( + force_parser=processing_configuration.force_parser, + parser_filter_expression=( processing_configuration.parser_filter_expression)) self._extraction_worker.SetExtractionConfiguration( diff --git a/tests/cli/extraction_tool.py b/tests/cli/extraction_tool.py index 64e150c4cb..f749fd113d 100644 --- a/tests/cli/extraction_tool.py +++ b/tests/cli/extraction_tool.py @@ -216,7 +216,6 @@ def testParseProcessingOptions(self): test_tool._ParseProcessingOptions(options) - # TODO: add test for _PreprocessSource # TODO: add test for _ReadParserPresetsFromFile # TODO: add test for _SetExtractionPreferredTimeZone diff --git a/tests/engine/engine.py b/tests/engine/engine.py index e208c8f937..439102dbda 100644 --- a/tests/engine/engine.py +++ b/tests/engine/engine.py @@ -78,6 +78,23 @@ def testStartStopProfiling(self): test_engine._StartProfiling(configuration.profiling) test_engine._StopProfiling() + def testBuildArtifactsRegistry(self): + """Tests the BuildArtifactsRegistry function.""" + test_artifacts_path = shared_test_lib.GetTestFilePath(['artifacts']) + self._SkipIfPathNotExists(test_artifacts_path) + + test_engine = TestEngine() + + self.assertIsNone(test_engine._artifacts_registry) + + test_engine.BuildArtifactsRegistry(test_artifacts_path, None) + + self.assertIsNotNone(test_engine._artifacts_registry) + + # TODO: add test that raises BadConfigOption + + # TODO: add tests for BuildCollectionFilters. 
+ def testCreateSession(self): """Tests the CreateSession function.""" test_engine = engine.BaseEngine() @@ -123,6 +140,7 @@ def testPreprocessSource(self): self._SkipIfPathNotExists(test_artifacts_path) test_engine = TestEngine() + test_engine.BuildArtifactsRegistry(test_artifacts_path, None) source_path_spec = path_spec_factory.Factory.NewPathSpec( dfvfs_definitions.TYPE_INDICATOR_FAKE, location='/') @@ -131,7 +149,7 @@ def testPreprocessSource(self): storage_writer.Open() source_configurations = test_engine.PreprocessSource( - test_artifacts_path, None, [source_path_spec], storage_writer) + [source_path_spec], storage_writer) self.assertEqual(len(source_configurations), 1) self.assertEqual(source_configurations[0].operating_system, 'Windows NT') diff --git a/tests/multi_process/extraction_engine.py b/tests/multi_process/extraction_engine.py index 668195a04b..922551d4d6 100644 --- a/tests/multi_process/extraction_engine.py +++ b/tests/multi_process/extraction_engine.py @@ -23,11 +23,12 @@ class ExtractionMultiProcessEngineTest(shared_test_lib.BaseTestCase): def testProcessSource(self): """Tests the PreprocessSource and ProcessSource functions.""" - artifacts_path = shared_test_lib.GetTestFilePath(['artifacts']) - self._SkipIfPathNotExists(artifacts_path) + test_artifacts_path = shared_test_lib.GetTestFilePath(['artifacts']) + self._SkipIfPathNotExists(test_artifacts_path) test_engine = extraction_engine.ExtractionMultiProcessEngine( maximum_number_of_tasks=100) + test_engine.BuildArtifactsRegistry(test_artifacts_path, None) test_file_path = self._GetTestFilePath(['ímynd.dd']) self._SkipIfPathNotExists(test_file_path) @@ -40,10 +41,11 @@ def testProcessSource(self): session = sessions.Session() - configuration = configurations.ProcessingConfiguration() - configuration.data_location = shared_test_lib.DATA_PATH - configuration.parser_filter_expression = 'filestat' - configuration.task_storage_format = definitions.STORAGE_FORMAT_SQLITE + processing_configuration = configurations.ProcessingConfiguration() + processing_configuration.data_location = shared_test_lib.DATA_PATH + processing_configuration.parser_filter_expression = 'filestat' + processing_configuration.task_storage_format = ( + definitions.STORAGE_FORMAT_SQLITE) with shared_test_lib.TempDirectory() as temp_directory: temp_file = os.path.join(temp_directory, 'storage.plaso') @@ -52,10 +54,13 @@ def testProcessSource(self): try: system_configurations = test_engine.PreprocessSource( - artifacts_path, None, [source_path_spec], storage_writer) + [source_path_spec], storage_writer) - processing_status = test_engine.ProcessSource( - storage_writer, session.identifier, configuration, + # The method is named ProcessSourceMulti because pylint 2.6.0 and + # later gets confused about keyword arguments when ProcessSource + # is used. 
+ processing_status = test_engine.ProcessSourceMulti( + storage_writer, session.identifier, processing_configuration, system_configurations, [source_path_spec], storage_file_path=temp_directory) diff --git a/tests/single_process/extraction_engine.py b/tests/single_process/extraction_engine.py index 4c23e085f9..b62bd947ca 100644 --- a/tests/single_process/extraction_engine.py +++ b/tests/single_process/extraction_engine.py @@ -30,6 +30,7 @@ def testProcessSource(self): self._SkipIfPathNotExists(test_file_path) test_engine = extraction_engine.SingleProcessEngine() + test_engine.BuildArtifactsRegistry(test_artifacts_path, None) resolver_context = context.Context() os_path_spec = path_spec_factory.Factory.NewPathSpec( @@ -38,19 +39,19 @@ def testProcessSource(self): dfvfs_definitions.TYPE_INDICATOR_TSK, location='/', parent=os_path_spec) - configuration = configurations.ProcessingConfiguration() - configuration.data_location = shared_test_lib.DATA_PATH - configuration.parser_filter_expression = 'filestat' + processing_configuration = configurations.ProcessingConfiguration() + processing_configuration.data_location = shared_test_lib.DATA_PATH + processing_configuration.parser_filter_expression = 'filestat' storage_writer = fake_writer.FakeStorageWriter() storage_writer.Open() try: system_configurations = test_engine.PreprocessSource( - test_artifacts_path, None, [source_path_spec], storage_writer) + [source_path_spec], storage_writer) processing_status = test_engine.ProcessSource( - storage_writer, resolver_context, configuration, + storage_writer, resolver_context, processing_configuration, system_configurations, [source_path_spec]) number_of_events = storage_writer.GetNumberOfAttributeContainers('event')
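To summarize the calling convention this change introduces, the sketch below mirrors the single-process test above: the artifacts registry is built once on the engine, PreprocessSource no longer receives the artifact definition paths, and the collection filters are derived from the processing configuration inside ProcessSource. This is a minimal illustration and not part of the patch; the artifact and source paths, the path specification, and the fake storage writer import are assumptions made for the example.

# Example only -- not part of the patch. The paths, the path specification
# and the fake (in-memory) storage writer are illustrative assumptions.
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context as dfvfs_context

from plaso.engine import configurations
from plaso.single_process import extraction_engine
from plaso.storage.fake import writer as fake_writer

artifact_definitions_path = '/usr/share/artifacts'  # assumed location
source_path_spec = path_spec_factory.Factory.NewPathSpec(
    dfvfs_definitions.TYPE_INDICATOR_OS, location='/tmp/source.raw')

engine = extraction_engine.SingleProcessEngine()

# The artifacts registry is now built once, up front, on the engine.
engine.BuildArtifactsRegistry(artifact_definitions_path, None)

resolver_context = dfvfs_context.Context()
storage_writer = fake_writer.FakeStorageWriter()
storage_writer.Open()

# PreprocessSource no longer takes the artifact definition paths.
system_configurations = engine.PreprocessSource(
    [source_path_spec], storage_writer, resolver_context=resolver_context)

# The processing configuration now carries the artifact definition paths;
# collection filters are built from it inside ProcessSource.
processing_configuration = configurations.ProcessingConfiguration()
processing_configuration.artifact_definitions_path = artifact_definitions_path
processing_configuration.parser_filter_expression = 'filestat'

processing_status = engine.ProcessSource(
    storage_writer, resolver_context, processing_configuration,
    system_configurations, [source_path_spec])

storage_writer.Close()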