Skip to content

Commit

Permalink
Code review: 249480043: Changes to scan inside BDE and VSS volumes #109.
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Jul 3, 2015
1 parent eebbf27 commit 57a083e
Show file tree
Hide file tree
Showing 15 changed files with 393 additions and 439 deletions.
2 changes: 1 addition & 1 deletion config/dpkg/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ python-plaso (1.3.0-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <log2timeline@gmail.com>  Fri, 03 Jul 2015 20:04:31 +0200
-- Log2Timeline <log2timeline@gmail.com>  Fri, 03 Jul 2015 20:11:25 +0200
68 changes: 41 additions & 27 deletions plaso/cli/storage_media_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from dfvfs.credentials import manager as credentials_manager
from dfvfs.helpers import source_scanner
from dfvfs.lib import definitions as dfvfs_definitions
from dfvfs.path import factory as path_spec_factory
from dfvfs.lib import errors as dfvfs_errors
from dfvfs.volume import tsk_volume_system
from dfvfs.volume import vshadow_volume_system

Expand Down Expand Up @@ -52,7 +52,6 @@ def __init__(self, input_reader=None, output_writer=None):
self._partition_string = None
self._partition_offset = None
self._process_vss = False
# TODO: refactor to front-end.
self._source_scanner = source_scanner.SourceScanner()
self._source_path = None
self._source_path_specs = []
Expand Down Expand Up @@ -127,7 +126,6 @@ def _GetTSKPartitionIdentifiers(
volume_system = tsk_volume_system.TSKVolumeSystem()
volume_system.Open(scan_node.path_spec)

# TODO: refactor to front-end.
volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
volume_system)
if not volume_identifiers:
Expand Down Expand Up @@ -202,7 +200,6 @@ def _GetVSSStoreIdentifiers(self, scan_node, vss_stores=None):
volume_system = vshadow_volume_system.VShadowVolumeSystem()
volume_system.Open(scan_node.path_spec)

# TODO: refactor to front-end.
volume_identifiers = self._source_scanner.GetVolumeIdentifiers(
volume_system)
if not volume_identifiers:
Expand Down Expand Up @@ -340,7 +337,7 @@ def _ParseVSSProcessingOptions(self, options):
Raises:
BadConfigOption: if the options are invalid.
"""
self._process_vss = not getattr(options, u'no_vss', False)
self._process_vss = not getattr(options, u'no_vss', True)
if self._process_vss:
vss_stores = getattr(options, u'vss_stores', None)
else:
Expand Down Expand Up @@ -726,36 +723,42 @@ def _ScanVolumeScanNodeEncrypted(self, scan_context, volume_scan_node):
scan_context, volume_scan_node, credentials)

if result:
# TODO: instead of hard coding TSK scan for the file system.
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=volume_scan_node.path_spec)
self._source_path_specs.append(path_spec)
self._source_scanner.Scan(
scan_context, scan_path_spec=volume_scan_node.path_spec)
self._ScanVolume(scan_context, volume_scan_node)

def _ScanVolumeScanNodeVSS(self, unused_scan_context, volume_scan_node):
def _ScanVolumeScanNodeVSS(self, scan_context, volume_scan_node):
"""Scans a VSS volume scan node for volume and file systems.
Args:
scan_context: the source scanner context (instance of
SourceScannerContext).
volume_scan_node: the volume scan node (instance of dfvfs.ScanNode).
Raises:
SourceScannerError: if a VSS sub scan node scannot be retrieved.
"""
if not self._process_vss:
return

vss_store_identifiers = self._GetVSSStoreIdentifiers(
volume_scan_node, vss_stores=self._vss_stores)

self._vss_stores = vss_store_identifiers
self._vss_stores = list(vss_store_identifiers)

# Process VSS stores starting with the most recent one.
vss_store_identifiers.reverse()
for vss_store_identifier in vss_store_identifiers:
location = u'/vss{0:d}'.format(vss_store_identifier)
sub_scan_node = volume_scan_node.GetSubNodeByLocation(location)
if not sub_scan_node:
raise errors.SourceScannerError(
u'Scan node missing for VSS store identifier: {0:d}.'.format(
vss_store_identifier))

# TODO: instead of hard coding TSK scan for the file system.
path_spec = path_spec_factory.Factory.NewPathSpec(
dfvfs_definitions.TYPE_INDICATOR_TSK, location=u'/',
parent=sub_scan_node.path_spec)
self._source_path_specs.append(path_spec)
self._source_scanner.Scan(
scan_context, scan_path_spec=sub_scan_node.path_spec)
self._ScanVolume(scan_context, sub_scan_node)

def AddCredentialOptions(self, argument_group):
"""Adds the credential options to the argument group.
Expand Down Expand Up @@ -893,31 +896,40 @@ def ParseOptions(self, options):

self._source_path = os.path.abspath(self._source_path)

def ScanSource(self, front_end):
def ScanSource(self):
"""Scans the source path for volume and file systems.
This functions sets the internal source path specification and source
type values. The arguments provide the preferred source parameters
but will be ignored if they are not relevant.
This function sets the internal source path specification and source
type values.
Args:
front_end: the storage media front-end (instance of StorageMediaFrontend).
Returns:
The scan context (instance of dfvfs.ScanContext).
Raises:
SourceScannerError: if the format of or within the source is
not supported.
"""
if (not self._source_path.startswith(u'\\\\.\\') and
not os.path.exists(self._source_path)):
raise errors.SourceScannerError(
u'No such device, file or directory: {0:s}.'.format(
self._source_path))

scan_context = source_scanner.SourceScannerContext()
scan_context.OpenSourcePath(self._source_path)

try:
scan_context = front_end.ScanSource(self._source_path)
except errors.SourceScannerError:
raise
self._source_scanner.Scan(scan_context)
except (dfvfs_errors.BackEndError, ValueError) as exception:
raise errors.SourceScannerError(
u'Unable to scan source with error: {0:s}.'.format(exception))

if scan_context.source_type not in [
scan_context.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
scan_context.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
scan_node = scan_context.GetRootScanNode()
self._source_path_specs.append(scan_node.path_spec)
return
return scan_context

# Get the first node where where we need to decide what to process.
scan_node = scan_context.GetRootScanNode()
Expand Down Expand Up @@ -947,3 +959,5 @@ def ScanSource(self, front_end):
if not self._source_path_specs:
raise errors.SourceScannerError(
u'No supported file system found in source.')

return scan_context
77 changes: 55 additions & 22 deletions plaso/frontend/extraction_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
import pdb
import traceback

from dfvfs.helpers import source_scanner
from dfvfs.resolver import context

import plaso
from plaso import parsers # pylint: disable=unused-import
from plaso import hashers # pylint: disable=unused-import
from plaso.engine import single_process
from plaso.engine import utils as engine_utils
from plaso.frontend import frontend
from plaso.frontend import presets
from plaso.frontend import storage_media_frontend
from plaso.lib import definitions
from plaso.lib import errors
from plaso.lib import event
Expand All @@ -25,21 +28,21 @@
import pytz


class ExtractionFrontend(storage_media_frontend.StorageMediaFrontend):
class ExtractionFrontend(frontend.Frontend):
"""Class that implements an extraction front-end."""

_DEFAULT_PROFILING_SAMPLE_RATE = 1000

# Approximately 250 MB of queued items per worker.
_DEFAULT_QUEUE_SIZE = 125000


def __init__(self):
"""Initializes the front-end object."""
super(ExtractionFrontend, self).__init__()
self._buffer_size = 0
self._collection_process = None
self._debug_mode = False
self._enable_preprocessing = False
self._enable_profiling = False
self._engine = None
self._filter_expression = None
Expand All @@ -49,11 +52,11 @@ def __init__(self):
self._operating_system = None
self._output_module = None
self._parser_names = None
self._preprocess = False
self._process_archive_files = False
self._profiling_sample_rate = self._DEFAULT_PROFILING_SAMPLE_RATE
self._profiling_type = u'all'
self._queue_size = self._DEFAULT_QUEUE_SIZE
self._resolver_context = context.Context()
self._single_process_mode = False
self._show_worker_memory_information = False
self._storage_file_path = None
Expand Down Expand Up @@ -146,12 +149,13 @@ def _GetParserFilterPreset(self, os_guess=u'', os_version=u''):

return parser_filter_string

def _PreprocessSource(self, source_path_specs):
def _PreprocessSource(self, source_path_specs, source_type):
"""Preprocesses the source.
Args:
source_path_specs: list of path specifications (instances of
dfvfs.PathSpec) to process.
source_type: the dfVFS source type definition.
Returns:
The preprocessing object (instance of PreprocessObject).
Expand All @@ -167,14 +171,18 @@ def _PreprocessSource(self, source_path_specs):
if storage_information:
logging.info(u'Using preprocessing information from a prior run.')
pre_obj = storage_information[-1]
self._preprocess = False
self._enable_preprocessing = False
except IOError:
logging.warning(u'Storage file does not exist, running preprocess.')

logging.debug(u'Starting preprocessing.')

if (self._preprocess and
(self.SourceIsDirectory() or self.SourceIsStorageMediaImage())):
# TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
# to definitions.SOURCE_TYPE_.
if (self._enable_preprocessing and source_type in [
source_scanner.SourceScannerContext.SOURCE_TYPE_DIRECTORY,
source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]):
try:
self._engine.PreprocessSource(
source_path_specs, self._operating_system,
Expand Down Expand Up @@ -208,12 +216,13 @@ def _PreprocessSource(self, source_path_specs):
# * mount point

def _PreprocessSetCollectionInformation(
self, pre_obj, unused_engine, filter_file=None,
self, pre_obj, source_type, unused_engine, filter_file=None,
parser_filter_string=None, preferred_encoding=u'utf-8'):
"""Sets the collection information as part of the preprocessing.
Args:
pre_obj: the preprocess object (instance of PreprocessObject).
source_type: the dfVFS source type definition.
engine: the engine object (instance of BaseEngine).
filter_file: a path to a file that contains find specifications.
The default is None.
Expand All @@ -235,7 +244,7 @@ def _PreprocessSetCollectionInformation(
# TODO: extraction info:
collection_information[u'configured_zone'] = pre_obj.zone
collection_information[u'parsers'] = self._parser_names
collection_information[u'preprocess'] = self._preprocess
collection_information[u'preprocess'] = self._enable_preprocessing

if self._filter_expression:
collection_information[u'filter'] = self._filter_expression
Expand Down Expand Up @@ -269,7 +278,10 @@ def _PreprocessSetCollectionInformation(
collection_information[u'output_file'] = self._storage_file_path

# TODO: source settings:
if self.SourceIsDirectory():

# TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
# to definitions.SOURCE_TYPE_.
if source_type == source_scanner.SourceScannerContext.SOURCE_TYPE_DIRECTORY:
recursive = True
else:
recursive = False
Expand All @@ -280,7 +292,11 @@ def _PreprocessSetCollectionInformation(
# TODO: replace by scan node.
# collection_information[u'vss parsing'] = bool(self.vss_stores)

if self.SourceIsStorageMediaImage():
# TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
# to definitions.SOURCE_TYPE_.
if source_type in [
source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
collection_information[u'method'] = u'imaged processed'
# TODO: replace by scan node.
# collection_information[u'image_offset'] = self.partition_offset
Expand Down Expand Up @@ -361,8 +377,8 @@ def GetParsersInformation(self):
return parsers_manager.ParsersManager.GetParsersInformation()

def ProcessSources(
self, source_path_specs, enable_sigsegv_handler=False, filter_file=None,
hasher_names_string=None, parser_filter_string=None,
self, source_path_specs, source_type, enable_sigsegv_handler=False,
filter_file=None, hasher_names_string=None, parser_filter_string=None,
preferred_encoding=u'utf-8', single_process_mode=False,
status_update_callback=None,
storage_serializer_format=definitions.SERIALIZER_FORMAT_PROTOBUF,
Expand All @@ -372,6 +388,7 @@ def ProcessSources(
Args:
source_path_specs: list of path specifications (instances of
dfvfs.PathSpec) to process.
source_type: the dfVFS source type definition.
enable_sigsegv_handler: optional boolean value to indicate the SIGSEGV
handler should be enabled. The default is False.
filter_file: optional path to a file that contains find specifications.
Expand All @@ -397,17 +414,24 @@ def ProcessSources(
file system.
UserAbort: if the user initiated an abort.
"""
if self.SourceIsDirectory() or self.SourceIsStorageMediaImage():
# If the source is a directory or a storage media image
# run pre-processing.
self._preprocess = True
# If the source is a directory or a storage media image
# run pre-processing.
# TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
# to definitions.SOURCE_TYPE_.
if source_type in [
source_scanner.SourceScannerContext.SOURCE_TYPE_DIRECTORY,
source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
source_scanner.SourceScannerContext.SOURCE_TYPE_STORAGE_MEDIA_IMAGE]:
self.SetEnablePreprocessing(True)
else:
self._preprocess = False
self.SetEnablePreprocessing(False)

self._CheckStorageFile(self._storage_file_path)

self._single_process_mode = single_process_mode
if self.SourceIsFile():
# TODO: move source_scanner.SourceScannerContext.SOURCE_TYPE_
# to definitions.SOURCE_TYPE_.
if source_type == source_scanner.SourceScannerContext.SOURCE_TYPE_FILE:
# No need to multi process a single file source.
self._single_process_mode = True

Expand All @@ -423,7 +447,7 @@ def ProcessSources(
profiling_sample_rate=self._profiling_sample_rate,
profiling_type=self._profiling_type)

pre_obj = self._PreprocessSource(source_path_specs)
pre_obj = self._PreprocessSource(source_path_specs, source_type)

self._operating_system = getattr(pre_obj, u'guessed_os', None)

Expand Down Expand Up @@ -462,7 +486,7 @@ def ProcessSources(
filter_find_specs = None

self._PreprocessSetCollectionInformation(
pre_obj, self._engine, filter_file=filter_file,
pre_obj, source_type, self._engine, filter_file=filter_file,
parser_filter_string=parser_filter_string,
preferred_encoding=preferred_encoding)

Expand Down Expand Up @@ -545,6 +569,15 @@ def SetDebugMode(self, enable_debug=False):
"""
self._debug_mode = enable_debug

def SetEnablePreprocessing(self, enable_preprocessing):
  """Sets whether preprocessing of the source should be performed.

  Args:
    enable_preprocessing: boolean value to indicate if the preprocessing
                          should be performed.
  """
  self._enable_preprocessing = enable_preprocessing

def SetEnableProfiling(
self, enable_profiling, profiling_sample_rate=1000,
profiling_type=u'all'):
Expand Down
Loading

0 comments on commit 57a083e

Please sign in to comment.