From f5a30c3a0312a669e9f6f87acab7473618dd97cf Mon Sep 17 00:00:00 2001 From: Zion Leonahenahe Basque Date: Wed, 13 Mar 2024 10:18:33 -0700 Subject: [PATCH] Delay the imports of PyJoern for less data installs (#11) --- sailreval/__init__.py | 2 +- sailreval/analysis/counting.py | 2 +- sailreval/analysis/measure.py | 7 +++++-- sailreval/analysis/visualization.py | 22 ++++++++++++++-------- sailreval/metrics/ged_to_source.py | 2 +- sailreval/utils/compile.py | 3 ++- setup.cfg | 22 ++++++++++------------ 7 files changed, 34 insertions(+), 26 deletions(-) diff --git a/sailreval/__init__.py b/sailreval/__init__.py index 052e87e..2634dd8 100755 --- a/sailreval/__init__.py +++ b/sailreval/__init__.py @@ -1,4 +1,4 @@ -__version__ = "1.4.2" +__version__ = "1.5.0" # create loggers import logging diff --git a/sailreval/analysis/counting.py b/sailreval/analysis/counting.py index 19e5d13..c7a9afc 100644 --- a/sailreval/analysis/counting.py +++ b/sailreval/analysis/counting.py @@ -13,7 +13,6 @@ from sailreval.utils import load_tomls_by_bin_name, bcolors from sailreval.utils.sailr_target import SAILRTarget from sailreval.utils.compile import DEFAULT_OPTIMIZATION_LEVELS, OPTIMIZATION_LEVELS -from pyjoern import JoernServer, JoernClient from tqdm import tqdm import toml @@ -793,6 +792,7 @@ def _format_number(num): def _find_functions_with_switches(source_path: Path, port): source_name = source_path.name.split(".c")[0] + from pyjoern import JoernServer, JoernClient with JoernServer(port=port): client = JoernClient(source_path, port=port) functions = client.functions_with_switches() diff --git a/sailreval/analysis/measure.py b/sailreval/analysis/measure.py index 28fe995..c8ce1fb 100644 --- a/sailreval/analysis/measure.py +++ b/sailreval/analysis/measure.py @@ -10,8 +10,6 @@ from typing import List import toml -from pyjoern import JoernClient, JoernServer, fast_cfgs_from_source -from pyjoern.mapping import cfg_root_node, correct_source_cfg_addrs from sailreval import ALL_DECOMPILERS, ALL_METRICS, SAILR_DECOMPILERS, SAILR_METRICS, JOERNLESS_SERVER_METRICS from sailreval.metrics import get_metric_function, POST_METRICS @@ -33,6 +31,7 @@ def measure(filepath: Path, basename: str, metrics, functions, joern_port=9000, if not require_joern: client = None else: + from pyjoern import JoernClient if client is None: client = JoernClient(filepath, port=joern_port, bin_name=basename) @@ -91,6 +90,7 @@ def _measure_files_with_joern_server(): l.debug(f"JOERN server not needed for {basename}") measure_files(file_dir, basename, decompilers=decompilers, metrics=metrics, functions=functions, cores=cores, joern_port=joern_port, require_joern=False, cache_dir=cache_dir) else: + from pyjoern import JoernServer try: with JoernServer(port=joern_port): measure_files(file_dir, basename, decompilers=decompilers, metrics=metrics, functions=functions, cores=cores, joern_port=joern_port, cache_dir=cache_dir) @@ -183,6 +183,9 @@ def measure_files(file_dir: Path, basename: str, decompilers=None, metrics=None, SAILR_METRICS.CODE_COMPLEXITY, SAILR_METRICS.GED_EXACT ) ) + if require_cfgs: + from pyjoern import fast_cfgs_from_source + from pyjoern.mapping import correct_source_cfg_addrs # collect the source CFGs first if they are needed source_cfgs, dec_cfgs = {}, {} diff --git a/sailreval/analysis/visualization.py b/sailreval/analysis/visualization.py index 0bafc49..16f1749 100644 --- a/sailreval/analysis/visualization.py +++ b/sailreval/analysis/visualization.py @@ -1,22 +1,16 @@ -from pathlib import Path import textwrap from typing import Dict, List -import matplotlib as mpl -mpl.use('Agg') -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -#import seaborn as sns STAT_TYPES = ("sum", "mean", "median") - # # Graphs # def plot_histogram(data1, data2, save_path="./histo_chart.png"): + import matplotlib as mpl + mpl.use('Agg') import matplotlib.pyplot as plt import numpy as np @@ -58,6 +52,11 @@ def plot_histogram(data1, data2, save_path="./histo_chart.png"): def plot_diff_barchar(data_by_row: Dict[str, List], metric: str, decompilers: List, save_path="./diff_bar_chart.png"): + import matplotlib as mpl + mpl.use('Agg') + import matplotlib.pyplot as plt + import pandas as pd + assert len(decompilers) == 2 metric_data = data_by_row[metric] @@ -79,6 +78,10 @@ def plot_diff_barchar(data_by_row: Dict[str, List], metric: str, decompilers: Li def plot_barchart(data_by_row: Dict[str, List], save_path="./bar_chart.png"): + import matplotlib as mpl + mpl.use('Agg') + import matplotlib.pyplot as plt + import pandas as pd data = { 'Function': range(len(data_by_row["angr_sailr"])), 'angr_phoenix': data_by_row["angr_phoenix"], @@ -104,6 +107,9 @@ def plot_barchart(data_by_row: Dict[str, List], save_path="./bar_chart.png"): def plot_box_plot(data_by_row: Dict[str, List], save_path="./boxplot.png"): + import matplotlib as mpl + mpl.use('Agg') + import matplotlib.pyplot as plt plt.figure(figsize=(10, 6)) # Set the figure size plt.boxplot(data_by_row.values(), vert=False, widths=0.7) # Plot the data diff --git a/sailreval/metrics/ged_to_source.py b/sailreval/metrics/ged_to_source.py index b18446f..ff12393 100644 --- a/sailreval/metrics/ged_to_source.py +++ b/sailreval/metrics/ged_to_source.py @@ -2,7 +2,6 @@ from pathlib import Path from typing import Dict -from pyjoern.mapping import correct_decompiler_mappings, read_line_maps from cfgutils.similarity import ged_max, ged_upperbound, ged_exact from cfgutils.similarity import cfg_edit_distance as _cfg_edit_distance import networkx as nx @@ -81,6 +80,7 @@ def cfg_edit_distance( def compute_cfg_edit_distance(dec_cfg, src_cfg, function, binary_path: Path, decompiler: str): + from pyjoern.mapping import correct_decompiler_mappings, read_line_maps binary_path = Path(binary_path) bin_dir = binary_path.parent bin_name = binary_path.with_suffix("").name diff --git a/sailreval/utils/compile.py b/sailreval/utils/compile.py index e4b98f6..b298c70 100644 --- a/sailreval/utils/compile.py +++ b/sailreval/utils/compile.py @@ -11,7 +11,6 @@ import toml from tqdm import tqdm -from pyjoern import JoernClient, JoernServer from .sailr_target import SAILRTarget from ..utils import timeout @@ -148,6 +147,8 @@ def _collect_function_lines( cachable_results = {} basename = i_file.with_suffix("").with_suffix("").name + from pyjoern import JoernClient, JoernServer + def _eval_with_server(joern_port, src_file, i_file): with JoernServer(port=joern_port): src_client = JoernClient(src_file, port=joern_port) diff --git a/setup.cfg b/setup.cfg index 5739fa5..642de2c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,7 +5,7 @@ url = https://github.com/mahaloz/sailr-eval classifiers = License :: OSI Approved :: BSD License Programming Language :: Python :: 3 - Programming Language :: Python :: 3.6 + Programming Language :: Python :: 3.8 license = BSD 2 Clause license_files = LICENSE description = The SAILR Evaluation Pipeline @@ -15,21 +15,19 @@ long_description_content_type = text/markdown [options] install_requires = networkx - graphviz - pygraphviz toml - psutil tqdm - docker - matplotlib - numpy - pandas - scipy - seaborn pyelftools cfgutils>=1.8.1 pyjoern==1.2.18.6 - -python_requires = >= 3.6 +python_requires = >= 3.8 packages = find: include_package_data = True + +[options.extras_require] +viz = + matplotlib + numpy + pandas + seaborn +