From a12b5ee4ba6a8bdc6ba5b30786a2ebabad5fa3f1 Mon Sep 17 00:00:00 2001 From: Gabriel Ebner Date: Mon, 6 Nov 2023 13:57:21 -0800 Subject: [PATCH] Add information about source file location. Fixes #18. Addresses #11. --- fstar_harness.py | 11 +++++++++++ ingest.py | 26 ++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/fstar_harness.py b/fstar_harness.py index d57ef45..36eba02 100755 --- a/fstar_harness.py +++ b/fstar_harness.py @@ -78,7 +78,18 @@ class Definition(TypedDict): opens_and_abbrevs: list[OpenOrAbbrev] vconfig: Optional[Vconfig] +class Source(TypedDict): + # Git repository name, e.g. `hacl-star` + project_name: str + # File name relative to the repository, e.g. `code/curve25519/Hacl.Impl.Curve25519.Lemmas.fst` + file_name: str + # Revision of the git repository + git_rev: str + # Url of the git repository + git_url: str + class InsightFile(TypedDict): + source: Source defs: list[Definition] dependencies: list[Dependency] diff --git a/ingest.py b/ingest.py index 3eb96b5..2bdd24b 100755 --- a/ingest.py +++ b/ingest.py @@ -9,6 +9,7 @@ import multiprocessing import tqdm import json +from fstar_harness import InsightFile def run_insights(*args): return subprocess.check_output(['fstar_insights/ocaml/bin/fstar_insights.exe'] + list(args), encoding='utf-8') @@ -19,12 +20,28 @@ def run_digest(fn) -> tuple[str, str]: def run_print_checked_deps(fn) -> tuple[str, Any, str]: return fn, json.loads(run_insights('--print_checked_deps', fn)), run_insights('--digest', fn) -def run_extract(fn): +def run_extract(fn_orig_src_fn: tuple[str, str]): + fn, orig_src_fn = fn_orig_src_fn try: - out = run_insights('--include', 'dataset', '--all_defs_and_premises', fn) - open(f'dataset/{fn}.json', 'w').write(out) + out: InsightFile = json.loads(run_insights('--include', 'dataset', '--all_defs_and_premises', fn)) except: sys.stderr.write(f'Cannot extract {fn}\n'); sys.stderr.flush() + return + + orig_dir = os.path.dirname(orig_src_fn) + git_rev = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=orig_dir, encoding='utf-8').strip() + git_repo_dir = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'], cwd=orig_dir, encoding='utf-8').strip() + source_file_name = os.path.relpath(os.path.realpath(orig_src_fn), git_repo_dir) + git_url = subprocess.check_output(['git', 'remote', 'get-url', 'origin'], cwd=orig_dir, encoding='utf-8').strip() + git_url = git_url.replace('git@github.com:', 'https://github.com/') + + out['source'] = { + 'project_name': os.path.basename(git_repo_dir), + 'file_name': source_file_name, + 'git_rev': git_rev, + 'git_url': git_url, + } + json.dump(out, open(f'dataset/{fn}.json', 'w')) def main(): os.makedirs('dataset', exist_ok=True) @@ -88,7 +105,8 @@ def resolve_checked(dig: str) -> bool: shutil.copy(src_fn, 'dataset/') shutil.copy(checked_fn, 'dataset/') - list(tqdm.tqdm(pool.imap_unordered(run_extract, basename2files.keys()), total=len(basename2files), desc='Extracting insights')) + list(tqdm.tqdm(pool.imap_unordered(run_extract, [ (bn, fn[1]) for bn, fn in basename2files.items() ]), + total=len(basename2files), desc='Extracting insights')) if __name__ == '__main__': main()