From 6927b3dcbd08f1668f9dda39ab6cc39f0036af3b Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Wed, 15 Mar 2023 04:44:11 +0200 Subject: [PATCH] index: data: support imports Stepping stone to simplifying `dvc fetch/pull` by using the index. Fetch handles regular imports through the index already, but not repo imports because their processing is much more involved (e.g. chained imports) in the current architecture. With `FileStorage` support introduced into `DataIndex` and `datafs` supporting imports overall, `dvcfs` can now handle repo imports (even chained ones). This will soon allow us to handle repo imports the same way we handle regular ones, improve performance and get rid of a lot of messy code (e.g. `RepoDependency`). Related https://github.com/iterative/scmrepo/issues/207 Related https://github.com/iterative/dvc-data/pull/315 Related https://github.com/iterative/studio/issues/5261 --- dvc/dependency/repo.py | 7 +++++++ dvc/repo/index.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py index 481d67874c..902a9811a8 100644 --- a/dvc/dependency/repo.py +++ b/dvc/dependency/repo.py @@ -36,11 +36,18 @@ class RepoDependency(Dependency): } def __init__(self, def_repo: Dict[str, str], stage: "Stage", *args, **kwargs): + from dvc.fs import DVCFileSystem + self.def_repo = def_repo self._objs: Dict[str, "HashFile"] = {} self._meta: Dict[str, "Meta"] = {} super().__init__(stage, *args, **kwargs) + self.fs = DVCFileSystem( + self.def_repo[self.PARAM_URL], + rev=self.def_repo.get(self.PARAM_REV_LOCK), + ) + def _parse_path(self, fs, fs_path): # noqa: ARG002 return None diff --git a/dvc/repo/index.py b/dvc/repo/index.py index 68b8fd12dc..156c0f2ebc 100644 --- a/dvc/repo/index.py +++ b/dvc/repo/index.py @@ -161,7 +161,7 @@ def _load_storage_from_out(storage_map, key, out): except NoRemoteError: pass - if out.stage.is_import and not out.stage.is_repo_import: + if out.stage.is_import: dep = out.stage.deps[0] 
storage_map.add_data(FileStorage(key, dep.fs, dep.fs_path))