Skip to content

Commit

Permalink
index: fix storage for imports
Browse files Browse the repository at this point in the history
  • Loading branch information
efiop committed Jan 2, 2024
1 parent 987b1c4 commit 041ea17
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 26 deletions.
51 changes: 31 additions & 20 deletions dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,30 @@ def _load_data_from_outs(index, prefix, outs):
)


def _load_storage_from_import(storage_map, key, out):
    """Register the storages that back an imported output under ``key``.

    DB imports are skipped entirely. When ``out`` has no ``hash_info``
    (a partial import), a cache ``FileStorage`` is registered inside the
    repo's filesystem cache, at a path built from the dependency's
    protocol and a token of the dependency's path. A read-only remote
    ``FileStorage`` pointing at the dependency itself is registered for
    repo imports and for partial imports.

    Args:
        storage_map: Storage map receiving the ``add_cache`` /
            ``add_remote`` registrations.
        key: Index key the storages are registered under.
        out: Output of the import stage; its first stage dependency is
            used as the import source.
    """
    from fsspec.utils import tokenize

    from dvc_data.index import FileStorage

    stage = out.stage
    if stage.is_db_import:
        return

    source = stage.deps[0]
    is_partial = not out.hash_info

    if is_partial:
        # partial import
        cache = out.repo.cache.fs_cache
        cache_path = cache.fs.join(
            cache.path,
            source.fs.protocol,
            tokenize(source.fs_path),
        )
        storage_map.add_cache(FileStorage(key, cache.fs, cache_path))

    if stage.is_repo_import or is_partial:
        remote_storage = FileStorage(
            key,
            source.fs,
            source.fs_path,
            read_only=True,
        )
        storage_map.add_remote(remote_storage)


def _load_storage_from_out(storage_map, key, out):
from dvc.cachemgr import LEGACY_HASH_NAMES
from dvc.config import NoRemoteError
Expand All @@ -190,36 +214,23 @@ def _load_storage_from_out(storage_map, key, out):
path=remote.path,
index=remote.index,
prefix=(),
read_only=(not out.can_push),
)
)
else:
odb = (
remote.legacy_odb if out.hash_name in LEGACY_HASH_NAMES else remote.odb
)
storage_map.add_remote(ObjectStorage(key, odb, index=remote.index))
storage_map.add_remote(
ObjectStorage(
key, odb, index=remote.index, read_only=(not out.can_push)
)
)
except NoRemoteError:
pass

if out.stage.is_db_import:
return

if out.stage.is_import:
dep = out.stage.deps[0]
if not out.hash_info:
from fsspec.utils import tokenize

# partial import
fs_cache = out.repo.cache.fs_cache
storage_map.add_cache(
FileStorage(
key,
fs_cache.fs,
fs_cache.fs.join(
fs_cache.path, dep.fs.protocol, tokenize(dep.fs_path)
),
)
)
storage_map.add_remote(FileStorage(key, dep.fs, dep.fs_path, read_only=True))
_load_storage_from_import(storage_map, key, out)


class Index:
Expand Down
40 changes: 34 additions & 6 deletions tests/func/test_repo_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,32 +360,60 @@ def test_data_index(tmp_dir, dvc, local_cloud, erepo_dir):
dvc.imp(os.fspath(erepo_dir), "efoo")
dvc.imp(os.fspath(erepo_dir), "edir")

dvc.imp(os.fspath(erepo_dir), "efoo", "efoo_partial", no_download=True)
dvc.imp(os.fspath(erepo_dir), "edir", "edir_partial", no_download=True)

local_cloud.gen("ifoo", b"ifoo")
local_cloud.gen("idir", {"ibar": b"ibar", "isubdir": {"ibaz": b"ibaz"}})

dvc.imp_url(str(local_cloud / "ifoo"))
dvc.imp_url(str(local_cloud / "idir"))

dvc.imp_url(str(local_cloud / "ifoo"), "ifoo_partial", no_download=True)
dvc.imp_url(str(local_cloud / "idir"), "idir_partial", no_download=True)

index = Index.from_repo(dvc)
assert index.data_keys == {
"local": set(),
"repo": {("dir",), ("edir",), ("efoo",), ("foo",), ("idir",), ("ifoo",)},
"repo": {
("foo",),
("dir",),
("efoo",),
("edir",),
("efoo_partial",),
("edir_partial",),
("ifoo",),
("idir",),
("ifoo_partial",),
("idir_partial",),
},
}

data = index.data["repo"]
assert set(data.keys()) == {
("foo",),
("dir",),
("edir",),
("efoo",),
("foo",),
("idir",),
("edir",),
("efoo_partial",),
("edir_partial",),
("ifoo",),
("idir",),
("ifoo_partial",),
("idir_partial",),
}

assert not data.storage_map[("foo",)].remote
assert not data.storage_map[("dir",)].remote

assert data.storage_map[("efoo",)].remote.read_only
assert data.storage_map[("edir",)].remote.read_only
assert data.storage_map[("ifoo",)].remote.read_only
assert data.storage_map[("idir",)].remote.read_only

assert data.storage_map[("efoo_partial",)].remote.read_only
assert data.storage_map[("edir_partial",)].remote.read_only

assert not data.storage_map[("ifoo",)].remote
assert not data.storage_map[("idir",)].remote

assert data.storage_map[("ifoo_partial",)].remote.read_only
assert data.storage_map[("idir_partial",)].remote.read_only

0 comments on commit 041ea17

Please sign in to comment.