Skip to content

Commit

Permalink
Add unassign and deprecate (#219)
Browse files Browse the repository at this point in the history
* WIP: unassign

* first draft

* WIP: introduce BaseEvent and subclasses, VStage, Version

* fix 'gto show name'

* fix 'gto history'

* fix 'gto check-ref'

* adding unassign to 'history' and enabling it in 'show model'

* filter out lw tags later to speed up the git tags parsing

* add 'last-assignments-per-version' and 'last-versions-per-stage' flags

* fix 'which' and add --av and --vs

* add deregister and deprecate to README

* renamings: not implemented yet

* more details in readme + register an artifact

* add summary for GS

* minor readme update

* fix tests

* add deregister and parse git tags with regexp

* fix test, update readme

* Update README.md

Co-authored-by: tapadipti <[email protected]>

* fix readme

* Update README.md

* update README with  command

* fix couple of things in README

* WIP: add 'tag' instead of 'register' and 'promote', add 'deprecate'

* add explanations to creating git tags - update README

* implement --delete

* add 'git push' suggestion

* escape '!' in git commands suggestions

* delete tag2 tests

* readme update: use deprecate instead of unassign and deregister

* fix tests and add --force to register

* return deregister and unassign to API: it's clearer

* rename deregister to unregister

* fix deprecate in CLI

Co-authored-by: tapadipti <[email protected]>
  • Loading branch information
aguschin and tapadipti authored Aug 19, 2022
1 parent 06c95bc commit 691ca4b
Show file tree
Hide file tree
Showing 21 changed files with 2,206 additions and 981 deletions.
289 changes: 187 additions & 102 deletions README.md

Large diffs are not rendered by default.

299 changes: 188 additions & 111 deletions gto/api.py

Large diffs are not rendered by default.

563 changes: 424 additions & 139 deletions gto/base.py

Large diffs are not rendered by default.

307 changes: 195 additions & 112 deletions gto/cli.py

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions gto/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
from pydantic.env_settings import InitSettingsSource
from ruamel.yaml import YAML

from gto.constants import name_regexp
from gto.exceptions import (
UnknownStage,
UnknownType,
ValidationError,
WrongConfig,
)
from gto.ext import Enrichment, find_enrichment_types, find_enrichments
from gto.ext import EnrichmentReader, find_enrichment_types, find_enrichments

yaml = YAML(typ="safe", pure=True)
yaml.default_flow_style = False
Expand All @@ -23,7 +24,7 @@


def check_name_is_valid(name):
return bool(re.match(r"[a-z][a-z0-9-/]*[a-z0-9]$", name))
return bool(re.search(name_regexp, name))


def assert_name_is_valid(name):
Expand All @@ -38,7 +39,7 @@ class EnrichmentConfig(BaseModel):
type: str
config: Dict = {}

def load(self) -> Enrichment:
def load(self) -> EnrichmentReader:
return find_enrichment_types()[self.type](**self.config)


Expand Down Expand Up @@ -67,7 +68,7 @@ def assert_stage(self, name):
raise UnknownStage(name, self.STAGES)

@property
def enrichments(self) -> Dict[str, Enrichment]:
def enrichments(self) -> Dict[str, EnrichmentReader]:
res = {e.source: e for e in (e.load() for e in self.ENRICHMENTS)}
if self.AUTOLOAD_ENRICHMENTS:
return {**find_enrichments(), **res}
Expand Down
27 changes: 19 additions & 8 deletions gto/constants.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,42 @@
import re
from enum import Enum

COMMIT = "commit"
REF = "ref"
# COMMIT = "commit"
TAG = "tag"
# BRANCH = "branch"
# FILE = "file"

ARTIFACT = "artifact"
ACTION = "action"
TYPE = "type"
NAME = "name"
PATH = "path"
VERSION = "version"
STAGE = "stage"
NUMBER = "number"
COUNTER = "counter"


class Action(Enum):
    """String-valued names of the actions listed in this module."""

    CREATE = "create"
    DEPRECATE = "deprecate"
    DEREGISTER = "deregister"
    REGISTER = "register"
    ASSIGN = "assign"
    UNASSIGN = "unassign"


class Event(Enum):
COMMIT = "commit"
REGISTRATION = "registration"
ASSIGNMENT = "assignment"
# Building blocks for validating names and parsing git tag names.
#
# `name` is used for both artifact and stage names: a lowercase letter first,
# then any mix of lowercase letters, digits, "-" and "/", ending in a lowercase
# letter or digit. The pattern therefore needs at least two characters.
name = "[a-z][a-z0-9-/]*[a-z0-9]"
# Semantic version "MAJOR.MINOR.PATCH[-prerelease][+buildmetadata]"; every part
# is exposed as a named group.
semver = r"(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
# Bare named-group fragment without the enclosing parentheses: it is only a
# valid regex once wrapped in "(...)", as `tag_regexp` does below.
counter = "?P<counter>[0-9]+"
# Anchored with \Z instead of $: "$" also matches just before a trailing
# newline, which would let a value such as "model\n" pass as a valid name.
name_regexp = re.compile(rf"^{name}\Z")
# Accepted tag shapes (the optional "!" is captured in the `cancel` group):
#   <artifact>#<stage>[!][#<counter>]
#   <artifact>@v<semver>[!][#<counter>]
#   <artifact>@deprecated[#<counter>]
#   <artifact>@created[#<counter>]
tag_regexp = re.compile(
    rf"^(?P<artifact>{name})"
    rf"(((#(?P<stage>{name})|@(?P<version>v{semver}))(?P<cancel>!?))"
    rf"|@((?P<deprecated>deprecated)|(?P<created>created)))"
    rf"(#({counter}))?\Z"
)


class VersionSort(Enum):
    """String-valued options "semver" and "timestamp".

    NOTE(review): by its name this selects how versions are ordered; the code
    that consumes it is outside this view.
    """

    SemVer = "semver"
    Timestamp = "timestamp"


ASSIGNMENTS_PER_VERSION = -1
VERSIONS_PER_STAGE = 1
24 changes: 23 additions & 1 deletion gto/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,24 @@ def __init__(self, stage, stages) -> None:
super().__init__(self.message)


class NoActivePromotion(GTOException):
class NoActiveAssignment(GTOException):
_message = "No version in stage '{stage}' was found for '{name}'"

def __init__(self, stage, name) -> None:
self.message = self._message.format(stage=stage, name=name)
super().__init__(self.message)


class NoStageForVersion(GTOException):
    """Error stating that an artifact version is not in the given stage."""

    _message = "The artifact '{artifact}' version '{version}' is not in stage '{stage}'"

    def __init__(self, artifact, version, stage) -> None:
        # Fill the class-level template once, keep the text on the instance
        # and hand the same text to the base exception.
        text = self._message.format(artifact=artifact, version=version, stage=stage)
        self.message = text
        super().__init__(text)


class RefNotFound(GTOException):
_message = "Ref '{ref}' was not found in the repository history"

Expand All @@ -123,6 +133,10 @@ def __init__(self, ref) -> None:
super().__init__(self.message)


class AmbiguousArg(GTOException):
    """GTO error type that adds nothing to its base class.

    NOTE(review): by its name it reports an ambiguous argument; the raise
    sites are outside this view.
    """


class InvalidVersion(GTOException):
    """GTO error type that adds nothing to its base class.

    NOTE(review): by its name it reports an invalid version; the raise sites
    are outside this view.
    """

Expand Down Expand Up @@ -171,6 +185,14 @@ def __init__(self, name) -> None:
super().__init__(self.message)


class TagNotFound(GTOException):
    """Error stating that a tag with the given name is not found."""

    message = "tag '{name}' is not found"

    def __init__(self, name) -> None:
        # The class-level template is read through the instance and then
        # shadowed by the formatted text stored as an instance attribute.
        formatted = self.message.format(name=name)
        self.message = formatted
        super().__init__(formatted)


class ValidationError(GTOException):
    """GTO error type that adds nothing to its base class.

    NOTE(review): by its name it reports failed validation; the raise sites
    are outside this view.
    """

Expand Down
18 changes: 9 additions & 9 deletions gto/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import entrypoints
from pydantic import BaseModel

ENRICHMENT_ENRTYPOINT = "gto.enrichment"
ENRICHMENT_ENTRYPOINT = "gto.enrichment"


def import_string(path):
Expand Down Expand Up @@ -34,7 +34,7 @@ def get_human_readable(self) -> str:
raise NotImplementedError


class Enrichment(BaseModel, ABC):
class EnrichmentReader(BaseModel, ABC):
source: str

@abstractmethod
Expand All @@ -55,7 +55,7 @@ def describe(
# return self.repr


# class CLIEnrichment(Enrichment):
# class CLIEnrichment(EnrichmentReader):
# cmd: str
# info_type: Union[str, Type[EnrichmentInfo]] = CLIEnrichmentInfo

Expand Down Expand Up @@ -88,28 +88,28 @@ def describe(

@lru_cache()
def _find_enrichments():
eps = entrypoints.get_group_named(ENRICHMENT_ENRTYPOINT)
eps = entrypoints.get_group_named(ENRICHMENT_ENTRYPOINT)
return {k: ep.load() for k, ep in eps.items()}


@lru_cache()
def find_enrichments() -> Dict[str, Enrichment]:
def find_enrichments() -> Dict[str, EnrichmentReader]:
enrichments = _find_enrichments()
res = {}
for name, e in enrichments.items():
# if isinstance(e, type) and issubclass(e, Enrichment) and not e.__fields_set__:
if isinstance(e, type) and issubclass(e, Enrichment):
if isinstance(e, type) and issubclass(e, EnrichmentReader):
res[name] = e()
if isinstance(e, Enrichment):
if isinstance(e, EnrichmentReader):
res[name] = e
return res


@lru_cache()
def find_enrichment_types() -> Dict[str, Type[Enrichment]]:
def find_enrichment_types() -> Dict[str, Type[EnrichmentReader]]:
enrichments = _find_enrichments()
return {
k: e
for k, e in enrichments.items()
if isinstance(e, type) and issubclass(e, Enrichment)
if isinstance(e, type) and issubclass(e, EnrichmentReader)
}
4 changes: 2 additions & 2 deletions gto/ext_dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# from pydantic import BaseModel
# from ruamel.yaml import safe_load

# from gto.ext import Enrichment, EnrichmentInfo
# from gto.ext import EnrichmentReader, EnrichmentInfo


# class DVCEnrichmentInfo(EnrichmentInfo):
Expand All @@ -18,7 +18,7 @@
# return f"""DVC-tracked [{self.size} bytes]"""


# class DVCEnrichment(Enrichment):
# class DVCEnrichment(EnrichmentReader):
# def describe(self, obj: str) -> Optional[DVCEnrichmentInfo]:
# try:
# with open(obj + ".dvc", encoding="utf8") as f:
Expand Down
4 changes: 2 additions & 2 deletions gto/ext_mlem.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# from mlem.core.objects import DatasetMeta, MlemMeta, ModelMeta
# from pydantic import BaseModel

# from gto.ext import Enrichment, EnrichmentInfo
# from gto.ext import EnrichmentReader, EnrichmentInfo


# class MlemInfo(EnrichmentInfo):
Expand All @@ -27,7 +27,7 @@
# return description


# class MlemEnrichment(Enrichment):
# class MlemEnrichment(EnrichmentReader):
# source = "mlem"

# def describe(self, repo, obj: str, rev: Optional[str]) -> Optional[MlemInfo]:
Expand Down
66 changes: 37 additions & 29 deletions gto/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
import git
from pydantic import BaseModel, parse_obj_as, validator

from gto.base import BaseManager, BaseRegistryState, BaseVersion
from gto.base import BaseManager, BaseRegistryState
from gto.base import Commit as EnrichmentEvent
from gto.config import (
CONFIG_FILE_NAME,
RegistryConfig,
Expand All @@ -26,7 +27,7 @@
PathIsUsed,
WrongArgs,
)
from gto.ext import Enrichment, EnrichmentInfo
from gto.ext import EnrichmentInfo, EnrichmentReader
from gto.utils import resolve_ref


Expand Down Expand Up @@ -182,7 +183,7 @@ def get_history(self) -> Dict[str, Index]:
raise NotImplementedError

def add(self, name, type, path, must_exist, labels, description, update):
for arg in [name] + (labels or []):
for arg in [name] + list(labels or []):
assert_name_is_valid(arg)
if type:
self.config.assert_type(type)
Expand Down Expand Up @@ -355,13 +356,25 @@ def update_state(
# processing registered artifacts and versions first
for artifact in state.get_artifacts().values():
for version in artifact.versions:
commit = self.repo.commit(version.commit_hexsha)
enrichments = self.describe(
artifact.name,
artifact.artifact,
# faster to make git.Reference here
rev=self.repo.commit(version.commit_hexsha),
rev=commit,
)
artifact.update_enrichments(
version=version.name, enrichments=enrichments
version.add_event(
EnrichmentEvent(
artifact=artifact.artifact,
version=version.version,
created_at=datetime.fromtimestamp(commit.committed_date),
author=commit.author.name,
author_email=commit.author.email,
commit_hexsha=commit.hexsha,
message=commit.message,
committer=commit.committer.name,
committer_email=commit.committer.email,
enrichments=enrichments,
)
)
state.update_artifact(artifact)
for commit in self.get_commits(
Expand All @@ -370,31 +383,26 @@ def update_state(
for art_name in GTOEnrichment().discover(self.repo, commit):
enrichments = self.describe(art_name, rev=commit)
artifact = state.find_artifact(art_name, create_new=True)
version = artifact.find_version(commit_hexsha=commit.hexsha)
# TODO: duplicated in tag.py
if version:
version = version.name
else:
artifact.add_version(
BaseVersion(
artifact=art_name,
name=commit.hexsha,
created_at=datetime.fromtimestamp(commit.committed_date),
author=commit.author.name,
author_email=commit.author.email,
commit_hexsha=commit.hexsha,
discovered=True,
)
version = artifact.find_version(
commit_hexsha=commit.hexsha, create_new=True
)
version.add_event(
EnrichmentEvent(
artifact=artifact.artifact,
version=version.version,
created_at=datetime.fromtimestamp(commit.committed_date),
author=commit.author.name,
author_email=commit.author.email,
commit_hexsha=commit.hexsha,
message=commit.message,
committer=commit.committer.name,
committer_email=commit.committer.email,
enrichments=enrichments,
)
version = commit.hexsha
artifact.update_enrichments(version=version, enrichments=enrichments)
)
state.update_artifact(artifact)
return state

def check_ref(self, ref: str, state: BaseRegistryState):
# TODO: implement
raise NotImplementedError()


def init_index_manager(path):
try:
Expand All @@ -417,7 +425,7 @@ def get_path(self):
return self.artifact.path


class GTOEnrichment(Enrichment):
class GTOEnrichment(EnrichmentReader):
source = "gto"

def discover( # pylint: disable=no-self-use
Expand Down
Loading

0 comments on commit 691ca4b

Please sign in to comment.