Skip to content

Commit

Permalink
plots: add plot markers to DVC files (#3807)
Browse files Browse the repository at this point in the history
* run: add --plot/--plot-no-cache

Part of #3409

* plot: move show/diff to API

* plots: show marked plots by default

Involves some refactoring to make plots comply with the rest of the code
base.

* dvc.yaml: use top-level metrics and plots

* dvc.yaml: add support for plot template

* plots: add support for templates in dvc.yaml

* dvc.yaml: allow metrics/plots to have persist/cache

* plots: render single page by default and introduce --show-json

* plot: use "working tree" instead of "workspace"

Just to sync this with the rest of the codebase. We should consider
renaming it to "workspace" everywhere, but for now I am leaving it as is
to avoid breaking anything that depends on it.

* plots: rename -f|--file to -o|--out

Better corresponds to other commands like `import`.

* plots: use RepoTree and brancher instead of api

* stage: get rid of OutputParams

* tests: plots: add simple diff test

* plots: use `--targets` instead of `--datafile`

Makes it comply with `dvc metrics/params` CLI options.

* tests: unit: plots: add --show-json test

* stage: cleanup exception for format error

Based on feedback from the users.

* lockfile: use relpath in corruption error
  • Loading branch information
efiop authored May 20, 2020
1 parent aaca140 commit 09136f2
Show file tree
Hide file tree
Showing 26 changed files with 631 additions and 468 deletions.
128 changes: 56 additions & 72 deletions dvc/command/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,33 @@

from dvc.command.base import CmdBase, append_doc_link, fix_subparsers
from dvc.exceptions import DvcException
from dvc.repo.plots.data import WORKSPACE_REVISION_NAME

logger = logging.getLogger(__name__)

PAGE_HTML = """<!DOCTYPE html>
<html>
<head>
<title>DVC Plot</title>
<script src="https://cdn.jsdelivr.net/npm/vega@5.10.0"></script>
<script src="https://cdn.jsdelivr.net/npm/vega-lite@4.8.1"></script>
<script src="https://cdn.jsdelivr.net/npm/vega-embed@6.5.1"></script>
</head>
<body>
{divs}
</body>
</html>"""

DIV_HTML = """<div id = "{id}"></div>
<script type = "text/javascript">
var spec = {vega_json};
vegaEmbed('#{id}', spec);
</script>"""


class CmdPlots(CmdBase):
def _revisions(self):
def _func(self, *args, **kwargs):
raise NotImplementedError

def _result_file(self):
if self.args.file:
return self.args.file

extension = self._result_extension()
base = self._result_basename()

result_file = base + extension
return result_file

def _result_basename(self):
if self.args.datafile:
return self.args.datafile
return "plot"

def _result_extension(self):
if not self.args.no_html:
return ".html"
elif self.args.template:
return os.path.splitext(self.args.template)[-1]
return ".json"

def run(self):
fields = None
jsonpath = None
Expand All @@ -44,33 +40,38 @@ def run(self):
else:
fields = set(self.args.select.split(","))
try:
plot_string = self.repo.plot(
datafile=self.args.datafile,
plots = self._func(
targets=self.args.targets,
template=self.args.template,
revisions=self._revisions(),
fields=fields,
x_field=self.args.x,
y_field=self.args.y,
path=jsonpath,
embed=not self.args.no_html,
csv_header=not self.args.no_csv_header,
title=self.args.title,
x_title=self.args.xlab,
y_title=self.args.ylab,
)

if self.args.stdout:
logger.info(plot_string)
else:
result_path = self._result_file()
with open(result_path, "w") as fobj:
fobj.write(plot_string)
if self.args.show_json:
import json

logger.info(json.dumps(plots))
return 0

divs = [
DIV_HTML.format(id=f"plot{i}", vega_json=plot)
for i, plot in enumerate(plots.values())
]
html = PAGE_HTML.format(divs="\n".join(divs))
path = self.args.out or "plots.html"

logger.info(
"file://{}".format(
os.path.join(self.repo.root_dir, result_path)
)
)
with open(path, "w") as fobj:
fobj.write(html)

logger.info(
"file://{}".format(os.path.join(self.repo.root_dir, path))
)

except DvcException:
logger.exception("")
Expand All @@ -80,18 +81,13 @@ def run(self):


class CmdPlotsShow(CmdPlots):
def _revisions(self):
return None
def _func(self, *args, **kwargs):
return self.repo.plots.show(*args, **kwargs)


class CmdPlotsDiff(CmdPlots):
def _revisions(self):
revisions = self.args.revisions or []
if len(revisions) <= 1:
if len(revisions) == 0 and self.repo.scm.is_dirty():
revisions.append("HEAD")
revisions.append(WORKSPACE_REVISION_NAME)
return revisions
def _func(self, *args, **kwargs):
return self.repo.plots.diff(*args, revs=self.args.revisions, **kwargs)


def add_parser(subparsers, parent_parser):
Expand Down Expand Up @@ -130,7 +126,7 @@ def add_parser(subparsers, parent_parser):
help="File to be injected with data.",
)
plots_show_parser.add_argument(
"-f", "--file", default=None, help="Name of the generated file."
"-o", "--out", default=None, help="Destination path to save plots to.",
)
plots_show_parser.add_argument(
"-s",
Expand All @@ -144,23 +140,17 @@ def add_parser(subparsers, parent_parser):
plots_show_parser.add_argument(
"-y", default=None, help="Field name for y axis."
)
plots_show_parser.add_argument(
"--stdout",
action="store_true",
default=False,
help="Print plots specification to stdout.",
)
plots_show_parser.add_argument(
"--no-csv-header",
action="store_true",
default=False,
help="Required when CSV or TSV datafile does not have a header.",
)
plots_show_parser.add_argument(
"--no-html",
"--show-json",
action="store_true",
default=False,
help="Do not wrap Vega plot JSON with HTML.",
help="Show output in JSON format.",
)
plots_show_parser.add_argument("--title", default=None, help="Plot title.")
plots_show_parser.add_argument(
Expand All @@ -170,7 +160,9 @@ def add_parser(subparsers, parent_parser):
"--ylab", default=None, help="Y axis title."
)
plots_show_parser.add_argument(
"datafile", nargs="?", default=None, help="Metrics file to visualize",
"targets",
nargs="*",
help="Metrics files to visualize. Shows all plots by default.",
)
plots_show_parser.set_defaults(func=CmdPlotsShow)

Expand All @@ -193,14 +185,12 @@ def add_parser(subparsers, parent_parser):
help="File to be injected with data.",
)
plots_diff_parser.add_argument(
"-d",
"--datafile",
nargs="?",
default=None,
help="Metrics file to visualize",
"--targets",
nargs="*",
help="Metrics file to visualize. Shows all plots by default.",
)
plots_diff_parser.add_argument(
"-f", "--file", default=None, help="Name of the generated file."
"-o", "--out", default=None, help="Destination path to save plots to.",
)
plots_diff_parser.add_argument(
"-s",
Expand All @@ -214,23 +204,17 @@ def add_parser(subparsers, parent_parser):
plots_diff_parser.add_argument(
"-y", default=None, help="Field name for y axis."
)
plots_diff_parser.add_argument(
"--stdout",
action="store_true",
default=False,
help="Print plot specification to stdout.",
)
plots_diff_parser.add_argument(
"--no-csv-header",
action="store_true",
default=False,
help="Provided CSV ot TSV datafile does not have a header.",
)
plots_diff_parser.add_argument(
"--no-html",
"--show-json",
action="store_true",
default=False,
help="Do not wrap Vega plot JSON with HTML.",
help="Show output in JSON format.",
)
plots_diff_parser.add_argument("--title", default=None, help="Plot title.")
plots_diff_parser.add_argument(
Expand Down
22 changes: 20 additions & 2 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def run(self):
self.args.outs_no_cache,
self.args.metrics,
self.args.metrics_no_cache,
self.args.plots,
self.args.plots_no_cache,
self.args.outs_persist,
self.args.outs_persist_no_cache,
self.args.params,
Expand All @@ -24,8 +26,8 @@ def run(self):
): # pragma: no cover
logger.error(
"too few arguments. Specify at least one: `-d`, `-o`, `-O`, "
"`-m`, `-M`, `-p`, `--outs-persist`, "
"`--outs-persist-no-cache`, `command`."
"`-m`, `-M`, `-p`, `--plots`, `--plots-no-cache`, "
"`--outs-persist`, `--outs-persist-no-cache`, `command`."
)
return 1

Expand All @@ -36,6 +38,8 @@ def run(self):
outs_no_cache=self.args.outs_no_cache,
metrics=self.args.metrics,
metrics_no_cache=self.args.metrics_no_cache,
plots=self.args.plots,
plots_no_cache=self.args.plots_no_cache,
deps=self.args.deps,
params=self.args.params,
fname=self.args.file,
Expand Down Expand Up @@ -140,6 +144,20 @@ def add_parser(subparsers, parent_parser):
"(do not put into DVC cache).",
metavar="<path>",
)
run_parser.add_argument(
"--plots",
action="append",
default=[],
help="Declare output plot file.",
metavar="<path>",
)
run_parser.add_argument(
"--plots-no-cache",
action="append",
default=[],
help="Declare output plot file (do not put into DVC cache).",
metavar="<path>",
)
run_parser.add_argument(
"-f",
"--file",
Expand Down
13 changes: 7 additions & 6 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,7 @@


class LockfileCorruptedError(DvcException):
def __init__(self, path):
super().__init__(f"Lockfile '{path}' is corrupted.")
pass


def is_valid_filename(path):
Expand Down Expand Up @@ -103,7 +102,7 @@ def _load(self):
with self.repo.tree.open(self.path) as fd:
stage_text = fd.read()
d = parse_stage(stage_text, self.path)
self.validate(d, self.path)
self.validate(d, self.relpath)
return d, stage_text

@classmethod
Expand All @@ -112,7 +111,7 @@ def validate(cls, d, fname=None):
try:
cls.SCHEMA(d)
except MultipleInvalid as exc:
raise StageFileFormatError(fname, exc)
raise StageFileFormatError(f"'{fname}' format error: {exc}")

def remove_with_prompt(self, force=False):
raise NotImplementedError
Expand Down Expand Up @@ -238,9 +237,11 @@ def load(self):
with self.repo.tree.open(self.path) as fd:
data = parse_stage(fd.read(), self.path)
try:
self.validate(data, fname=self.path)
self.validate(data, fname=self.relpath)
except StageFileFormatError:
raise LockfileCorruptedError(self.path)
raise LockfileCorruptedError(
f"Lockfile '{self.relpath}' is corrupted."
)
return data

def dump(self, stage, **kwargs):
Expand Down
27 changes: 23 additions & 4 deletions dvc/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,11 @@
SCHEMA[Required(BaseOutput.PARAM_PATH)] = str
SCHEMA[BaseOutput.PARAM_CACHE] = bool
SCHEMA[BaseOutput.PARAM_METRIC] = BaseOutput.METRIC_SCHEMA
SCHEMA[BaseOutput.PARAM_PLOT] = bool
SCHEMA[BaseOutput.PARAM_PERSIST] = bool


def _get(stage, p, info, cache, metric, persist=False):
def _get(stage, p, info, cache, metric, plot=False, persist=False):
parsed = urlparse(p)

if parsed.scheme == "remote":
Expand All @@ -69,6 +70,7 @@ def _get(stage, p, info, cache, metric, persist=False):
cache=cache,
remote=remote,
metric=metric,
plot=plot,
persist=persist,
)

Expand All @@ -81,6 +83,7 @@ def _get(stage, p, info, cache, metric, persist=False):
cache=cache,
remote=None,
metric=metric,
plot=plot,
persist=persist,
)
return LocalOutput(
Expand All @@ -90,6 +93,7 @@ def _get(stage, p, info, cache, metric, persist=False):
cache=cache,
remote=None,
metric=metric,
plot=plot,
persist=persist,
)

Expand All @@ -100,19 +104,34 @@ def loadd_from(stage, d_list):
p = d.pop(BaseOutput.PARAM_PATH)
cache = d.pop(BaseOutput.PARAM_CACHE, True)
metric = d.pop(BaseOutput.PARAM_METRIC, False)
plot = d.pop(BaseOutput.PARAM_PLOT, False)
persist = d.pop(BaseOutput.PARAM_PERSIST, False)
ret.append(
_get(
stage, p, info=d, cache=cache, metric=metric, persist=persist,
stage,
p,
info=d,
cache=cache,
metric=metric,
plot=plot,
persist=persist,
)
)
return ret


def loads_from(stage, s_list, use_cache=True, metric=False, persist=False):
def loads_from(
stage, s_list, use_cache=True, metric=False, plot=False, persist=False
):
return [
_get(
stage, s, info={}, cache=use_cache, metric=metric, persist=persist,
stage,
s,
info={},
cache=use_cache,
metric=metric,
plot=plot,
persist=persist,
)
for s in s_list
]
Loading

0 comments on commit 09136f2

Please sign in to comment.