From 87effd5a1b10c8fd6cb2d0a3a2b077e0b9544870 Mon Sep 17 00:00:00 2001 From: Uri Baghin Date: Fri, 28 Feb 2020 14:37:43 +1100 Subject: [PATCH] Incremental commit and extract. --- container/image.bzl | 47 ++++++--- container/incremental_load.sh.tpl | 53 ++++++++-- container/layer_tools.bzl | 55 ++++++++-- docker/util/BUILD | 2 - docker/util/commit.sh.tpl | 32 ------ docker/util/extract.sh.tpl | 28 ----- docker/util/run.bzl | 163 +++++++++++++----------------- 7 files changed, 195 insertions(+), 185 deletions(-) delete mode 100644 docker/util/commit.sh.tpl delete mode 100644 docker/util/extract.sh.tpl diff --git a/container/image.bzl b/container/image.bzl index c613ada5c..99b6c5069 100644 --- a/container/image.bzl +++ b/container/image.bzl @@ -311,7 +311,14 @@ def _impl( output_layer = None, workdir = None, null_cmd = None, - null_entrypoint = None): + null_entrypoint = None, + docker_run_flags = "", + commit = False, + commit_output = None, + extract = False, + extract_path = None, + extract_output = None, + action_run = False): """Implementation for the container_image rule. Args: @@ -344,6 +351,13 @@ def _impl( workdir: str, overrides ctx.attr.workdir null_cmd: bool, overrides ctx.attr.null_cmd null_entrypoint: bool, overrides ctx.attr.null_entrypoint + docker_run_flags: str List, flags to pass to docker when running the container + commit: bool, whether to run the container and commit the result + commit_output: File to use as output for the committed container + extract: bool, whether to run the container and extract a file from it + extract_path: str, path to the file to extract from the container + extract_output: File to copy the extract file to + action_run: bool, whether output_executable is going to be run as an action """ name = name or ctx.label.name entrypoint = entrypoint or ctx.attr.entrypoint @@ -359,7 +373,6 @@ def _impl( output_digest = output_digest or ctx.outputs.digest output_config = output_config or ctx.outputs.config output_layer = output_layer or ctx.outputs.layer - build_script = ctx.outputs.build_script null_cmd = null_cmd or ctx.attr.null_cmd null_entrypoint = null_entrypoint or ctx.attr.null_entrypoint @@ -369,15 +382,16 @@ def _impl( # We do not use the default argument of attrs.string() in order to distinguish between # an image using the default and an image intentionally overriding the base's run flags. # Since this is a string attribute, the default value is the empty string. - if ctx.attr.docker_run_flags != "": - docker_run_flags = ctx.attr.docker_run_flags - elif ctx.attr.base and ImageInfo in ctx.attr.base: - docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags - else: - # Run the container using host networking, so that the service is - # available to the developer without having to poke around with - # docker inspect. - docker_run_flags = "-i --rm --network=host" + if docker_run_flags == "": + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + else: + # Run the container using host networking, so that the service is + # available to the developer without having to poke around with + # docker inspect. + docker_run_flags = "-i --rm --network=host" if ctx.attr.launcher: if not file_map: @@ -491,12 +505,21 @@ def _impl( tag_name: container_parts, } + commit_base_config = parent_parts.get("config") _incr_load( ctx, images, build_executable, run = not ctx.attr.legacy_run_behavior, run_flags = docker_run_flags, + commit = commit, + commit_name = tag_name + "_commit_output", + commit_base_config = commit_base_config, + commit_output = commit_output, + extract = extract, + extract_path = extract_path, + extract_output = extract_output, + action_run = action_run, ) _assemble_image( @@ -515,7 +538,7 @@ def _impl( ) runfiles = ctx.runfiles( - files = unzipped_layers + diff_ids + [config_file, config_digest] + + files = unzipped_layers + diff_ids + [commit_base_config, config_file, config_digest] + ([container_parts["legacy"]] if container_parts["legacy"] else []), ) diff --git a/container/incremental_load.sh.tpl b/container/incremental_load.sh.tpl index 4a4be51aa..c31bf9637 100644 --- a/container/incremental_load.sh.tpl +++ b/container/incremental_load.sh.tpl @@ -19,14 +19,19 @@ set -eu # This is a generated file that loads all docker layers built by "docker_build". function guess_runfiles() { + if [[ "%{action_run}" == "True" ]]; then + # The script is running as an action + pwd + else if [ -d ${BASH_SOURCE[0]}.runfiles ]; then - # Runfiles are adjacent to the current script. - echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )" + # Runfiles are adjacent to the current script. + echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )" else - # The current script is within some other script's runfiles. - mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|' + # The current script is within some other script's runfiles. + mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|' fi + fi } RUNFILES="${PYTHON_RUNFILES:-$(guess_runfiles)}" @@ -35,8 +40,8 @@ DOCKER="%{docker_tool_path}" DOCKER_FLAGS="%{docker_flags}" if [[ -z "${DOCKER}" ]]; then - echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?" - exit 1 + echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?" + exit 1 fi # Create temporary files in which to record things to clean up. @@ -130,7 +135,7 @@ function import_config() { local tmp_dir="$(mktemp -d)" echo "${tmp_dir}" >> "${TEMP_FILES}" - cd "${tmp_dir}" + pushd "${tmp_dir}" >/dev/null # Docker elides layer reads from the tarball when it # already has a copy of the layer with the same basis @@ -206,6 +211,8 @@ EOF # and then streaming exactly the layers we've established are # needed into the Docker daemon. tar cPh "${MISSING[@]}" | tee image.tar | "${DOCKER}" ${DOCKER_FLAGS} load + + popd >/dev/null } function tag_layer() { @@ -239,6 +246,36 @@ function read_variables() { # This generated and injected by docker_*. %{tag_statements} +# Optional statements to extract files from the container +if [[ "%{extract}" == "True" ]]; then + id=$(%{run_statements}) + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + if [ $retcode != 0 ]; then + $DOCKER $DOCKER_FLAGS logs $id && false + fi + $DOCKER $DOCKER_FLAGS cp $id:%{extract_path} %{extract_output} + $DOCKER $DOCKER_FLAGS rm $id + exit +fi + +# Optional statements to commit changes to the container as a new container +if [[ "%{commit}" == "True" ]]; then + id=$(%{run_statements}) + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + if [ $retcode != 0 ]; then + $DOCKER $DOCKER_FLAGS logs $id && false + fi + config=$(< %{commit_base_config}) + cmd='["/bin/sh", "-c", "/bin/bash"]' + regex='\"Cmd\" ?: ?(\[[^]]*\])' + if [[ config =~ regex ]]; then + cmd=${BASH_REMATCH[1]} + fi + $DOCKER $DOCKER_FLAGS commit -c "CMD $cmd" $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} + $DOCKER $DOCKER_FLAGS rm $id +fi + # An optional "docker run" statement for invoking a loaded container. # This is not executed if the single argument --norun is passed or # no run_statements are generated (in which case, 'run' is 'False'). diff --git a/container/layer_tools.bzl b/container/layer_tools.bzl index 20d8f8d78..64024d00a 100644 --- a/container/layer_tools.bzl +++ b/container/layer_tools.bzl @@ -189,7 +189,15 @@ def incremental_load( output, stamp = False, run = False, - run_flags = None): + run_flags = None, + commit = False, + commit_name = None, + commit_base_config = None, + commit_output = None, + extract = False, + extract_path = None, + extract_output = None, + action_run = False): """Generate the incremental load statement. @@ -200,6 +208,14 @@ def incremental_load( stamp: Whether to stamp the produced image run: Whether to run the script or not run_flags: Additional run flags + commit: bool, whether to run the container and commit the result + commit_name: str, name to commit the new container as + commit_base_config: File to copy the restore the original command from + commit_output: File to use as output for the committed container + extract: bool, whether to run the container and extract a file from it + extract_path: str, path to the file to extract from the container + extract_output: File to copy the extract file to + action_run: bool, whether output_executable is going to be run as an action """ stamp_files = [] if stamp: @@ -215,9 +231,24 @@ def incremental_load( if len(images) > 1 and run: fail("Bazel run does not currently support execution of " + "multiple containers (only loading).") + if commit: + if not commit_name: + fail("Missing commit_name.") + if not commit_base_config: + fail("Missing commit_base_config.") + if not commit_output: + fail("Missing commit_output.") + if extract: + if run: + fail("Cannot execute a container and extract a file from it at the same time.") + if not extract_path: + fail("Missing extract_path.") + if not extract_output: + fail("Missing extract_output.") load_statements = [] tag_statements = [] + extract_statements = [] run_statements = [] # TODO(mattmoor): Consider adding cleanup_statements. @@ -227,7 +258,7 @@ def incremental_load( # First load the legacy base image, if it exists. if image.get("legacy"): load_statements += [ - "load_legacy '%s'" % _get_runfile_path(ctx, image["legacy"]), + "load_legacy '%s'" % (image["legacy"].path if action_run else _get_runfile_path(ctx, image["legacy"])), ] pairs = zip(image["diff_id"], image["unzipped_layer"]) @@ -236,11 +267,11 @@ def incremental_load( # in the daemon. load_statements += [ "import_config '%s' %s" % ( - _get_runfile_path(ctx, image["config"]), + image["config"].path if action_run else _get_runfile_path(ctx, image["config"]), " ".join([ "'%s' '%s'" % ( - _get_runfile_path(ctx, diff_id), - _get_runfile_path(ctx, unzipped_layer), + diff_id.path if action_run else _get_runfile_path(ctx, diff_id), + unzipped_layer.path if action_run else _get_runfile_path(ctx, unzipped_layer), ) for (diff_id, unzipped_layer) in pairs ]), @@ -255,10 +286,10 @@ def incremental_load( # It is notable that the only legal use of '{' in a # tag would be for stamp variables, '$' is not allowed. tag_reference, - _get_runfile_path(ctx, image["config_digest"]), + image["config_digest"].path if action_run else _get_runfile_path(ctx, image["config_digest"]), ), ] - if run: + if run or commit or extract: # Args are embedded into the image, so omitted here. run_statements += [ "\"${DOCKER}\" ${DOCKER_FLAGS} run %s %s" % (run_flags, tag_reference), @@ -269,14 +300,22 @@ def incremental_load( substitutions = { "%{docker_flags}": " ".join(toolchain_info.docker_flags), "%{docker_tool_path}": toolchain_info.tool_path, + "%{action_run}": str(action_run), "%{load_statements}": "\n".join(load_statements), "%{run_statements}": "\n".join(run_statements), "%{run}": str(run), + "%{commit}": str(commit), + "%{commit_base_config}": commit_base_config.path if commit_base_config else "", + "%{output_image}": commit_name if commit_name else "", + "%{output_tar}": commit_output.path if commit_output else "", + "%{extract_path}": extract_path if extract_path else "", + "%{extract_output}": extract_output.path if extract_output else "", + "%{extract}": str(extract), # If this rule involves stamp variables than load them as bash # variables, and turn references to them into bash variable # references. "%{stamp_statements}": "\n".join([ - "read_variables %s" % _get_runfile_path(ctx, f) + "read_variables %s" % (f.path if action_run else _get_runfile_path(ctx, f)) for f in stamp_files ]), "%{tag_statements}": "\n".join(tag_statements), diff --git a/docker/util/BUILD b/docker/util/BUILD index 46f6dc6df..91dd8a08c 100644 --- a/docker/util/BUILD +++ b/docker/util/BUILD @@ -31,8 +31,6 @@ py_binary( ) exports_files([ - "commit.sh.tpl", - "extract.sh.tpl", "image_util.sh.tpl", ]) diff --git a/docker/util/commit.sh.tpl b/docker/util/commit.sh.tpl deleted file mode 100644 index 9cb14d429..000000000 --- a/docker/util/commit.sh.tpl +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash - -set -ex - -# Setup tools and load utils -TO_JSON_TOOL="%{to_json_tool}" -source %{util_script} - -# Resolve the docker tool path -DOCKER="%{docker_tool_path}" -DOCKER_FLAGS="%{docker_flags}" - -if [[ -z "$DOCKER" ]]; then - echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?" - exit 1 -fi - -# Load the image and remember its name -image_id=$(%{image_id_extractor_path} %{image_tar}) -$DOCKER $DOCKER_FLAGS load -i %{image_tar} - -id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} $image_id %{commands}) -# Actually wait for the container to finish running its commands -retcode=$($DOCKER $DOCKER_FLAGS wait $id) -# Trigger a failure if the run had a non-zero exit status -if [ $retcode != 0 ]; then - $DOCKER $DOCKER_FLAGS logs $id && false -fi - -reset_cmd $image_id $id %{output_image} -$DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} -$DOCKER $DOCKER_FLAGS rm $id diff --git a/docker/util/extract.sh.tpl b/docker/util/extract.sh.tpl deleted file mode 100644 index 8fb315df8..000000000 --- a/docker/util/extract.sh.tpl +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -set -ex - -# Resolve the docker tool path -DOCKER="%{docker_tool_path}" -DOCKER_FLAGS="%{docker_flags}" - -if [[ -z "$DOCKER" ]]; then - echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?" - exit 1 -fi - -# Load the image and remember its name -image_id=$(%{image_id_extractor_path} %{image_tar}) -$DOCKER $DOCKER_FLAGS load -i %{image_tar} - -id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} $image_id %{commands}) - -retcode=$($DOCKER $DOCKER_FLAGS wait $id) - -# Print any error that occurred in the container. -if [ $retcode != 0 ]; then - $DOCKER $DOCKER_FLAGS logs $id && false -fi - -$DOCKER $DOCKER_FLAGS cp $id:%{extract_file} %{output} -$DOCKER $DOCKER_FLAGS rm $id diff --git a/docker/util/run.bzl b/docker/util/run.bzl index d745a3d19..ce9797f8a 100644 --- a/docker/util/run.bzl +++ b/docker/util/run.bzl @@ -18,6 +18,9 @@ to new container image, or extract specified targets to a directory on the host machine. """ +load("@bazel_skylib//lib:dicts.bzl", "dicts") +load("//container:image.bzl", _image = "image") + def _extract_impl( ctx, name = "", @@ -54,38 +57,47 @@ def _extract_impl( docker_run_flags = docker_run_flags or ctx.attr.docker_run_flags extract_file = extract_file or ctx.attr.extract_file output_file = output_file or ctx.outputs.out - script = script_file or ctx.outputs.script + script_file = script_file or ctx.outputs.script + output_tarball = ctx.actions.declare_file(name + ".tar") + output_digest = ctx.actions.declare_file(name + ".digest") + output_config = ctx.actions.declare_file(name + ".config") + output_layer = ctx.actions.declare_file(name + ".layer") - toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + if not "-d" in docker_run_flags: + docker_run_flags = docker_run_flags + ["-d"] - # Generate a shell script to execute the run statement - ctx.actions.expand_template( - template = ctx.file._extract_tpl, - output = script, - substitutions = { - "%{commands}": _process_commands(commands), - "%{docker_flags}": " ".join(toolchain_info.docker_flags), - "%{docker_run_flags}": " ".join(docker_run_flags), - "%{docker_tool_path}": toolchain_info.tool_path, - "%{extract_file}": extract_file, - "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, - "%{image_tar}": image.path, - "%{output}": output_file.path, - }, - is_executable = True, + image_result = _image.implementation( + ctx, + name, + image, + cmd = _process_commands(commands), + output_executable = script_file, + output_tarball = output_tarball, + output_digest = output_digest, + output_config = output_config, + output_layer = output_layer, + docker_run_flags = " ".join(docker_run_flags), + extract = True, + extract_path = extract_file, + extract_output = output_file, + action_run = True, ) ctx.actions.run( - inputs = extra_deps if extra_deps else [], + executable = script_file, + tools = image_result[1].default_runfiles.files, + inputs = [image], outputs = [output_file], - tools = [image, ctx.executable._extract_image_id], - executable = script, use_default_shell_env = True, ) - return struct() + return [ + DefaultInfo( + files = depset([script_file, output_file]), + ), + ] -_extract_attrs = { +_extract_attrs = dicts.add(_image.attrs, { "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -106,17 +118,7 @@ _extract_attrs = { allow_single_file = True, cfg = "target", ), - "_extract_image_id": attr.label( - default = Label("//contrib:extract_image_id"), - cfg = "host", - executable = True, - allow_files = True, - ), - "_extract_tpl": attr.label( - default = Label("//docker/util:extract.sh.tpl"), - allow_single_file = True, - ), -} +}) _extract_outputs = { "out": "%{name}%{extract_file}", @@ -166,58 +168,49 @@ def _commit_impl( image = image or ctx.file.image commands = commands or ctx.attr.commands docker_run_flags = docker_run_flags or ctx.attr.docker_run_flags - script = ctx.actions.declare_file(name + ".build") + script = ctx.outputs.build output_image_tar = output_image_tar or ctx.outputs.out - toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + if not "-d" in docker_run_flags: + docker_run_flags = docker_run_flags + ["-d"] - # Generate a shell script to execute the reset cmd - image_utils = ctx.actions.declare_file("image_util.sh") - ctx.actions.expand_template( - template = ctx.file._image_utils_tpl, - output = image_utils, - substitutions = { - "%{docker_flags}": " ".join(toolchain_info.docker_flags), - "%{docker_tool_path}": toolchain_info.tool_path, - }, - is_executable = True, - ) + run_image = "%s.run" % name + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) - # Generate a shell script to execute the run statement - ctx.actions.expand_template( - template = ctx.file._run_tpl, - output = script, - substitutions = { - "%{commands}": _process_commands(commands), - "%{docker_flags}": " ".join(toolchain_info.docker_flags), - "%{docker_run_flags}": " ".join(docker_run_flags), - "%{docker_tool_path}": toolchain_info.tool_path, - "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, - "%{image_tar}": image.path, - "%{output_image}": "bazel/%s:%s" % ( - ctx.label.package or "default", - name, - ), - "%{output_tar}": output_image_tar.path, - "%{to_json_tool}": ctx.executable._to_json_tool.path, - "%{util_script}": image_utils.path, - }, - is_executable = True, + image_result = _image.implementation( + ctx, + name, + image, + cmd = _process_commands(commands), + output_executable = script, + output_tarball = run_image_output_tarball, + output_layer = run_image_output_layer, + output_digest = run_image_output_digest, + output_config = run_image_output_config, + docker_run_flags = " ".join(docker_run_flags), + commit = True, + commit_output = output_image_tar, + action_run = True, ) - runfiles = [image, image_utils] - ctx.actions.run( - outputs = [output_image_tar], - inputs = runfiles, executable = script, - tools = [ctx.executable._extract_image_id, ctx.executable._to_json_tool], + tools = image_result[1].default_runfiles.files, + inputs = [image], + outputs = [output_image_tar], use_default_shell_env = True, ) - return struct() + return [ + DefaultInfo( + files = depset([output_image_tar, script]), + ), + ] -_commit_attrs = { +_commit_attrs = dicts.add(_image.attrs, { "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -233,27 +226,7 @@ _commit_attrs = { allow_single_file = True, cfg = "target", ), - "_extract_image_id": attr.label( - default = Label("//contrib:extract_image_id"), - cfg = "host", - executable = True, - allow_files = True, - ), - "_image_utils_tpl": attr.label( - default = "//docker/util:image_util.sh.tpl", - allow_single_file = True, - ), - "_run_tpl": attr.label( - default = Label("//docker/util:commit.sh.tpl"), - allow_single_file = True, - ), - "_to_json_tool": attr.label( - default = Label("//docker/util:to_json"), - cfg = "host", - executable = True, - allow_files = True, - ), -} +}) _commit_outputs = { "out": "%{name}_commit.tar", "build": "%{name}.build", @@ -279,4 +252,4 @@ commit = struct( def _process_commands(command_list): # Use the $ to allow escape characters in string - return 'sh -c $\"{0}\"'.format(" && ".join(command_list)) + return ['sh', '-c', " && ".join(command_list)]