From aaab1e4f93e23106cdbf69cf83975750f676ba9d Mon Sep 17 00:00:00 2001 From: Eugene Daily <edaily@canva.com> Date: Wed, 16 Feb 2022 16:04:24 +1100 Subject: [PATCH] incremental extract --- container/image.bzl | 31 +- container/incremental_load.sh.tpl | 40 +- container/layer_tools.bzl | 17 +- docker/util/commit.sh.tpl | 34 +- docker/util/commit_layer.sh.tpl | 44 ++- docker/util/extract.sh.tpl | 17 +- docker/util/image_util.sh.tpl | 24 +- docker/util/run.bzl | 608 +++++++++++++++++++++++++++--- docs/container.md | 4 +- tests/docker/util/BUILD | 25 ++ 10 files changed, 724 insertions(+), 120 deletions(-) diff --git a/container/image.bzl b/container/image.bzl index fd2b72962..5a8b86058 100644 --- a/container/image.bzl +++ b/container/image.bzl @@ -297,7 +297,9 @@ def _impl( output_layer = None, workdir = None, null_cmd = None, - null_entrypoint = None): + null_entrypoint = None, + action_run = False, + docker_run_flags = None): """Implementation for the container_image rule. You can write a customized container_image rule by writing something like: @@ -354,6 +356,7 @@ def _impl( workdir: str, overrides ctx.attr.workdir null_cmd: bool, overrides ctx.attr.null_cmd null_entrypoint: bool, overrides ctx.attr.null_entrypoint + action_run: bool, whether output_executable is going to be run as an action """ name = name or ctx.label.name entrypoint = entrypoint or ctx.attr.entrypoint @@ -371,7 +374,6 @@ def _impl( output_config = output_config or ctx.outputs.config output_config_digest = output_config_digest or ctx.outputs.config_digest output_layer = output_layer or ctx.outputs.layer - build_script = ctx.outputs.build_script null_cmd = null_cmd or ctx.attr.null_cmd null_entrypoint = null_entrypoint or ctx.attr.null_entrypoint @@ -381,15 +383,19 @@ def _impl( # We do not use the default argument of attrs.string() in order to distinguish between # an image using the default and an image intentionally overriding the base's run flags. 
# Since this is a string attribute, the default value is the empty string. - if ctx.attr.docker_run_flags != "": - docker_run_flags = ctx.attr.docker_run_flags - elif ctx.attr.base and ImageInfo in ctx.attr.base: - docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags - else: - # Run the container using host networking, so that the service is - # available to the developer without having to poke around with - # docker inspect. - docker_run_flags = "-i --rm --network=host" + docker_run_flags_are_default = False + if docker_run_flags == None: + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + else: + docker_run_flags_are_default = True + + # Run the container using host networking, so that the service is + # available to the developer without having to poke around with + # docker inspect. + docker_run_flags = "-i --rm --network=host" if ctx.attr.launcher: if not file_map: @@ -509,6 +515,7 @@ def _impl( build_executable, run = not ctx.attr.legacy_run_behavior, run_flags = docker_run_flags, + action_run = action_run, ) _assemble_image( @@ -540,7 +547,7 @@ def _impl( ImageInfo( container_parts = container_parts, legacy_run_behavior = ctx.attr.legacy_run_behavior, - docker_run_flags = docker_run_flags, + docker_run_flags = "" if docker_run_flags_are_default and not ctx.attr.legacy_run_behavior else docker_run_flags, ), DefaultInfo( executable = build_executable, diff --git a/container/incremental_load.sh.tpl b/container/incremental_load.sh.tpl index b0e0157c8..e6a043eba 100644 --- a/container/incremental_load.sh.tpl +++ b/container/incremental_load.sh.tpl @@ -19,13 +19,18 @@ set -eu # This is a generated file that loads all docker layers built by "docker_build". function guess_runfiles() { - if [ -d ${BASH_SOURCE[0]}.runfiles ]; then - # Runfiles are adjacent to the current script. 
- echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )" + if [[ "%{action_run}" == "True" ]]; then + # The script is running as an action + pwd else - # The current script is within some other script's runfiles. - mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|' + if [ -d ${BASH_SOURCE[0]}.runfiles ]; then + # Runfiles are adjacent to the current script. + echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )" + else + # The current script is within some other script's runfiles. + mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|' + fi fi } @@ -136,7 +141,7 @@ function import_config() { local tmp_dir="$(mktemp -d)" echo "${tmp_dir}" >> "${TEMP_FILES}" - cd "${tmp_dir}" + pushd "${tmp_dir}" >/dev/null # Docker elides layer reads from the tarball when it # already has a copy of the layer with the same basis @@ -212,6 +217,8 @@ EOF # and then streaming exactly the layers we've established are # needed into the Docker daemon. tar cPh "${MISSING[@]}" | "${DOCKER}" ${DOCKER_FLAGS} load + + popd >/dev/null } function tag_layer() { @@ -279,13 +286,10 @@ if [[ "%{run}" == "True" ]]; then esac done - # Once we've loaded the images for all layers, we no longer need the temporary files on disk. - # We can clean up before we exec docker, since the exit handler will no longer run. - cleanup - + # This generated and injected by docker_*. + args=(%{run_statement}) # Bash treats empty arrays as unset variables for the purposes of `set -u`, so we only # conditionally add these arrays to our args. - args=(%{run_statement}) if [[ ${#docker_args[@]} -gt 0 ]]; then args+=("${docker_args[@]}") fi @@ -294,6 +298,14 @@ if [[ "%{run}" == "True" ]]; then args+=("${container_args[@]}") fi - # This generated and injected by docker_*. - eval exec "${args[@]}" + if [[ "%{action_run}" == "True" ]]; then + # This will be used by other scripts that are concatenated to this one. 
+ id=$("${args[@]}") + else + # Once we've loaded the images for all layers, we no longer need the temporary files on disk. + # We can clean up before we exec docker, since the exit handler will no longer run. + cleanup + + eval exec "${args[@]}" + fi fi diff --git a/container/layer_tools.bzl b/container/layer_tools.bzl index a5ec1f693..5a455d9c4 100644 --- a/container/layer_tools.bzl +++ b/container/layer_tools.bzl @@ -196,7 +196,8 @@ def incremental_load( output, stamp = False, run = False, - run_flags = None): + run_flags = None, + action_run = False): """Generate the incremental load statement. @@ -207,6 +208,7 @@ def incremental_load( stamp: Whether to stamp the produced image run: Whether to run the script or not run_flags: Additional run flags + action_run: bool, whether output_executable is going to be run as an action """ stamp_files = [] if stamp: @@ -237,7 +239,7 @@ def incremental_load( # First load the legacy base image, if it exists. if image.get("legacy"): load_statements.append( - "load_legacy '%s'" % _get_runfile_path(ctx, image["legacy"]), + "load_legacy '%s'" % (image["legacy"].path if action_run else _get_runfile_path(ctx, image["legacy"])), ) pairs = zip(image["diff_id"], image["unzipped_layer"]) @@ -246,11 +248,11 @@ def incremental_load( # in the daemon. load_statements.append( "import_config '%s' %s" % ( - _get_runfile_path(ctx, image["config"]), + image["config"].path if action_run else _get_runfile_path(ctx, image["config"]), " ".join([ "'%s' '%s'" % ( - _get_runfile_path(ctx, diff_id), - _get_runfile_path(ctx, unzipped_layer), + diff_id.path if action_run else _get_runfile_path(ctx, diff_id), + unzipped_layer.path if action_run else _get_runfile_path(ctx, unzipped_layer), ) for (diff_id, unzipped_layer) in pairs ]), @@ -265,7 +267,7 @@ def incremental_load( # It is notable that the only legal use of '{' in a # tag would be for stamp variables, '$' is not allowed. 
tag_reference, - _get_runfile_path(ctx, image["config_digest"]), + image["config_digest"].path if action_run else _get_runfile_path(ctx, image["config_digest"]), ), ) @@ -274,6 +276,7 @@ def incremental_load( substitutions = { "%{docker_flags}": " ".join(toolchain_info.docker_flags), "%{docker_tool_path}": docker_path(toolchain_info), + "%{action_run}": str(action_run), "%{load_statements}": "\n".join(load_statements), "%{run_statement}": run_statement, "%{run_tag}": run_tag, @@ -282,7 +285,7 @@ def incremental_load( # variables, and turn references to them into bash variable # references. "%{stamp_statements}": "\n".join([ - "read_variables %s" % _get_runfile_path(ctx, f) + "read_variables %s" % (f.path if action_run else _get_runfile_path(ctx, f)) for f in stamp_files ]), "%{tag_statements}": "\n".join(tag_statements), diff --git a/docker/util/commit.sh.tpl b/docker/util/commit.sh.tpl index 44a2bec93..1dffb82bf 100644 --- a/docker/util/commit.sh.tpl +++ b/docker/util/commit.sh.tpl @@ -15,20 +15,34 @@ if [[ -z "$DOCKER" ]]; then exit 1 fi -logfile=$(output_logfile) +# Redirect output to a log so we can be silent on success +# intentionally don't use traps here as there might already be traps set +logfile=$(mktemp) if ! 
( - # Load the image and remember its name - image_id=$(%{image_id_extractor_path} %{image_tar}) - $DOCKER $DOCKER_FLAGS load -i %{image_tar} - - readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} $image_id %{commands}) retcode=0 - if $DOCKER $DOCKER_FLAGS start -a "${id}"; then - reset_cmd $image_id $id %{output_image} - $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} + + if %{legacy_load_behavior}; then + # Load the image and remember its name + image_id=$(%{image_id_extractor_path} %{image_tar}) + $DOCKER $DOCKER_FLAGS load -i %{image_tar} + + readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} $image_id %{commands}) + if $DOCKER $DOCKER_FLAGS start -a "${id}"; then + reset_cmd $image_id $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} + else + retcode=$? + fi else - retcode=$? + # Actually wait for the container to finish running its commands + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + # Trigger a failure if the run had a non-zero exit status + if [ "$retcode" != 0 ]; then + $DOCKER $DOCKER_FLAGS logs $id && false + fi + reset_parent_cmd %{parent_config} $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} fi $DOCKER $DOCKER_FLAGS rm $id diff --git a/docker/util/commit_layer.sh.tpl b/docker/util/commit_layer.sh.tpl index 8768e5a78..52103626f 100644 --- a/docker/util/commit_layer.sh.tpl +++ b/docker/util/commit_layer.sh.tpl @@ -14,18 +14,39 @@ if [[ -z "$DOCKER" ]]; then exit 1 fi -logfile=$(output_logfile) +# Redirect output to a log so we can be silent on success +# intentionally don't use traps here as there might already be traps set +logfile=$(mktemp) if ! 
( - # Load the image and remember its name - image_id=$(%{image_id_extractor_path} %{image_tar}) - $DOCKER $DOCKER_FLAGS load -i %{image_tar} - - readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} --env-file %{env_file_path} $image_id %{commands}) - retcode=0 - if $DOCKER $DOCKER_FLAGS start -a "${id}"; then - OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar" - reset_cmd $image_id $id %{output_image} + OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar" + if %{legacy_load_behavior}; then + # Load the image and remember its name + image_id=$(%{image_id_extractor_path} %{image_tar}) + $DOCKER $DOCKER_FLAGS load -i %{image_tar} + + readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} --env-file %{env_file_path} $image_id %{commands}) + retcode=0 + if $DOCKER $DOCKER_FLAGS start -a "${id}"; then + reset_cmd $image_id $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o $OUTPUT_IMAGE_TAR + + # Extract the last layer from the image - this will be the layer generated by $DOCKER commit + %{image_last_layer_extractor_path} $OUTPUT_IMAGE_TAR %{output_layer_tar} %{output_diff_id} + + # Delete the intermediate tar + rm $OUTPUT_IMAGE_TAR + else + retcode=$? + fi + else + # Actually wait for the container to finish running its commands + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + # Trigger a failure if the run had a non-zero exit status + if [ "$retcode" != 0 ]; then + $DOCKER $DOCKER_FLAGS logs $id && false + fi + reset_parent_cmd %{parent_config} $id %{output_image} $DOCKER $DOCKER_FLAGS save %{output_image} -o $OUTPUT_IMAGE_TAR # Extract the last layer from the image - this will be the layer generated by $DOCKER commit @@ -33,10 +54,9 @@ if ! ( # Delete the intermediate tar rm $OUTPUT_IMAGE_TAR - else - retcode=$? 
fi + # Delete the container and the intermediate image $DOCKER $DOCKER_FLAGS rm $id $DOCKER $DOCKER_FLAGS rmi %{output_image} diff --git a/docker/util/extract.sh.tpl b/docker/util/extract.sh.tpl index b886b83f3..a0757f754 100644 --- a/docker/util/extract.sh.tpl +++ b/docker/util/extract.sh.tpl @@ -12,20 +12,23 @@ if [[ -z "$DOCKER" ]]; then fi # Redirect output to a log so we can be silent on success +# intentionally don't use traps here as there might already be traps set logfile=$(mktemp) -trap "rm $logfile" EXIT if ! ( - # Load the image and remember its name - image_id=$(%{image_id_extractor_path} %{image_tar}) - $DOCKER $DOCKER_FLAGS load -i %{image_tar} - - id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} $image_id %{commands}) + if %{legacy_load_behavior}; then + # Load the image and remember its name + image_id=$(%{image_id_extractor_path} %{image_tar}) + $DOCKER $DOCKER_FLAGS load -i %{image_tar} + id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} $image_id %{commands}) + fi + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + # Print any error that occurred in the container. 
- if [ $retcode != 0 ]; then + if [ "$retcode" != 0 ]; then $DOCKER $DOCKER_FLAGS logs $id && false exit $retcode fi diff --git a/docker/util/image_util.sh.tpl b/docker/util/image_util.sh.tpl index dfe4c288f..6660b3bd3 100755 --- a/docker/util/image_util.sh.tpl +++ b/docker/util/image_util.sh.tpl @@ -21,15 +21,21 @@ reset_cmd() { $DOCKER $DOCKER_FLAGS commit -c "CMD $fmt_cmd" "${container_id}" "${output_image_name}" } -function output_logfile { - readonly filename=$(mktemp) +reset_parent_cmd() { + local parent_config=$1 + local container_id=$2 + local output_image_name=$3 - function cleanup { - test -f "$filename" - cat "$filename" - rm "$filename" - } - trap cleanup EXIT + # Resolve the docker tool path + DOCKER="%{docker_tool_path}" + DOCKER_FLAGS="%{docker_flags}" - echo $filename + local config cmd regex + config=$(< "${parent_config}") + cmd='["/bin/sh", "-c"]' + regex='\"Cmd\" ?: ?(\[[^]]*\])' + if [[ $config =~ $regex ]]; then + cmd=${BASH_REMATCH[1]} + fi + $DOCKER $DOCKER_FLAGS commit -c "CMD $cmd" "${container_id}" "${output_image_name}" } diff --git a/docker/util/run.bzl b/docker/util/run.bzl index eedb488d4..f7ccb0999 100644 --- a/docker/util/run.bzl +++ b/docker/util/run.bzl @@ -23,8 +23,12 @@ load( "//skylib:hash.bzl", _hash_tools = "tools", ) +load("@io_bazel_rules_docker//container:container.bzl", _container = "container") +load("@io_bazel_rules_docker//container:image.bzl", _image = "image") load("@io_bazel_rules_docker//container:layer.bzl", "zip_layer") -load("@io_bazel_rules_docker//container:providers.bzl", "LayerInfo") +load("@io_bazel_rules_docker//container:layer_tools.bzl", _get_layers = "get_from_target") +load("@io_bazel_rules_docker//container:providers.bzl", "ImageInfo", "LayerInfo") +load("//skylib:path.bzl", _join_path = "join") load( "//skylib:zip.bzl", _zip_tools = "tools", @@ -37,9 +41,8 @@ load( def _extract_impl( ctx, name = "", - image = None, - commands = None, - docker_run_flags = None, + base = None, + cmd = None, extract_file
= "", output_file = "", script_file = "", @@ -50,6 +53,137 @@ def _extract_impl( to finish, and then extracts a given file from the container to the bazel-out directory. + Args: + ctx: The bazel rule context + name: String, overrides ctx.label.name + base: File, overrides ctx.attr.base + cmd: str List, overrides ctx.attr.cmd + extract_file: str, overrides ctx.attr.extract_file + output_file: File, overrides ctx.outputs.out + script_file: File, overrides ctx.outputs.script + extra_deps: Label list, kept in the signature alongside the legacy + implementation. NOTE(review): the body of this function never + references extra_deps, so nothing is passed as extra action + inputs here - confirm whether that is intended. + """ + + name = name or ctx.label.name + extract_file = extract_file or ctx.attr.extract_file + output_file = output_file or ctx.outputs.out + script_file = script_file or ctx.outputs.script + + docker_run_flags = "" + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + if "-d" not in docker_run_flags: + docker_run_flags += " -d" + + run_image = "%s.run" % name + run_image_output_executable = ctx.actions.declare_file("%s.executable" % run_image) + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) + run_image_output_config_digest = ctx.actions.declare_file("%s.json.sha256" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + + image_result = _image.implementation( + ctx, + name, + base = base, + cmd = cmd, + output_executable = run_image_output_executable, + output_tarball = run_image_output_tarball, + output_config = run_image_output_config, + output_config_digest = run_image_output_config_digest, + output_digest = 
run_image_output_digest, + output_layer = run_image_output_layer, + action_run = True, + docker_run_flags = docker_run_flags, + ) + + footer = ctx.actions.declare_file(name + "_footer.sh") + + ctx.actions.expand_template( + template = ctx.file._extract_tpl, + output = footer, + substitutions = { + "%{extract_file}": extract_file, + "%{legacy_load_behavior}": "false", + "%{output}": output_file.path, + }, + ) + + ctx.actions.run_shell( + inputs = [run_image_output_executable, footer], + outputs = [script_file], + mnemonic = "Concat", + command = """ + set -eu + cat {first} {second} > {output} + """.format( + first = run_image_output_executable.path, + second = footer.path, + output = script_file.path, + ), + ) + + ctx.actions.run( + executable = script_file, + tools = image_result[1].default_runfiles.files, + outputs = [output_file], + use_default_shell_env = True, + ) + + return [ + DefaultInfo( + files = depset([script_file, output_file]), + ), + ] + +_extract_attrs = dicts.add(_image.attrs, { + "extract_file": attr.string( + doc = "Path to file to extract from container.", + mandatory = True, + ), + "legacy_run_behavior": attr.bool( + default = False, + ), + "_extract_tpl": attr.label( + default = Label("//docker/util:extract.sh.tpl"), + allow_single_file = True, + ), +}) + +_extract_outputs = { + "out": "%{name}%{extract_file}", + "script": "%{name}.build", +} + +container_run_and_extract_rule = rule( + attrs = _extract_attrs, + cfg = _container.image.cfg, + doc = ("This rule runs a set of commands in a given image, waits" + + "for the commands to finish, and then extracts a given file" + + " from the container to the bazel-out directory."), + outputs = _extract_outputs, + implementation = _extract_impl, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +def _extract_impl_legacy( + ctx, + name = "", + image = None, + commands = None, + docker_run_flags = None, + extract_file = "", + output_file = "", + script_file = "", + 
extra_deps = None): + """Legacy implementation for the container_run_and_extract rule. + Args: ctx: The bazel rule context name: String, overrides ctx.label.name @@ -87,6 +221,7 @@ def _extract_impl( "%{extract_file}": extract_file, "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, "%{image_tar}": image.path, + "%{legacy_load_behavior}": "true", "%{output}": output_file.path, }, is_executable = True, @@ -102,7 +237,7 @@ def _extract_impl( return [] -_extract_attrs = { +_extract_attrs_legacy = { "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -140,39 +275,198 @@ _extract_attrs = { ), } -_extract_outputs = { - "out": "%{name}%{extract_file}", - "script": "%{name}.build", -} +container_run_and_extract_legacy = rule( + attrs = _extract_attrs_legacy, + doc = ("This rule runs a set of commands in a given image, waits" + + "for the commands to finish, and then extracts a given file" + + " from the container to the bazel-out directory."), + outputs = _extract_outputs, + implementation = _extract_impl_legacy, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +def container_run_and_extract(name, legacy_load_behavior = True, **kwargs): + if legacy_load_behavior: + container_run_and_extract_legacy( + name = name, + **kwargs + ) + else: + container_run_and_extract_rule( + name = name, + **kwargs + ) # Export container_run_and_extract rule for other bazel rules to depend on. extract = struct( - attrs = _extract_attrs, + attrs = _extract_attrs_legacy, outputs = _extract_outputs, - implementation = _extract_impl, + implementation = _extract_impl_legacy, ) -container_run_and_extract = rule( - attrs = _extract_attrs, +def _commit_impl( + ctx, + name = None, + base = None, + cmd = None, + output_image_tar = None): + """Implementation for the container_run_and_commit rule. 
+ + This rule runs a set of commands in a given image, waits for the commands + to finish, and then commits the container to a new image. + + Args: + ctx: The bazel rule context + name: A unique name for this rule. + base: The input image + cmd: str List, overrides ctx.attr.cmd + output_image_tar: The output image obtained as a result of running + the commands on the input image + """ + + name = name or ctx.attr.name + script = ctx.outputs.build + output_image_tar = output_image_tar or ctx.outputs.out + + docker_run_flags = "" + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + if "-d" not in docker_run_flags: + docker_run_flags += " -d" + + run_image = "%s.run" % name + run_image_output_executable = ctx.actions.declare_file("%s.executable" % run_image) + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) + run_image_output_config_digest = ctx.actions.declare_file("%s.json.sha256" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + + toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + + # Generate a shell script to execute the reset cmd + image_utils = ctx.actions.declare_file("image_util.sh") + ctx.actions.expand_template( + template = ctx.file._image_utils_tpl, + output = image_utils, + substitutions = { + "%{docker_flags}": " ".join(toolchain_info.docker_flags), + "%{docker_tool_path}": docker_path(toolchain_info), + }, + is_executable = True, + ) + + image_result = _image.implementation( + ctx, + name, + base = base, + cmd = cmd, + output_executable = run_image_output_executable, + output_tarball = run_image_output_tarball, + output_config = 
run_image_output_config, + output_config_digest = run_image_output_config_digest, + output_digest = run_image_output_digest, + output_layer = run_image_output_layer, + action_run = True, + docker_run_flags = docker_run_flags, + ) + + parent_parts = _get_layers(ctx, name, ctx.attr.base, base) + parent_config = parent_parts.get("config") + + # Construct a temporary name based on the build target. + tag_name = "{}:{}".format(_join_path(ctx.attr.repository, ctx.label.package), name) + footer = ctx.actions.declare_file(name + "_footer.sh") + + ctx.actions.expand_template( + template = ctx.file._run_tpl, + output = footer, + substitutions = { + "%{parent_config}": parent_config.path, + "%{legacy_load_behavior}": "false", + "%{output_image}": tag_name, + "%{output_tar}": output_image_tar.path, + "%{util_script}": image_utils.path, + }, + ) + + ctx.actions.run_shell( + inputs = [run_image_output_executable, footer], + outputs = [script], + mnemonic = "Concat", + command = """ + set -eu + cat {first} {second} > {output} + """.format( + first = run_image_output_executable.path, + second = footer.path, + output = script.path, + ), + ) + + ctx.actions.run( + executable = script, + tools = image_result[1].default_runfiles.files, + inputs = [image_utils] + ([parent_config] if parent_config else []), + outputs = [output_image_tar], + use_default_shell_env = True, + ) + + return [ + DefaultInfo( + files = depset([output_image_tar, script]), + ), + ] + +_commit_attrs = dicts.add(_image.attrs, { + "legacy_run_behavior": attr.bool( + default = False, + ), + "_image_utils_tpl": attr.label( + default = "//docker/util:image_util.sh.tpl", + allow_single_file = True, + ), + "_run_tpl": attr.label( + default = Label("//docker/util:commit.sh.tpl"), + allow_single_file = True, + ), +}) + +# @unsorted-dict-items +_commit_outputs = { + "out": "%{name}_commit.tar", + "build": "%{name}.build", +} + +container_run_and_commit_rule = rule( + attrs = _commit_attrs, + cfg = _container.image.cfg, doc = ("This rule runs 
a set of commands in a given image, waits" + - "for the commands to finish, and then extracts a given file" + - " from the container to the bazel-out directory."), - outputs = _extract_outputs, - implementation = _extract_impl, + "for the commands to finish, and then commits the" + + "container to a new image."), + executable = False, + outputs = _commit_outputs, + implementation = _commit_impl, toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], ) -def _commit_impl( +# Export container_run_and_commit rule for other bazel rules to depend on. +commit = struct( + attrs = _commit_attrs, + outputs = _commit_outputs, + implementation = _commit_impl, +) + +def _commit_impl_legacy( ctx, name = None, image = None, commands = None, docker_run_flags = None, output_image_tar = None): - """Implementation for the container_run_and_commit rule. - - This rule runs a set of commands in a given image, waits for the commands - to finish, and then commits the container to a new image. + """Legacy implementation for the container_run_and_commit rule. 
Args: ctx: The bazel rule context @@ -216,6 +510,7 @@ def _commit_impl( "%{docker_tool_path}": docker_path(toolchain_info), "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, "%{image_tar}": image.path, + "%{legacy_load_behavior}": "true", "%{output_image}": "bazel/%s:%s" % ( ctx.label.package or "default", name, @@ -239,7 +534,7 @@ def _commit_impl( return [] -_commit_attrs = { +_commit_attrs_legacy = { "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -277,31 +572,237 @@ _commit_attrs = { ), } -# @unsorted-dict-items -_commit_outputs = { - "out": "%{name}_commit.tar", - "build": "%{name}.build", -} - -container_run_and_commit = rule( - attrs = _commit_attrs, +container_run_and_commit_legacy = rule( + attrs = _commit_attrs_legacy, doc = ("This rule runs a set of commands in a given image, waits" + "for the commands to finish, and then commits the" + "container to a new image."), executable = False, outputs = _commit_outputs, - implementation = _commit_impl, + implementation = _commit_impl_legacy, toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], ) -# Export container_run_and_commit rule for other bazel rules to depend on. -commit = struct( - attrs = _commit_attrs, - outputs = _commit_outputs, - implementation = _commit_impl, -) +def container_run_and_commit(name, legacy_load_behavior = True, **kwargs): + if legacy_load_behavior: + container_run_and_commit_legacy( + name = name, + **kwargs + ) + else: + container_run_and_commit_rule( + name = name, + **kwargs + ) def _commit_layer_impl( + ctx, + name = None, + base = None, + cmd = None, + env = None, + compression = None, + compression_options = None, + output_layer_tar = None): + """Implementation for the container_run_and_commit_layer rule. + + This rule runs a set of commands in a given image, waits for the commands + to finish, and then extracts the layer of changes into a new container_layer target. 
+ + Args: + ctx: The bazel rule context + name: A unique name for this rule. + base: File, overrides ctx.attr.base + cmd: str List, overrides ctx.attr.cmd + env: str Dict, overrides ctx.attr.env + compression: str, overrides ctx.attr.compression + compression_options: str list, overrides ctx.attr.compression_options + output_layer_tar: The output layer obtained as a result of running + the commands on the input image + """ + + name = name or ctx.attr.name + script = ctx.actions.declare_file(name + ".build") + output_layer_tar = output_layer_tar or ctx.outputs.layer + env = env or ctx.attr.env + compression = compression or ctx.attr.compression + compression_options = compression_options or ctx.attr.compression_options + + docker_run_flags = "" + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + if "-d" not in docker_run_flags: + docker_run_flags += " -d" + + run_image = "%s.run" % name + run_image_output_executable = ctx.actions.declare_file("%s.executable" % run_image) + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) + run_image_output_config_digest = ctx.actions.declare_file("%s.json.sha256" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + + image_result = _image.implementation( + ctx, + name, + base = base, + cmd = cmd, + output_executable = run_image_output_executable, + output_tarball = run_image_output_tarball, + output_config = run_image_output_config, + output_config_digest = run_image_output_config_digest, + output_digest = run_image_output_digest, + output_layer = run_image_output_layer, + action_run = True, + docker_run_flags = docker_run_flags, + ) + + parent_parts = 
_get_layers(ctx, name, ctx.attr.base, base) + parent_config = parent_parts.get("config") + + toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + + # Generate a shell script to execute the reset cmd + image_utils = ctx.actions.declare_file("image_util.sh") + ctx.actions.expand_template( + template = ctx.file._image_utils_tpl, + output = image_utils, + substitutions = { + "%{docker_flags}": " ".join(toolchain_info.docker_flags), + "%{docker_tool_path}": docker_path(toolchain_info), + }, + is_executable = True, + ) + + docker_env = [ + "{}={}".format( + ctx.expand_make_variables("env", key, {}), + ctx.expand_make_variables("env", value, {}), + ) + for key, value in env.items() + ] + + env_file = ctx.actions.declare_file(name + ".env") + ctx.actions.write(env_file, "\n".join(docker_env)) + + output_diff_id = ctx.actions.declare_file(output_layer_tar.basename + ".sha256") + footer = ctx.actions.declare_file(name + "_footer.sh") + + # Generate a shell script to execute the run statement and extract the layer + ctx.actions.expand_template( + template = ctx.file._run_tpl, + output = footer, + substitutions = { + "%{env_file_path}": env_file.path, + "%{parent_config}": parent_config.path, + "%{legacy_load_behavior}": "false", + "%{output_diff_id}": output_diff_id.path, + "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, + "%{image_last_layer_extractor_path}": ctx.executable._last_layer_extractor_tool.path, + "%{output_image}": "bazel/%s:%s" % ( + ctx.label.package or "default", + name, + ), + "%{output_layer_tar}": output_layer_tar.path, + "%{util_script}": image_utils.path, + }, + is_executable = True, + ) + + ctx.actions.run_shell( + inputs = [run_image_output_executable, footer], + outputs = [script], + mnemonic = "Concat", + command = """ + set -eu + cat {first} {second} > {output} + """.format( + first = run_image_output_executable.path, + second = footer.path, + output = script.path, + ), + ) + + 
ctx.actions.run( + outputs = [output_layer_tar, output_diff_id], + inputs = [image_utils], + executable = script, + execution_requirements = { + # This action produces large output files, and isn't economical to + # upload to a remote cache. + "no-remote-cache": "1", + }, + mnemonic = "RunAndCommitLayer", + tools = [ctx.executable._extract_image_id, ctx.executable._last_layer_extractor_tool] + image_result[1].default_runfiles.files.to_list(), + use_default_shell_env = True, + ) + + # Generate a zipped layer and calculate the blob sum, this is for LayerInfo + zipped_layer, blob_sum = zip_layer( + ctx, + output_layer_tar, + compression = compression, + compression_options = compression_options, + ) + + return [ + LayerInfo( + unzipped_layer = output_layer_tar, + diff_id = output_diff_id, + zipped_layer = zipped_layer, + blob_sum = blob_sum, + env = env, + ), + ] + +_commit_layer_attrs = dicts.add(_image.attrs, { + "legacy_run_behavior": attr.bool( + default = False, + ), + "compression": attr.string(default = "gzip"), + "compression_options": attr.string_list(), + "_run_tpl": attr.label( + default = Label("//docker/util:commit_layer.sh.tpl"), + allow_single_file = True, + ), + "_extract_image_id": attr.label( + default = Label("//contrib:extract_image_id"), + cfg = "host", + executable = True, + allow_files = True, + ), + "_image_utils_tpl": attr.label( + default = Label("//docker/util:image_util.sh.tpl"), + allow_single_file = True, + ), + "_last_layer_extractor_tool": attr.label( + default = Label("//contrib:extract_last_layer"), + cfg = "host", + executable = True, + allow_files = True, + ), +}) + +_commit_layer_outputs = { + "layer": "%{name}-layer.tar", + "script": "%{name}.build", +} + +container_run_and_commit_layer_rule = rule( + attrs = _commit_layer_attrs, + cfg = _container.image.cfg, + doc = ("This rule runs a set of commands in a given image, waits " + + "for the commands to finish, and then commits the " + + "container state to a new layer."), + executable = 
False, + outputs = _commit_layer_outputs, + implementation = _commit_layer_impl, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +def _commit_layer_impl_legacy( ctx, name = None, image = None, @@ -372,6 +873,8 @@ def _commit_layer_impl( output = script, substitutions = { "%{commands}": _process_commands(commands), + "%{legacy_load_behavior}": "true", + "%{parent_config}": "", "%{docker_flags}": " ".join(toolchain_info.docker_flags), "%{docker_run_flags}": " ".join(docker_run_flags), "%{docker_tool_path}": docker_path(toolchain_info), @@ -419,7 +922,7 @@ def _commit_layer_impl( ), ] -_commit_layer_attrs = dicts.add({ +_commit_layer_attrs_legacy = dicts.add({ "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -460,26 +963,35 @@ _commit_layer_attrs = dicts.add({ ), }, _hash_tools, _zip_tools) -_commit_layer_outputs = { - "layer": "%{name}-layer.tar", -} - -container_run_and_commit_layer = rule( - attrs = _commit_layer_attrs, +container_run_and_commit_layer_legacy = rule( + attrs = _commit_layer_attrs_legacy, doc = ("This rule runs a set of commands in a given image, waits" + "for the commands to finish, and then commits the" + "container state to a new layer."), executable = False, outputs = _commit_layer_outputs, - implementation = _commit_layer_impl, + implementation = _commit_layer_impl_legacy, toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], ) +def container_run_and_commit_layer(name, legacy_load_behavior = True, **kwargs): + """Instantiates the legacy rule by default, or the image-based rule when legacy_load_behavior is False (the flag is consumed here, not forwarded).""" + if legacy_load_behavior: + container_run_and_commit_layer_legacy( + name = name, + **kwargs + ) + else: + container_run_and_commit_layer_rule( + name = name, + **kwargs + ) + # Export container_run_and_commit_layer rule for other bazel rules to depend on. 
commit_layer = struct( - attrs = _commit_layer_attrs, + attrs = _commit_layer_attrs_legacy, outputs = _commit_layer_outputs, - implementation = _commit_layer_impl, + implementation = _commit_layer_impl_legacy, ) def _process_commands(command_list): diff --git a/docs/container.md b/docs/container.md index 0c95672c9..bf5cd8261 100644 --- a/docs/container.md +++ b/docs/container.md @@ -403,7 +403,7 @@ image.implementation(<a href="#image.implementation-ctx">ctx</a>, <a href="#imag <a href="#image.implementation-compression_options">compression_options</a>, <a href="#image.implementation-experimental_tarball_format">experimental_tarball_format</a>, <a href="#image.implementation-debs">debs</a>, <a href="#image.implementation-tars">tars</a>, <a href="#image.implementation-architecture">architecture</a>, <a href="#image.implementation-operating_system">operating_system</a>, <a href="#image.implementation-os_version">os_version</a>, <a href="#image.implementation-output_executable">output_executable</a>, <a href="#image.implementation-output_tarball">output_tarball</a>, <a href="#image.implementation-output_config">output_config</a>, <a href="#image.implementation-output_config_digest">output_config_digest</a>, <a href="#image.implementation-output_digest">output_digest</a>, <a href="#image.implementation-output_layer">output_layer</a>, <a href="#image.implementation-workdir">workdir</a>, <a href="#image.implementation-null_cmd">null_cmd</a>, - <a href="#image.implementation-null_entrypoint">null_entrypoint</a>) + <a href="#image.implementation-null_entrypoint">null_entrypoint</a>, <a href="#image.implementation-action_run">action_run</a>, <a href="#image.implementation-docker_run_flags">docker_run_flags</a>) </pre> Implementation for the container_image rule. 
@@ -467,5 +467,7 @@ You can write a customized container_image rule by writing something like: | <a id="image.implementation-workdir"></a>workdir | str, overrides ctx.attr.workdir | <code>None</code> | | <a id="image.implementation-null_cmd"></a>null_cmd | bool, overrides ctx.attr.null_cmd | <code>None</code> | | <a id="image.implementation-null_entrypoint"></a>null_entrypoint | bool, overrides ctx.attr.null_entrypoint | <code>None</code> | +| <a id="image.implementation-action_run"></a>action_run | bool, whether output_executable is going to be run as an action | <code>False</code> | +| <a id="image.implementation-docker_run_flags"></a>docker_run_flags | <p align="center"> - </p> | <code>None</code> | diff --git a/tests/docker/util/BUILD b/tests/docker/util/BUILD index 3b12cdc60..dbef1212a 100644 --- a/tests/docker/util/BUILD +++ b/tests/docker/util/BUILD @@ -34,6 +34,19 @@ load( container_run_and_extract( name = "test_container_extract", + base = "@debian_base//image", + cmd = [ + "bash", + "-c", + "touch /foo.txt && echo 'test' > /foo.txt", + ], + docker_run_flags = "-u root", + extract_file = "/foo.txt", + legacy_load_behavior = False, +) + +container_run_and_extract( + name = "test_container_extract_legacy", commands = [ "touch /foo.txt", "echo 'test' > /foo.txt", @@ -69,6 +82,17 @@ file_test( container_run_and_commit( name = "test_container_commit", + base = "@debian_base//image", + cmd = [ + "touch", + "/foo.txt", + ], + docker_run_flags = "-u root", + legacy_load_behavior = False, +) + +container_run_and_commit( + name = "test_container_commit_legacy", commands = ["touch /foo.txt"], docker_run_flags = [ "-u", @@ -111,6 +135,7 @@ rule_test( name = "test_container_commit_layer_rule", generates = [ "test_container_commit_layer-layer.tar", + "test_container_commit_layer.build", ], rule = "test_container_commit_layer", )