From b3850a93e4fdbfa1527af14419342aa640b041da Mon Sep 17 00:00:00 2001 From: Eugene Daily Date: Wed, 16 Feb 2022 16:04:24 +1100 Subject: [PATCH] incremental extract --- container/image.bzl | 31 +- container/incremental_load.sh.tpl | 40 +- container/layer_tools.bzl | 17 +- docker/util/commit.sh.tpl | 31 +- docker/util/commit_layer.sh.tpl | 40 +- docker/util/extract.sh.tpl | 13 +- docker/util/image_util.sh.tpl | 19 + docker/util/run.bzl | 604 +++++++++++++++++++++++++++--- docs/container.md | 4 +- tests/docker/util/BUILD | 25 ++ 10 files changed, 716 insertions(+), 108 deletions(-) diff --git a/container/image.bzl b/container/image.bzl index fd2b72962..5a8b86058 100644 --- a/container/image.bzl +++ b/container/image.bzl @@ -297,7 +297,9 @@ def _impl( output_layer = None, workdir = None, null_cmd = None, - null_entrypoint = None): + null_entrypoint = None, + action_run = False, + docker_run_flags = None): """Implementation for the container_image rule. You can write a customized container_image rule by writing something like: @@ -354,6 +356,7 @@ def _impl( workdir: str, overrides ctx.attr.workdir null_cmd: bool, overrides ctx.attr.null_cmd null_entrypoint: bool, overrides ctx.attr.null_entrypoint + action_run: bool, whether output_executable is going to be run as an action """ name = name or ctx.label.name entrypoint = entrypoint or ctx.attr.entrypoint @@ -371,7 +374,6 @@ def _impl( output_config = output_config or ctx.outputs.config output_config_digest = output_config_digest or ctx.outputs.config_digest output_layer = output_layer or ctx.outputs.layer - build_script = ctx.outputs.build_script null_cmd = null_cmd or ctx.attr.null_cmd null_entrypoint = null_entrypoint or ctx.attr.null_entrypoint @@ -381,15 +383,19 @@ def _impl( # We do not use the default argument of attrs.string() in order to distinguish between # an image using the default and an image intentionally overriding the base's run flags. # Since this is a string attribute, the default value is the empty string. - if ctx.attr.docker_run_flags != "": - docker_run_flags = ctx.attr.docker_run_flags - elif ctx.attr.base and ImageInfo in ctx.attr.base: - docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags - else: - # Run the container using host networking, so that the service is - # available to the developer without having to poke around with - # docker inspect. - docker_run_flags = "-i --rm --network=host" + docker_run_flags_are_default = False + if docker_run_flags == None: + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + else: + docker_run_flags_are_default = True + + # Run the container using host networking, so that the service is + # available to the developer without having to poke around with + # docker inspect. 
+ docker_run_flags = "-i --rm --network=host" if ctx.attr.launcher: if not file_map: @@ -509,6 +515,7 @@ def _impl( build_executable, run = not ctx.attr.legacy_run_behavior, run_flags = docker_run_flags, + action_run = action_run, ) _assemble_image( @@ -540,7 +547,7 @@ def _impl( ImageInfo( container_parts = container_parts, legacy_run_behavior = ctx.attr.legacy_run_behavior, - docker_run_flags = docker_run_flags, + docker_run_flags = "" if docker_run_flags_are_default and not ctx.attr.legacy_run_behavior else docker_run_flags, ), DefaultInfo( executable = build_executable, diff --git a/container/incremental_load.sh.tpl b/container/incremental_load.sh.tpl index b0e0157c8..e6a043eba 100644 --- a/container/incremental_load.sh.tpl +++ b/container/incremental_load.sh.tpl @@ -19,13 +19,18 @@ set -eu # This is a generated file that loads all docker layers built by "docker_build". function guess_runfiles() { - if [ -d ${BASH_SOURCE[0]}.runfiles ]; then - # Runfiles are adjacent to the current script. - echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )" + if [[ "%{action_run}" == "True" ]]; then + # The script is running as an action + pwd else - # The current script is within some other script's runfiles. - mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|' + if [ -d ${BASH_SOURCE[0]}.runfiles ]; then + # Runfiles are adjacent to the current script. + echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )" + else + # The current script is within some other script's runfiles. + mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|' + fi fi } @@ -136,7 +141,7 @@ function import_config() { local tmp_dir="$(mktemp -d)" echo "${tmp_dir}" >> "${TEMP_FILES}" - cd "${tmp_dir}" + pushd "${tmp_dir}" >/dev/null # Docker elides layer reads from the tarball when it # already has a copy of the layer with the same basis @@ -212,6 +217,8 @@ EOF # and then streaming exactly the layers we've established are # needed into the Docker daemon. tar cPh "${MISSING[@]}" | "${DOCKER}" ${DOCKER_FLAGS} load + + popd >/dev/null } function tag_layer() { @@ -279,13 +286,10 @@ if [[ "%{run}" == "True" ]]; then esac done - # Once we've loaded the images for all layers, we no longer need the temporary files on disk. - # We can clean up before we exec docker, since the exit handler will no longer run. - cleanup - + # This generated and injected by docker_*. + args=(%{run_statement}) # Bash treats empty arrays as unset variables for the purposes of `set -u`, so we only # conditionally add these arrays to our args. - args=(%{run_statement}) if [[ ${#docker_args[@]} -gt 0 ]]; then args+=("${docker_args[@]}") fi @@ -294,6 +298,14 @@ if [[ "%{run}" == "True" ]]; then args+=("${container_args[@]}") fi - # This generated and injected by docker_*. - eval exec "${args[@]}" + if [[ "%{action_run}" == "True" ]]; then + # This will be used by other scripts that are concatenated to this one. + id=$("${args[@]}") + else + # Once we've loaded the images for all layers, we no longer need the temporary files on disk. + # We can clean up before we exec docker, since the exit handler will no longer run. 
+ cleanup + + eval exec "${args[@]}" + fi fi diff --git a/container/layer_tools.bzl b/container/layer_tools.bzl index a5ec1f693..5a455d9c4 100644 --- a/container/layer_tools.bzl +++ b/container/layer_tools.bzl @@ -196,7 +196,8 @@ def incremental_load( output, stamp = False, run = False, - run_flags = None): + run_flags = None, + action_run = False): """Generate the incremental load statement. @@ -207,6 +208,7 @@ def incremental_load( stamp: Whether to stamp the produced image run: Whether to run the script or not run_flags: Additional run flags + action_run: bool, whether output_executable is going to be run as an action """ stamp_files = [] if stamp: @@ -237,7 +239,7 @@ def incremental_load( # First load the legacy base image, if it exists. if image.get("legacy"): load_statements.append( - "load_legacy '%s'" % _get_runfile_path(ctx, image["legacy"]), + "load_legacy '%s'" % (image["legacy"].path if action_run else _get_runfile_path(ctx, image["legacy"])), ) pairs = zip(image["diff_id"], image["unzipped_layer"]) @@ -246,11 +248,11 @@ def incremental_load( # in the daemon. load_statements.append( "import_config '%s' %s" % ( - _get_runfile_path(ctx, image["config"]), + image["config"].path if action_run else _get_runfile_path(ctx, image["config"]), " ".join([ "'%s' '%s'" % ( - _get_runfile_path(ctx, diff_id), - _get_runfile_path(ctx, unzipped_layer), + diff_id.path if action_run else _get_runfile_path(ctx, diff_id), + unzipped_layer.path if action_run else _get_runfile_path(ctx, unzipped_layer), ) for (diff_id, unzipped_layer) in pairs ]), @@ -265,7 +267,7 @@ def incremental_load( # It is notable that the only legal use of '{' in a # tag would be for stamp variables, '$' is not allowed. tag_reference, - _get_runfile_path(ctx, image["config_digest"]), + image["config_digest"].path if action_run else _get_runfile_path(ctx, image["config_digest"]), ), ) @@ -274,6 +276,7 @@ def incremental_load( substitutions = { "%{docker_flags}": " ".join(toolchain_info.docker_flags), "%{docker_tool_path}": docker_path(toolchain_info), + "%{action_run}": str(action_run), "%{load_statements}": "\n".join(load_statements), "%{run_statement}": run_statement, "%{run_tag}": run_tag, @@ -282,7 +285,7 @@ def incremental_load( # variables, and turn references to them into bash variable # references. "%{stamp_statements}": "\n".join([ - "read_variables %s" % _get_runfile_path(ctx, f) + "read_variables %s" % (f.path if action_run else _get_runfile_path(ctx, f)) for f in stamp_files ]), "%{tag_statements}": "\n".join(tag_statements), diff --git a/docker/util/commit.sh.tpl b/docker/util/commit.sh.tpl index 44a2bec93..1739e5f67 100644 --- a/docker/util/commit.sh.tpl +++ b/docker/util/commit.sh.tpl @@ -16,19 +16,30 @@ if [[ -z "$DOCKER" ]]; then fi logfile=$(output_logfile) - if ! 
( - # Load the image and remember its name - image_id=$(%{image_id_extractor_path} %{image_tar}) - $DOCKER $DOCKER_FLAGS load -i %{image_tar} - - readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} $image_id %{commands}) retcode=0 - if $DOCKER $DOCKER_FLAGS start -a "${id}"; then - reset_cmd $image_id $id %{output_image} - $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} + + if %{legacy_load_behavior}; then + # Load the image and remember its name + image_id=$(%{image_id_extractor_path} %{image_tar}) + $DOCKER $DOCKER_FLAGS load -i %{image_tar} + + readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} $image_id %{commands}) + if $DOCKER $DOCKER_FLAGS start -a "${id}"; then + reset_cmd $image_id $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} + else + retcode=$? + fi else - retcode=$? + # Actually wait for the container to finish running its commands + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + # Trigger a failure if the run had a non-zero exit status + if [ $retcode != 0 ]; then + $DOCKER $DOCKER_FLAGS logs $id && false + fi + reset_parent_cmd %{parent_config} $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar} fi $DOCKER $DOCKER_FLAGS rm $id diff --git a/docker/util/commit_layer.sh.tpl b/docker/util/commit_layer.sh.tpl index 8768e5a78..4bf76fbe8 100644 --- a/docker/util/commit_layer.sh.tpl +++ b/docker/util/commit_layer.sh.tpl @@ -17,15 +17,34 @@ fi logfile=$(output_logfile) if ! ( - # Load the image and remember its name - image_id=$(%{image_id_extractor_path} %{image_tar}) - $DOCKER $DOCKER_FLAGS load -i %{image_tar} - - readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} --env-file %{env_file_path} $image_id %{commands}) - retcode=0 - if $DOCKER $DOCKER_FLAGS start -a "${id}"; then - OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar" - reset_cmd $image_id $id %{output_image} + OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar" + if %{legacy_load_behavior}; then + # Load the image and remember its name + image_id=$(%{image_id_extractor_path} %{image_tar}) + $DOCKER $DOCKER_FLAGS load -i %{image_tar} + + readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} --env-file %{env_file_path} $image_id %{commands}) + retcode=0 + if $DOCKER $DOCKER_FLAGS start -a "${id}"; then + reset_cmd $image_id $id %{output_image} + $DOCKER $DOCKER_FLAGS save %{output_image} -o $OUTPUT_IMAGE_TAR + + # Extract the last layer from the image - this will be the layer generated by $DOCKER commit + %{image_last_layer_extractor_path} $OUTPUT_IMAGE_TAR %{output_layer_tar} %{output_diff_id} + + # Delete the intermediate tar + rm $OUTPUT_IMAGE_TAR + else + retcode=$? + fi + else + # Actually wait for the container to finish running its commands + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + # Trigger a failure if the run had a non-zero exit status + if [ $retcode != 0 ]; then + $DOCKER $DOCKER_FLAGS logs $id && false + fi + reset_parent_cmd %{parent_config} $id %{output_image} $DOCKER $DOCKER_FLAGS save %{output_image} -o $OUTPUT_IMAGE_TAR # Extract the last layer from the image - this will be the layer generated by $DOCKER commit @@ -33,10 +52,9 @@ if ! ( # Delete the intermediate tar rm $OUTPUT_IMAGE_TAR - else - retcode=$? 
fi + # Delete the container and the intermediate image $DOCKER $DOCKER_FLAGS rm $id $DOCKER $DOCKER_FLAGS rmi %{output_image} diff --git a/docker/util/extract.sh.tpl b/docker/util/extract.sh.tpl index b886b83f3..c18923de4 100644 --- a/docker/util/extract.sh.tpl +++ b/docker/util/extract.sh.tpl @@ -16,14 +16,17 @@ logfile=$(mktemp) trap "rm $logfile" EXIT if ! ( - # Load the image and remember its name - image_id=$(%{image_id_extractor_path} %{image_tar}) - $DOCKER $DOCKER_FLAGS load -i %{image_tar} - - id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} $image_id %{commands}) + if %{legacy_load_behavior}; then + # Load the image and remember its name + image_id=$(%{image_id_extractor_path} %{image_tar}) + $DOCKER $DOCKER_FLAGS load -i %{image_tar} + id=$($DOCKER $DOCKER_FLAGS run -d %{docker_run_flags} $image_id %{commands}) + fi + retcode=$($DOCKER $DOCKER_FLAGS wait $id) + # Print any error that occurred in the container. if [ $retcode != 0 ]; then $DOCKER $DOCKER_FLAGS logs $id && false diff --git a/docker/util/image_util.sh.tpl b/docker/util/image_util.sh.tpl index dfe4c288f..e5ad167e6 100755 --- a/docker/util/image_util.sh.tpl +++ b/docker/util/image_util.sh.tpl @@ -21,6 +21,25 @@ reset_cmd() { $DOCKER $DOCKER_FLAGS commit -c "CMD $fmt_cmd" "${container_id}" "${output_image_name}" } +reset_parent_cmd() { + local parent_config=$1 + local container_id=$2 + local output_image_name=$3 + + # Resolve the docker tool path + DOCKER="%{docker_tool_path}" + DOCKER_FLAGS="%{docker_flags}" + + local config cmd regex + config=$(< "${parent_config}") + cmd='["/bin/sh", "-c"]' + regex='\"Cmd\" ?: ?(\[[^]]*\])' + if [[ config =~ regex ]]; then + cmd=${BASH_REMATCH[1]} + fi + $DOCKER $DOCKER_FLAGS commit -c "CMD $cmd" "${container_id}" "${output_image_name}" +} + function output_logfile { readonly filename=$(mktemp) diff --git a/docker/util/run.bzl b/docker/util/run.bzl index eedb488d4..6d06db9ef 100644 --- a/docker/util/run.bzl +++ b/docker/util/run.bzl @@ -23,8 +23,12 @@ load( "//skylib:hash.bzl", _hash_tools = "tools", ) +load("@io_bazel_rules_docker//container:container.bzl", _container = "container") +load("@io_bazel_rules_docker//container:image.bzl", _image = "image") load("@io_bazel_rules_docker//container:layer.bzl", "zip_layer") -load("@io_bazel_rules_docker//container:providers.bzl", "LayerInfo") +load("@io_bazel_rules_docker//container:layer_tools.bzl", _get_layers = "get_from_target") +load("@io_bazel_rules_docker//container:providers.bzl", "ImageInfo", "LayerInfo") +load("//skylib:path.bzl", _join_path = "join") load( "//skylib:zip.bzl", _zip_tools = "tools", @@ -37,9 +41,8 @@ load( def _extract_impl( ctx, name = "", - image = None, - commands = None, - docker_run_flags = None, + base = None, + cmd = None, extract_file = "", output_file = "", script_file = "", @@ -50,6 +53,137 @@ def _extract_impl( to finish, and then extracts a given file from the container to the bazel-out directory. + Args: + ctx: The bazel rule context + name: String, overrides ctx.label.name + base: File, overrides ctx.attr.base + cmd: str List, overrides ctx.attr.cmd + extract_file: File, overrides ctx.outputs.out + output_file: File, overrides ctx.outputs.output_file + script_file: File, overrides ctx.output.script_file + extra_deps: Label list, if not None these are passed as inputs + to the action running the container. This can be used if + e.g., you need to mount a directory that is produced + by another action. 
+ """ + + name = name or ctx.label.name + extract_file = extract_file or ctx.attr.extract_file + output_file = output_file or ctx.outputs.out + script_file = script_file or ctx.outputs.script + + docker_run_flags = "" + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + if "-d" not in docker_run_flags: + docker_run_flags += " -d" + + run_image = "%s.run" % name + run_image_output_executable = ctx.actions.declare_file("%s.executable" % run_image) + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) + run_image_output_config_digest = ctx.actions.declare_file("%s.json.sha256" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + + image_result = _image.implementation( + ctx, + name, + base = base, + cmd = cmd, + output_executable = run_image_output_executable, + output_tarball = run_image_output_tarball, + output_config = run_image_output_config, + output_config_digest = run_image_output_config_digest, + output_digest = run_image_output_digest, + output_layer = run_image_output_layer, + action_run = True, + docker_run_flags = docker_run_flags, + ) + + footer = ctx.actions.declare_file(name + "_footer.sh") + + ctx.actions.expand_template( + template = ctx.file._extract_tpl, + output = footer, + substitutions = { + "%{extract_file}": extract_file, + "%{legacy_load_behavior}": "false", + "%{output}": output_file.path, + }, + ) + + ctx.actions.run_shell( + inputs = [run_image_output_executable, footer], + outputs = [script_file], + mnemonic = "Concat", + command = """ + set -eu + cat {first} {second} > {output} + """.format( + first = run_image_output_executable.path, + second = footer.path, + output = script_file.path, + ), + ) + + ctx.actions.run( + executable = script_file, + tools = image_result[1].default_runfiles.files, + outputs = [output_file], + use_default_shell_env = True, + ) + + return [ + DefaultInfo( + files = depset([script_file, output_file]), + ), + ] + +_extract_attrs = dicts.add(_image.attrs, { + "extract_file": attr.string( + doc = "Path to file to extract from container.", + mandatory = True, + ), + "legacy_run_behavior": attr.bool( + default = False, + ), + "_extract_tpl": attr.label( + default = Label("//docker/util:extract.sh.tpl"), + allow_single_file = True, + ), +}) + +_extract_outputs = { + "out": "%{name}%{extract_file}", + "script": "%{name}.build", +} + +container_run_and_extract_rule = rule( + attrs = _extract_attrs, + cfg = _container.image.cfg, + doc = ("This rule runs a set of commands in a given image, waits" + + "for the commands to finish, and then extracts a given file" + + " from the container to the bazel-out directory."), + outputs = _extract_outputs, + implementation = _extract_impl, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +def _extract_impl_legacy( + ctx, + name = "", + image = None, + commands = None, + docker_run_flags = None, + extract_file = "", + output_file = "", + script_file = "", + extra_deps = None): + """Legacy implementation for the container_run_and_extract rule. 
+ Args: ctx: The bazel rule context name: String, overrides ctx.label.name @@ -87,6 +221,7 @@ def _extract_impl( "%{extract_file}": extract_file, "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, "%{image_tar}": image.path, + "%{legacy_load_behavior}": "true", "%{output}": output_file.path, }, is_executable = True, @@ -102,7 +237,7 @@ def _extract_impl( return [] -_extract_attrs = { +_extract_attrs_legacy = { "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -140,39 +275,194 @@ _extract_attrs = { ), } -_extract_outputs = { - "out": "%{name}%{extract_file}", - "script": "%{name}.build", -} +container_run_and_extract_legacy = rule( + attrs = _extract_attrs_legacy, + doc = ("This rule runs a set of commands in a given image, waits" + + "for the commands to finish, and then extracts a given file" + + " from the container to the bazel-out directory."), + outputs = _extract_outputs, + implementation = _extract_impl_legacy, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +def container_run_and_extract(name, legacy_load_behavior = True, **kwargs): + if legacy_load_behavior: + container_run_and_extract_legacy( + name = name, + **kwargs + ) + else: + container_run_and_extract_rule( + name = name, + **kwargs + ) # Export container_run_and_extract rule for other bazel rules to depend on. extract = struct( - attrs = _extract_attrs, + attrs = _extract_attrs_legacy, outputs = _extract_outputs, - implementation = _extract_impl, + implementation = _extract_impl_legacy, ) -container_run_and_extract = rule( - attrs = _extract_attrs, +def _commit_impl( + ctx, + name = None, + base = None, + cmd = None, + output_image_tar = None): + """Implementation for the container_run_and_commit rule. + + This rule runs a set of commands in a given image, waits for the commands + to finish, and then commits the container to a new image. + + Args: + ctx: The bazel rule context + name: A unique name for this rule. 
+ base: The input image + cmd: str List, overrides ctx.attr.cmd + output_image_tar: The output image obtained as a result of running + the commands on the input image + """ + + name = name or ctx.attr.name + script = ctx.outputs.build + output_image_tar = output_image_tar or ctx.outputs.out + + docker_run_flags = "" + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + if "-d" not in docker_run_flags: + docker_run_flags += " -d" + + run_image = "%s.run" % name + run_image_output_executable = ctx.actions.declare_file("%s.executable" % run_image) + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) + run_image_output_config_digest = ctx.actions.declare_file("%s.json.sha256" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + + toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + + # Generate a shell script to execute the reset cmd + image_utils = ctx.actions.declare_file("image_util.sh") + ctx.actions.expand_template( + template = ctx.file._image_utils_tpl, + output = image_utils, + substitutions = { + "%{docker_flags}": " ".join(toolchain_info.docker_flags), + "%{docker_tool_path}": docker_path(toolchain_info), + }, + is_executable = True, + ) + + image_result = _image.implementation( + ctx, + name, + base = base, + cmd = cmd, + output_executable = run_image_output_executable, + output_tarball = run_image_output_tarball, + output_config = run_image_output_config, + output_config_digest = run_image_output_config_digest, + output_digest = run_image_output_digest, + output_layer = run_image_output_layer, + action_run = True, + docker_run_flags = docker_run_flags, + ) + + parent_parts = _get_layers(ctx, name, ctx.attr.base, base) + parent_config = parent_parts.get("config") + + # Construct a temporary name based on the build target. 
+ tag_name = "{}:{}".format(_join_path(ctx.attr.repository, ctx.label.package), name) + footer = ctx.actions.declare_file(name + "_footer.sh") + + ctx.actions.expand_template( + template = ctx.file._run_tpl, + output = footer, + substitutions = { + "%{parent_config}": parent_config.path, + "%{legacy_load_behavior}": "false", + "%{output_image}": tag_name, + "%{output_tar}": output_image_tar.path, + "%{util_script}": image_utils.path, + }, + ) + + ctx.actions.run_shell( + inputs = [run_image_output_executable, footer], + outputs = [script], + mnemonic = "Concat", + command = """ + set -eu + cat {first} {second} > {output} + """.format( + first = run_image_output_executable.path, + second = footer.path, + output = script.path, + ), + ) + + ctx.actions.run( + executable = script, + tools = image_result[1].default_runfiles.files, + inputs = [parent_config] if parent_config else [], + outputs = [output_image_tar], + use_default_shell_env = True, + ) + + return [ + DefaultInfo( + files = depset([output_image_tar, script]), + ), + ] + +_commit_attrs = dicts.add(_image.attrs, { + "legacy_run_behavior": attr.bool( + default = False, + ), + "_run_tpl": attr.label( + default = Label("//docker/util:commit.sh.tpl"), + allow_single_file = True, + ), +}) + +# @unsorted-dict-items +_commit_outputs = { + "out": "%{name}_commit.tar", + "build": "%{name}.build", +} + +container_run_and_commit_rule = rule( + attrs = _commit_attrs, + cfg = _container.image.cfg, doc = ("This rule runs a set of commands in a given image, waits" + - "for the commands to finish, and then extracts a given file" + - " from the container to the bazel-out directory."), - outputs = _extract_outputs, - implementation = _extract_impl, + "for the commands to finish, and then commits the" + + "container to a new image."), + executable = False, + outputs = _commit_outputs, + implementation = _commit_impl, toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], ) -def _commit_impl( +# Export container_run_and_commit rule for other bazel rules to depend on. +commit = struct( + attrs = _commit_attrs, + outputs = _commit_outputs, + implementation = _commit_impl, +) + +def _commit_impl_legacy( ctx, name = None, image = None, commands = None, docker_run_flags = None, output_image_tar = None): - """Implementation for the container_run_and_commit rule. - - This rule runs a set of commands in a given image, waits for the commands - to finish, and then commits the container to a new image. + """Legacy implementation for the container_run_and_commit rule. 
Args: ctx: The bazel rule context @@ -216,6 +506,7 @@ def _commit_impl( "%{docker_tool_path}": docker_path(toolchain_info), "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, "%{image_tar}": image.path, + "%{legacy_load_behavior}": "true", "%{output_image}": "bazel/%s:%s" % ( ctx.label.package or "default", name, @@ -239,7 +530,7 @@ def _commit_impl( return [] -_commit_attrs = { +_commit_attrs_legacy = { "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -277,31 +568,237 @@ _commit_attrs = { ), } -# @unsorted-dict-items -_commit_outputs = { - "out": "%{name}_commit.tar", - "build": "%{name}.build", -} - -container_run_and_commit = rule( - attrs = _commit_attrs, +container_run_and_commit_legacy = rule( + attrs = _commit_attrs_legacy, doc = ("This rule runs a set of commands in a given image, waits" + "for the commands to finish, and then commits the" + "container to a new image."), executable = False, outputs = _commit_outputs, - implementation = _commit_impl, + implementation = _commit_impl_legacy, toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], ) -# Export container_run_and_commit rule for other bazel rules to depend on. -commit = struct( - attrs = _commit_attrs, - outputs = _commit_outputs, - implementation = _commit_impl, -) +def container_run_and_commit(name, legacy_load_behavior = True, **kwargs): + if legacy_load_behavior: + container_run_and_commit_legacy( + name = name, + **kwargs + ) + else: + container_run_and_commit_rule( + name = name, + **kwargs + ) def _commit_layer_impl( + ctx, + name = None, + base = None, + cmd = None, + env = None, + compression = None, + compression_options = None, + output_layer_tar = None): + """Implementation for the container_run_and_commit_layer rule. + + This rule runs a set of commands in a given image, waits for the commands + to finish, and then extracts the layer of changes into a new container_layer target. + + Args: + ctx: The bazel rule context + name: A unique name for this rule. 
+ base: File, overrides ctx.attr.base + cmd: str List, overrides ctx.attr.cmd + env: str Dict, overrides ctx.attr.env + compression: str, overrides ctx.attr.compression + compression_options: str list, overrides ctx.attr.compression_options + output_layer_tar: The output layer obtained as a result of running + the commands on the input image + """ + + name = name or ctx.attr.name + script = ctx.actions.declare_file(name + ".build") + output_layer_tar = output_layer_tar or ctx.outputs.layer + env = env or ctx.attr.env + compression = compression or ctx.attr.compression + compression_options = compression_options or ctx.attr.compression_options + + docker_run_flags = "" + if ctx.attr.docker_run_flags != "": + docker_run_flags = ctx.attr.docker_run_flags + elif ctx.attr.base and ImageInfo in ctx.attr.base: + docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags + if "-d" not in docker_run_flags: + docker_run_flags += " -d" + + run_image = "%s.run" % name + run_image_output_executable = ctx.actions.declare_file("%s.executable" % run_image) + run_image_output_tarball = ctx.actions.declare_file("%s.tar" % run_image) + run_image_output_config = ctx.actions.declare_file("%s.json" % run_image) + run_image_output_config_digest = ctx.actions.declare_file("%s.json.sha256" % run_image) + run_image_output_digest = ctx.actions.declare_file("%s.digest" % run_image) + run_image_output_layer = ctx.actions.declare_file("%s-layer.tar" % run_image) + + image_result = _image.implementation( + ctx, + name, + base = base, + cmd = cmd, + output_executable = run_image_output_executable, + output_tarball = run_image_output_tarball, + output_config = run_image_output_config, + output_config_digest = run_image_output_config_digest, + output_digest = run_image_output_digest, + output_layer = run_image_output_layer, + action_run = True, + docker_run_flags = docker_run_flags, + ) + + parent_parts = _get_layers(ctx, name, ctx.attr.base, base) + parent_config = parent_parts.get("config") + + toolchain_info = ctx.toolchains["@io_bazel_rules_docker//toolchains/docker:toolchain_type"].info + + # Generate a shell script to execute the reset cmd + image_utils = ctx.actions.declare_file("image_util.sh") + ctx.actions.expand_template( + template = ctx.file._image_utils_tpl, + output = image_utils, + substitutions = { + "%{docker_flags}": " ".join(toolchain_info.docker_flags), + "%{docker_tool_path}": docker_path(toolchain_info), + }, + is_executable = True, + ) + + docker_env = [ + "{}={}".format( + ctx.expand_make_variables("env", key, {}), + ctx.expand_make_variables("env", value, {}), + ) + for key, value in env.items() + ] + + env_file = ctx.actions.declare_file(name + ".env") + ctx.actions.write(env_file, "\n".join(docker_env)) + + output_diff_id = ctx.actions.declare_file(output_layer_tar.basename + ".sha256") + footer = ctx.actions.declare_file(name + "_footer.sh") + + # Generate a shell script to execute the run statement and extract the layer + ctx.actions.expand_template( + template = ctx.file._run_tpl, + output = footer, + substitutions = { + "%{env_file_path}": env_file.path, + "%{parent_config}": parent_config.path, + "%{legacy_load_behavior}": "false", + "%{output_diff_id}": output_diff_id.path, + "%{image_id_extractor_path}": ctx.executable._extract_image_id.path, + "%{image_last_layer_extractor_path}": ctx.executable._last_layer_extractor_tool.path, + "%{output_image}": "bazel/%s:%s" % ( + ctx.label.package or "default", + name, + ), + "%{output_layer_tar}": output_layer_tar.path, + "%{util_script}": 
image_utils.path, + }, + is_executable = True, + ) + + ctx.actions.run_shell( + inputs = [run_image_output_executable, footer], + outputs = [script], + mnemonic = "Concat", + command = """ + set -eu + cat {first} {second} > {output} + """.format( + first = run_image_output_executable.path, + second = footer.path, + output = script.path, + ), + ) + + ctx.actions.run( + outputs = [output_layer_tar, output_diff_id], + inputs = [image_utils], + executable = script, + execution_requirements = { + # This action produces large output files, and isn't economical to + # upload to a remote cache. + "no-remote-cache": "1", + }, + mnemonic = "RunAndCommitLayer", + tools = [ctx.executable._extract_image_id, ctx.executable._last_layer_extractor_tool] + image_result[1].default_runfiles.files.to_list(), + use_default_shell_env = True, + ) + + # Generate a zipped layer and calculate the blob sum, this is for LayerInfo + zipped_layer, blob_sum = zip_layer( + ctx, + output_layer_tar, + compression = compression, + compression_options = compression_options, + ) + + return [ + LayerInfo( + unzipped_layer = output_layer_tar, + diff_id = output_diff_id, + zipped_layer = zipped_layer, + blob_sum = blob_sum, + env = env, + ), + ] + +_commit_layer_attrs = dicts.add(_image.attrs, { + "legacy_run_behavior": attr.bool( + default = False, + ), + "compression": attr.string(default = "gzip"), + "compression_options": attr.string_list(), + "_run_tpl": attr.label( + default = Label("//docker/util:commit_layer.sh.tpl"), + allow_single_file = True, + ), + "_extract_image_id": attr.label( + default = Label("//contrib:extract_image_id"), + cfg = "host", + executable = True, + allow_files = True, + ), + "_image_utils_tpl": attr.label( + default = "//docker/util:image_util.sh.tpl", + allow_single_file = True, + ), + "_last_layer_extractor_tool": attr.label( + default = Label("//contrib:extract_last_layer"), + cfg = "host", + executable = True, + allow_files = True, + ), +}) + +_commit_layer_outputs = { + "layer": "%{name}-layer.tar", + "script": "%{name}.build", +} + +container_run_and_commit_layer_rule = rule( + attrs = _commit_layer_attrs, + cfg = _container.image.cfg, + doc = ("This rule runs a set of commands in a given image, waits" + + "for the commands to finish, and then commits the" + + "container state to a new layer."), + executable = False, + outputs = _commit_layer_outputs, + implementation = _commit_layer_impl, + toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], +) + +def _commit_layer_impl_legacy( ctx, name = None, image = None, @@ -372,6 +869,8 @@ def _commit_layer_impl( output = script, substitutions = { "%{commands}": _process_commands(commands), + "%{legacy_load_behavior}": "true", + "%{parent_config}": "", "%{docker_flags}": " ".join(toolchain_info.docker_flags), "%{docker_run_flags}": " ".join(docker_run_flags), "%{docker_tool_path}": docker_path(toolchain_info), @@ -419,7 +918,7 @@ def _commit_layer_impl( ), ] -_commit_layer_attrs = dicts.add({ +_commit_layer_attrs_legacy = dicts.add({ "commands": attr.string_list( doc = "A list of commands to run (sequentially) in the container.", mandatory = True, @@ -460,26 +959,35 @@ _commit_layer_attrs = dicts.add({ ), }, _hash_tools, _zip_tools) -_commit_layer_outputs = { - "layer": "%{name}-layer.tar", -} - -container_run_and_commit_layer = rule( - attrs = _commit_layer_attrs, +container_run_and_commit_layer_legacy = rule( + attrs = _commit_layer_attrs_legacy, doc = ("This rule runs a set of commands in a given image, waits" + "for the 
commands to finish, and then commits the" + "container state to a new layer."), executable = False, outputs = _commit_layer_outputs, - implementation = _commit_layer_impl, + implementation = _commit_layer_impl_legacy, toolchains = ["@io_bazel_rules_docker//toolchains/docker:toolchain_type"], ) +def container_run_and_commit_layer(name, legacy_load_behavior = True, **kwargs): + if legacy_load_behavior: + container_run_and_commit_layer_legacy( + name = name, + **kwargs + ) + else: + container_run_and_commit_layer_rule( + name = name, + legacy_load_behavior = False, + **kwargs + ) + # Export container_run_and_commit_layer rule for other bazel rules to depend on. commit_layer = struct( - attrs = _commit_layer_attrs, + attrs = _commit_layer_attrs_legacy, outputs = _commit_layer_outputs, - implementation = _commit_layer_impl, + implementation = _commit_layer_impl_legacy, ) def _process_commands(command_list): diff --git a/docs/container.md b/docs/container.md index 0c95672c9..bf5cd8261 100644 --- a/docs/container.md +++ b/docs/container.md @@ -403,7 +403,7 @@ image.implementation(ctx, compression_options, experimental_tarball_format, debs, tars, architecture, operating_system, os_version, output_executable, output_tarball, output_config, output_config_digest, output_digest, output_layer, workdir, null_cmd, - null_entrypoint) + null_entrypoint, action_run, docker_run_flags) Implementation for the container_image rule. @@ -467,5 +467,7 @@ You can write a customized container_image rule by writing something like: | workdir | str, overrides ctx.attr.workdir | None | | null_cmd | bool, overrides ctx.attr.null_cmd | None | | null_entrypoint | bool, overrides ctx.attr.null_entrypoint | None | +| action_run | bool, whether output_executable is going to be run as an action | False | +| docker_run_flags |
str, overrides ctx.attr.docker_run_flags
| None |

diff --git a/tests/docker/util/BUILD b/tests/docker/util/BUILD
index 3b12cdc60..dbef1212a 100644
--- a/tests/docker/util/BUILD
+++ b/tests/docker/util/BUILD
@@ -34,6 +34,19 @@ load(
 
 container_run_and_extract(
     name = "test_container_extract",
+    base = "@debian_base//image",
+    cmd = [
+        "bash",
+        "-c",
+        "touch /foo.txt && echo 'test' > /foo.txt",
+    ],
+    docker_run_flags = "-u root",
+    extract_file = "/foo.txt",
+    legacy_load_behavior = False,
+)
+
+container_run_and_extract(
+    name = "test_container_extract_legacy",
     commands = [
         "touch /foo.txt",
         "echo 'test' > /foo.txt",
@@ -69,6 +82,17 @@ file_test(
 
 container_run_and_commit(
     name = "test_container_commit",
+    base = "@debian_base//image",
+    cmd = [
+        "touch",
+        "/foo.txt",
+    ],
+    docker_run_flags = "-u root",
+    legacy_load_behavior = False,
+)
+
+container_run_and_commit(
+    name = "test_container_commit_legacy",
     commands = ["touch /foo.txt"],
     docker_run_flags = [
         "-u",
@@ -111,6 +135,7 @@ rule_test(
     name = "test_container_commit_layer_rule",
     generates = [
         "test_container_commit_layer-layer.tar",
+        "test_container_commit_layer.build",
     ],
     rule = "test_container_commit_layer",
 )
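
A minimal BUILD sketch of how the new legacy_load_behavior macro parameter selects between the two implementations, based on the test targets added in tests/docker/util/BUILD above. With legacy_load_behavior = False the macro instantiates the new *_rule variant, which reuses the container_image attributes (base, cmd, and docker_run_flags as a single string) and loads and runs the container from a build action via the concatenated incremental-load script and footer; with the default of True the existing commands / docker_run_flags-list API is unchanged. The target names below are illustrative only.

    # Non-legacy path: the container is loaded and run inside a build action.
    container_run_and_commit(
        name = "commit_via_action",
        base = "@debian_base//image",
        cmd = ["touch", "/foo.txt"],
        docker_run_flags = "-u root",
        legacy_load_behavior = False,
    )

    # Legacy path (default): unchanged behavior and attribute set.
    container_run_and_commit(
        name = "commit_via_run",
        commands = ["touch /foo.txt"],
        docker_run_flags = ["-u", "root"],
    )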
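
A similar sketch for container_run_and_commit_layer, whose non-legacy form is not exercised by the tests in this patch; the attribute names below (base, cmd, env, legacy_load_behavior) are taken from _commit_layer_attrs and the new macro signature, so treat this as an assumption about the intended usage rather than a tested example.

    # Assumed non-legacy usage of the layer-committing rule (not covered by the tests here).
    container_run_and_commit_layer(
        name = "layer_via_action",
        base = "@debian_base//image",
        cmd = ["bash", "-c", "touch /bar.txt"],
        env = {"FOO": "bar"},
        legacy_load_behavior = False,
    )

As with the legacy rule, the resulting target provides LayerInfo, so it can still be referenced from a container_image's layers attribute.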