incremental extract

bazelbuild · Apr 25, 2022 · 8a7c792 · 8a7c792
1 parent 6ea707b
commit 8a7c792
Show file tree

Hide file tree

Showing 9 changed files with 738 additions and 133 deletions.
diff --git a/container/image.bzl b/container/image.bzl
@@ -297,7 +297,9 @@ def _impl(
         output_layer = None,
         workdir = None,
         null_cmd = None,
-        null_entrypoint = None):
+        null_entrypoint = None,
+        action_run = False,
+        docker_run_flags = None):
     """Implementation for the container_image rule.
 
     You can write a customized container_image rule by writing something like:
@@ -354,6 +356,7 @@ def _impl(
         workdir: str, overrides ctx.attr.workdir
         null_cmd: bool, overrides ctx.attr.null_cmd
         null_entrypoint: bool, overrides ctx.attr.null_entrypoint
+        action_run: bool, whether output_executable is going to be run as an action
     """
     name = name or ctx.label.name
     entrypoint = entrypoint or ctx.attr.entrypoint
@@ -371,7 +374,6 @@ def _impl(
     output_config = output_config or ctx.outputs.config
     output_config_digest = output_config_digest or ctx.outputs.config_digest
     output_layer = output_layer or ctx.outputs.layer
-    build_script = ctx.outputs.build_script
     null_cmd = null_cmd or ctx.attr.null_cmd
     null_entrypoint = null_entrypoint or ctx.attr.null_entrypoint
 
@@ -381,15 +383,19 @@ def _impl(
     # We do not use the default argument of attrs.string() in order to distinguish between
     # an image using the default and an image intentionally overriding the base's run flags.
     # Since this is a string attribute, the default value is the empty string.
-    if ctx.attr.docker_run_flags != "":
-        docker_run_flags = ctx.attr.docker_run_flags
-    elif ctx.attr.base and ImageInfo in ctx.attr.base:
-        docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags
-    else:
-        # Run the container using host networking, so that the service is
-        # available to the developer without having to poke around with
-        # docker inspect.
-        docker_run_flags = "-i --rm --network=host"
+    docker_run_flags_are_default = False
+    if docker_run_flags == None:
+        if ctx.attr.docker_run_flags != "":
+            docker_run_flags = ctx.attr.docker_run_flags
+        elif ctx.attr.base and ImageInfo in ctx.attr.base:
+            docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags
+        else:
+            docker_run_flags_are_default = True
+
+            # Run the container using host networking, so that the service is
+            # available to the developer without having to poke around with
+            # docker inspect.
+            docker_run_flags = "-i --rm --network=host"
 
     if ctx.attr.launcher:
         if not file_map:
@@ -509,6 +515,7 @@ def _impl(
         build_executable,
         run = not ctx.attr.legacy_run_behavior,
         run_flags = docker_run_flags,
+        action_run = action_run,
     )
 
     _assemble_image(
@@ -540,7 +547,7 @@ def _impl(
         ImageInfo(
             container_parts = container_parts,
             legacy_run_behavior = ctx.attr.legacy_run_behavior,
-            docker_run_flags = docker_run_flags,
+            docker_run_flags = "" if docker_run_flags_are_default and not ctx.attr.legacy_run_behavior else docker_run_flags,
         ),
         DefaultInfo(
             executable = build_executable,

diff --git a/container/incremental_load.sh.tpl b/container/incremental_load.sh.tpl
@@ -19,13 +19,18 @@ set -eu
 # This is a generated file that loads all docker layers built by "docker_build".
 
 function guess_runfiles() {
-    if [ -d ${BASH_SOURCE[0]}.runfiles ]; then
-        # Runfiles are adjacent to the current script.
-        echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )"
+    if [[ "%{action_run}" == "True" ]]; then
+        # The script is running as an action
+        pwd
     else
-        # The current script is within some other script's runfiles.
-        mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
-        echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|'
+        if [ -d ${BASH_SOURCE[0]}.runfiles ]; then
+            # Runfiles are adjacent to the current script.
+            echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )"
+        else
+            # The current script is within some other script's runfiles.
+            mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+            echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|'
+        fi
     fi
 }
 
@@ -136,7 +141,7 @@ function import_config() {
   local tmp_dir="$(mktemp -d)"
   echo "${tmp_dir}" >> "${TEMP_FILES}"
 
-  cd "${tmp_dir}"
+  pushd "${tmp_dir}" >/dev/null
 
   # Docker elides layer reads from the tarball when it
   # already has a copy of the layer with the same basis
@@ -212,6 +217,8 @@ EOF
   # and then streaming exactly the layers we've established are
   # needed into the Docker daemon.
   tar cPh "${MISSING[@]}" | "${DOCKER}" ${DOCKER_FLAGS} load
+
+  popd >/dev/null
 }
 
 function tag_layer() {
@@ -279,13 +286,10 @@ if [[ "%{run}" == "True" ]]; then
       esac
   done
 
-  # Once we've loaded the images for all layers, we no longer need the temporary files on disk.
-  # We can clean up before we exec docker, since the exit handler will no longer run.
-  cleanup
-
+  # This is generated and injected by docker_*.
+  args=(%{run_statement})
   # Bash treats empty arrays as unset variables for the purposes of `set -u`, so we only
   # conditionally add these arrays to our args.
-  args=(%{run_statement})
   if [[ ${#docker_args[@]} -gt 0 ]]; then
     args+=("${docker_args[@]}")
   fi
@@ -294,6 +298,14 @@ if [[ "%{run}" == "True" ]]; then
     args+=("${container_args[@]}")
   fi
 
-  # This generated and injected by docker_*.
-  eval exec "${args[@]}"
+  if [[ "%{action_run}" == "True" ]]; then
+    # This will be used by other scripts that are concatenated to this one.
+    id=$("${args[@]}")
+  else
+    # Once we've loaded the images for all layers, we no longer need the temporary files on disk.
+    # We can clean up before we exec docker, since the exit handler will no longer run.
+    cleanup
+
+    eval exec "${args[@]}"
+  fi
 fi
diff --git a/container/layer_tools.bzl b/container/layer_tools.bzl
@@ -196,7 +196,8 @@ def incremental_load(
         output,
         stamp = False,
         run = False,
-        run_flags = None):
+        run_flags = None,
+        action_run = False):
     """Generate the incremental load statement.
 
 
@@ -207,6 +208,7 @@ def incremental_load(
        stamp: Whether to stamp the produced image
        run: Whether to run the script or not
        run_flags: Additional run flags
+       action_run: bool, whether output_executable is going to be run as an action
     """
     stamp_files = []
     if stamp:
@@ -237,7 +239,7 @@ def incremental_load(
         # First load the legacy base image, if it exists.
         if image.get("legacy"):
             load_statements.append(
-                "load_legacy '%s'" % _get_runfile_path(ctx, image["legacy"]),
+                "load_legacy '%s'" % (image["legacy"].path if action_run else _get_runfile_path(ctx, image["legacy"])),
             )
 
         pairs = zip(image["diff_id"], image["unzipped_layer"])
@@ -246,11 +248,11 @@ def incremental_load(
         # in the daemon.
         load_statements.append(
             "import_config '%s' %s" % (
-                _get_runfile_path(ctx, image["config"]),
+                image["config"].path if action_run else _get_runfile_path(ctx, image["config"]),
                 " ".join([
                     "'%s' '%s'" % (
-                        _get_runfile_path(ctx, diff_id),
-                        _get_runfile_path(ctx, unzipped_layer),
+                        diff_id.path if action_run else _get_runfile_path(ctx, diff_id),
+                        unzipped_layer.path if action_run else _get_runfile_path(ctx, unzipped_layer),
                     )
                     for (diff_id, unzipped_layer) in pairs
                 ]),
@@ -265,7 +267,7 @@ def incremental_load(
                 # It is notable that the only legal use of '{' in a
                 # tag would be for stamp variables, '$' is not allowed.
                 tag_reference,
-                _get_runfile_path(ctx, image["config_digest"]),
+                image["config_digest"].path if action_run else _get_runfile_path(ctx, image["config_digest"]),
             ),
         )
 
@@ -274,6 +276,7 @@ def incremental_load(
         substitutions = {
             "%{docker_flags}": " ".join(toolchain_info.docker_flags),
             "%{docker_tool_path}": docker_path(toolchain_info),
+            "%{action_run}": str(action_run),
             "%{load_statements}": "\n".join(load_statements),
             "%{run_statement}": run_statement,
             "%{run_tag}": run_tag,
@@ -282,7 +285,7 @@ def incremental_load(
             # variables, and turn references to them into bash variable
             # references.
             "%{stamp_statements}": "\n".join([
-                "read_variables %s" % _get_runfile_path(ctx, f)
+                "read_variables %s" % (f.path if action_run else _get_runfile_path(ctx, f))
                 for f in stamp_files
             ]),
             "%{tag_statements}": "\n".join(tag_statements),

diff --git a/docker/util/commit.sh.tpl b/docker/util/commit.sh.tpl
@@ -1,34 +1,53 @@
 #!/usr/bin/env bash
 
-set -o errexit
+if %{legacy_load_behavior}; then
+    set -o errexit
 
-# Setup tools and load utils
-TO_JSON_TOOL="%{to_json_tool}"
-source %{util_script}
+    # Setup tools and load utils
+    TO_JSON_TOOL="%{to_json_tool}"
+    source %{util_script}
 
-# Resolve the docker tool path
-DOCKER="%{docker_tool_path}"
-DOCKER_FLAGS="%{docker_flags}"
+    # Resolve the docker tool path
+    DOCKER="%{docker_tool_path}"
+    DOCKER_FLAGS="%{docker_flags}"
 
-if [[ -z "$DOCKER" ]]; then
-    echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
-    exit 1
+    if [[ -z "$DOCKER" ]]; then
+        echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
+        exit 1
+    fi
 fi
 
 logfile=$(output_logfile)
-
 if ! (
-    # Load the image and remember its name
-    image_id=$(%{image_id_extractor_path} %{image_tar})
-    $DOCKER $DOCKER_FLAGS load -i %{image_tar}
-
-    readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} $image_id %{commands})
     retcode=0
-    if $DOCKER $DOCKER_FLAGS start -a "${id}"; then
-        reset_cmd $image_id $id %{output_image}
-        $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar}
+
+    if %{legacy_load_behavior}; then
+        # Load the image and remember its name
+        image_id=$(%{image_id_extractor_path} %{image_tar})
+        $DOCKER $DOCKER_FLAGS load -i %{image_tar}
+
+        readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} $image_id %{commands})
+        if $DOCKER $DOCKER_FLAGS start -a "${id}"; then
+            reset_cmd $image_id $id %{output_image}
+            $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar}
+        else
+            retcode=$?
+        fi
     else
-        retcode=$?
+        # Actually wait for the container to finish running its commands
+        retcode=$($DOCKER $DOCKER_FLAGS wait $id)
+        # Trigger a failure if the run had a non-zero exit status
+        if [ $retcode != 0 ]; then
+            $DOCKER $DOCKER_FLAGS logs $id && false
+        fi
+        config=$(< %{commit_base_config})  # contents of the file substituted for %{commit_base_config}
+        cmd='["/bin/sh", "-c"]'  # fallback CMD, kept when no "Cmd" array is found below
+        regex='\"Cmd\" ?: ?(\[[^]]*\])'  # group 1 captures the bracketed array after a "Cmd" key
+        if [[ $config =~ $regex ]]; then  # expand both variables; bare words compare the literal text "config" to "regex" and never match
+            cmd=${BASH_REMATCH[1]}
+        fi
+        $DOCKER $DOCKER_FLAGS commit -c "CMD $cmd" $id %{output_image}
+        $DOCKER $DOCKER_FLAGS save %{output_image} -o %{output_tar}
     fi
 
     $DOCKER $DOCKER_FLAGS rm $id

diff --git a/docker/util/commit_layer.sh.tpl b/docker/util/commit_layer.sh.tpl
@@ -1,42 +1,70 @@
 #!/usr/bin/env bash
 
-set -o errexit
-
 # Load utils
 source %{util_script}
 
-# Resolve the docker tool path
-DOCKER="%{docker_tool_path}"
-DOCKER_FLAGS="%{docker_flags}"
+if %{legacy_load_behavior}; then
+  set -o errexit
 
-if [[ -z "$DOCKER" ]]; then
-    echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
-    exit 1
+  # Resolve the docker tool path
+  DOCKER="%{docker_tool_path}"
+  DOCKER_FLAGS="%{docker_flags}"
+
+  if [[ -z "$DOCKER" ]]; then
+      echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
+      exit 1
+  fi
 fi
 
 logfile=$(output_logfile)
 
 if ! (
-    # Load the image and remember its name
-    image_id=$(%{image_id_extractor_path} %{image_tar})
-    $DOCKER $DOCKER_FLAGS load -i %{image_tar}
-
-    readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} --env-file %{env_file_path} $image_id %{commands})
-    retcode=0
-    if $DOCKER $DOCKER_FLAGS start -a "${id}"; then
-        OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar"
-        reset_cmd $image_id $id %{output_image}
+    OUTPUT_IMAGE_TAR="%{output_layer_tar}.image.tar"
+    if %{legacy_load_behavior}; then
+        # Load the image and remember its name
+        image_id=$(%{image_id_extractor_path} %{image_tar})
+        $DOCKER $DOCKER_FLAGS load -i %{image_tar}
+
+        readonly id=$($DOCKER $DOCKER_FLAGS create %{docker_run_flags} --env-file %{env_file_path} $image_id %{commands})
+        retcode=0
+        if $DOCKER $DOCKER_FLAGS start -a "${id}"; then
+            reset_cmd $image_id $id %{output_image}
+            $DOCKER $DOCKER_FLAGS save %{output_image} -o $OUTPUT_IMAGE_TAR
+
+            # Extract the last layer from the image - this will be the layer generated by $DOCKER commit
+            %{image_last_layer_extractor_path} $OUTPUT_IMAGE_TAR %{output_layer_tar} %{output_diff_id}
+
+            # Delete the intermediate tar
+            rm $OUTPUT_IMAGE_TAR
+        else
+            retcode=$?
+        fi
+    else
+        # Actually wait for the container to finish running its commands
+        retcode=$($DOCKER $DOCKER_FLAGS wait $id)
+        # Trigger a failure if the run had a non-zero exit status
+        if [ $retcode != 0 ]; then
+            $DOCKER $DOCKER_FLAGS logs $id && false
+        fi
+        config=$(< %{commit_base_config})  # contents of the file substituted for %{commit_base_config}
+        cmd='["/bin/sh", "-c"]'  # fallback CMD, kept when no "Cmd" array is found below
+        regex='\"Cmd\" ?: ?(\[[^]]*\])'  # group 1 captures the bracketed array after a "Cmd" key
+        if [[ $config =~ $regex ]]; then  # expand both variables; bare words compare the literal text "config" to "regex" and never match
+          cmd=${BASH_REMATCH[1]}
+        fi
+
+        $DOCKER $DOCKER_FLAGS commit -c "CMD $cmd" $id %{output_image}
+
         $DOCKER $DOCKER_FLAGS save %{output_image} -o $OUTPUT_IMAGE_TAR
 
         # Extract the last layer from the image - this will be the layer generated by $DOCKER commit
         %{image_last_layer_extractor_path} $OUTPUT_IMAGE_TAR %{output_layer_tar} %{output_diff_id}
 
         # Delete the intermediate tar
         rm $OUTPUT_IMAGE_TAR
-    else
-        retcode=$?
     fi
 
+
     # Delete the container and the intermediate image
     $DOCKER $DOCKER_FLAGS rm $id
     $DOCKER $DOCKER_FLAGS rmi %{output_image}