Skip to content

Commit

Permalink
Incremental commit and extract.
Browse files Browse the repository at this point in the history
  • Loading branch information
uri-canva committed May 11, 2020
1 parent 131eb11 commit 27643b2
Show file tree
Hide file tree
Showing 9 changed files with 239 additions and 276 deletions.
89 changes: 57 additions & 32 deletions container/image.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,13 @@ def _impl(
output_layer = None,
workdir = None,
null_cmd = None,
null_entrypoint = None):
null_entrypoint = None,
commit = False,
commit_output = None,
extract = False,
extract_path = None,
extract_output = None,
action_run = False):
"""Implementation for the container_image rule.
Args:
Expand Down Expand Up @@ -344,6 +350,12 @@ def _impl(
workdir: str, overrides ctx.attr.workdir
null_cmd: bool, overrides ctx.attr.null_cmd
null_entrypoint: bool, overrides ctx.attr.null_entrypoint
commit: bool, whether to run the container and commit the result
commit_output: File to use as output for the committed container
extract: bool, whether to run the container and extract a file from it
extract_path: str, path to the file to extract from the container
extract_output: File to copy the extract file to
action_run: bool, whether output_executable is going to be run as an action
"""
name = name or ctx.label.name
entrypoint = entrypoint or ctx.attr.entrypoint
Expand All @@ -359,7 +371,6 @@ def _impl(
output_digest = output_digest or ctx.outputs.digest
output_config = output_config or ctx.outputs.config
output_layer = output_layer or ctx.outputs.layer
build_script = ctx.outputs.build_script
null_cmd = null_cmd or ctx.attr.null_cmd
null_entrypoint = null_entrypoint or ctx.attr.null_entrypoint

Expand All @@ -369,15 +380,21 @@ def _impl(
# We do not use the default argument of attrs.string() in order to distinguish between
# an image using the default and an image intentionally overriding the base's run flags.
# Since this is a string attribute, the default value is the empty string.
docker_run_flags_are_default = True

# Run the container using host networking, so that the service is
# available to the developer without having to poke around with
# docker inspect.
docker_run_flags = "-i --rm --network=host"
if ctx.attr.docker_run_flags != "":
docker_run_flags_are_default = False
docker_run_flags = ctx.attr.docker_run_flags
elif ctx.attr.base and ImageInfo in ctx.attr.base:
docker_run_flags_are_default = False
docker_run_flags = ctx.attr.base[ImageInfo].docker_run_flags
else:
# Run the container using host networking, so that the service is
# available to the developer without having to poke around with
# docker inspect.
docker_run_flags = "-i --rm --network=host"
elif commit or extract:
docker_run_flags_are_default = False
docker_run_flags = None

if ctx.attr.launcher:
if not file_map:
Expand Down Expand Up @@ -491,12 +508,21 @@ def _impl(
tag_name: container_parts,
}

commit_base_config = parent_parts.get("config")
_incr_load(
ctx,
images,
build_executable,
run = not ctx.attr.legacy_run_behavior,
run_flags = docker_run_flags,
commit = commit,
commit_name = tag_name + "_commit_output",
commit_base_config = commit_base_config,
commit_output = commit_output,
extract = extract,
extract_path = extract_path,
extract_output = extract_output,
action_run = action_run,
)

_assemble_image(
Expand All @@ -515,15 +541,14 @@ def _impl(
)

runfiles = ctx.runfiles(
files = unzipped_layers + diff_ids + [config_file, config_digest] +
([container_parts["legacy"]] if container_parts["legacy"] else []),
files = [x for x in unzipped_layers + diff_ids + [commit_base_config, config_file, config_digest, container_parts["legacy"]] if x != None],
)

return [
ImageInfo(
container_parts = container_parts,
legacy_run_behavior = ctx.attr.legacy_run_behavior,
docker_run_flags = docker_run_flags,
docker_run_flags = "" if docker_run_flags_are_default else docker_run_flags,
),
DefaultInfo(
executable = build_executable,
Expand Down Expand Up @@ -737,28 +762,28 @@ def _validate_command(name, argument, operating_system):
def container_image(**kwargs):
"""Package a docker image.
This rule generates a sequence of genrules the last of which is named 'name',
so the dependency graph works out properly. The output of this rule is a
tarball compatible with 'docker save/load' with the structure:
{layer-name}:
layer.tar
VERSION
json
{image-config-sha256}.json
...
manifest.json
repositories
top # an implementation detail of our rules, not consumed by Docker.
This rule appends a single new layer to the tarball of this form provided
via the 'base' parameter.
The images produced by this rule are always named 'bazel/tmp:latest' when
loaded (an internal detail). The expectation is that the images produced
by these rules will be uploaded using the 'docker_push' rule below.
Args:
**kwargs: See above.
"""
This rule generates a sequence of genrules the last of which is named 'name',
so the dependency graph works out properly. The output of this rule is a
tarball compatible with 'docker save/load' with the structure:
{layer-name}:
layer.tar
VERSION
json
{image-config-sha256}.json
...
manifest.json
repositories
top # an implementation detail of our rules, not consumed by Docker.
This rule appends a single new layer to the tarball of this form provided
via the 'base' parameter.
The images produced by this rule are always named 'bazel/tmp:latest' when
loaded (an internal detail). The expectation is that the images produced
by these rules will be uploaded using the 'docker_push' rule below.
Args:
**kwargs: See above.
"""
operating_system = None

if ("operating_system" in kwargs):
Expand Down
53 changes: 45 additions & 8 deletions container/incremental_load.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,19 @@ set -eu
# This is a generated file that loads all docker layers built by "docker_build".

# Determine the runfiles root for this script.
# When the script runs as a build action (%{action_run} == "True"), inputs are
# staged in the action's working directory, so the current directory is the
# root. Otherwise fall back to standard Bazel runfiles discovery: either the
# <script>.runfiles directory adjacent to this script, or — when this script
# is itself inside another script's runfiles — the enclosing *.runfiles root.
# NOTE(review): the scraped diff showed both the old and new indentation of
# each branch (statements duplicated); this is the reconstructed new version.
function guess_runfiles() {
    if [[ "%{action_run}" == "True" ]]; then
        # The script is running as an action.
        pwd
    else
        if [ -d ${BASH_SOURCE[0]}.runfiles ]; then
            # Runfiles are adjacent to the current script.
            echo "$( cd ${BASH_SOURCE[0]}.runfiles && pwd )"
        else
            # The current script is within some other script's runfiles.
            mydir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
            echo $mydir | sed -e 's|\(.*\.runfiles\)/.*|\1|'
        fi
    fi
}

RUNFILES="${PYTHON_RUNFILES:-$(guess_runfiles)}"
Expand All @@ -35,8 +40,8 @@ DOCKER="%{docker_tool_path}"
DOCKER_FLAGS="%{docker_flags}"

# Fail fast when the toolchain did not configure a docker binary; nothing
# below can work without it. (The scraped diff duplicated the echo/exit body
# at two indentations; this is the reconstructed post-change guard.)
if [[ -z "${DOCKER}" ]]; then
    echo >&2 "error: docker not found; do you need to manually configure the docker toolchain?"
    exit 1
fi

# Create temporary files in which to record things to clean up.
Expand Down Expand Up @@ -130,7 +135,7 @@ function import_config() {
local tmp_dir="$(mktemp -d)"
echo "${tmp_dir}" >> "${TEMP_FILES}"

cd "${tmp_dir}"
pushd "${tmp_dir}" >/dev/null

# Docker elides layer reads from the tarball when it
# already has a copy of the layer with the same basis
Expand Down Expand Up @@ -206,6 +211,8 @@ EOF
# and then streaming exactly the layers we've established are
# needed into the Docker daemon.
tar cPh "${MISSING[@]}" | tee image.tar | "${DOCKER}" ${DOCKER_FLAGS} load

popd >/dev/null
}

function tag_layer() {
Expand Down Expand Up @@ -239,6 +246,36 @@ function read_variables() {
# This generated and injected by docker_*.
%{tag_statements}

# Optional statements to extract files from the container.
# The generated run statement starts the container detached and prints its id;
# we wait for it to exit, surface its logs on failure (the `&& false` trips
# `set -e`), copy the requested path out with `docker cp`, remove the stopped
# container, and exit — extraction is the script's sole purpose in this mode.
if [[ "%{extract}" == "True" ]]; then
  id=$(%{run_statements})
  # Quote expansions so an empty/garbled id fails loudly instead of producing
  # a malformed test or docker invocation.
  retcode=$("${DOCKER}" ${DOCKER_FLAGS} wait "$id")
  if [ "$retcode" != 0 ]; then
    "${DOCKER}" ${DOCKER_FLAGS} logs "$id" && false
  fi
  "${DOCKER}" ${DOCKER_FLAGS} cp "$id":%{extract_path} %{extract_output}
  "${DOCKER}" ${DOCKER_FLAGS} rm "$id"
  exit
fi

# Optional statements to commit changes to the container as a new image.
# The container is started detached; after it exits we commit the result,
# restoring the base image's original Cmd from its config JSON (a plain
# `docker commit` would otherwise bake in the command that was just run).
if [[ "%{commit}" == "True" ]]; then
  id=$(%{run_statements})
  retcode=$("${DOCKER}" ${DOCKER_FLAGS} wait "$id")
  if [ "$retcode" != 0 ]; then
    # Surface the container output to aid debugging, then fail (set -e).
    "${DOCKER}" ${DOCKER_FLAGS} logs "$id" && false
  fi
  config=$(< %{commit_base_config})
  # Fallback Cmd when the base config does not declare one.
  cmd='["/bin/sh", "-c", "/bin/bash"]'
  regex='"Cmd" ?: ?(\[[^]]*\])'
  # BUG FIX: the original `[[ config =~ regex ]]` compared the literal string
  # "config" against the literal pattern "regex" — it never matched, so the
  # base image's Cmd was never restored. Variables must be expanded (and the
  # pattern left unquoted) for `=~` to use their contents.
  if [[ $config =~ $regex ]]; then
    cmd=${BASH_REMATCH[1]}
  fi
  "${DOCKER}" ${DOCKER_FLAGS} commit -c "CMD $cmd" "$id" %{output_image}
  "${DOCKER}" ${DOCKER_FLAGS} save %{output_image} -o %{output_tar}
  "${DOCKER}" ${DOCKER_FLAGS} rm "$id"
fi

# An optional "docker run" statement for invoking a loaded container.
# This is not executed if the single argument --norun is passed or
# no run_statements are generated (in which case, 'run' is 'False').
Expand Down
64 changes: 55 additions & 9 deletions container/layer_tools.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,15 @@ def incremental_load(
output,
stamp = False,
run = False,
run_flags = None):
run_flags = None,
commit = False,
commit_name = None,
commit_base_config = None,
commit_output = None,
extract = False,
extract_path = None,
extract_output = None,
action_run = False):
"""Generate the incremental load statement.
Expand All @@ -200,6 +208,14 @@ def incremental_load(
stamp: Whether to stamp the produced image
run: Whether to run the script or not
run_flags: Additional run flags
commit: bool, whether to run the container and commit the result
commit_name: str, name to commit the new container as
commit_base_config: File to copy the restore the original command from
commit_output: File to use as output for the committed container
extract: bool, whether to run the container and extract a file from it
extract_path: str, path to the file to extract from the container
extract_output: File to copy the extract file to
action_run: bool, whether output_executable is going to be run as an action
"""
stamp_files = []
if stamp:
Expand All @@ -210,14 +226,33 @@ def incremental_load(
# Default to interactively launching the container,
# and cleaning up when it exits.

run_flags = run_flags or "-i --rm"
if not run_flags:
if commit or extract:
run_flags = ""
else:
run_flags = "-i --rm"

if len(images) > 1 and run:
fail("Bazel run does not currently support execution of " +
"multiple containers (only loading).")
if commit:
if not commit_name:
fail("Missing commit_name.")
if not commit_base_config:
fail("Missing commit_base_config.")
if not commit_output:
fail("Missing commit_output.")
if extract:
if run:
fail("Cannot execute a container and extract a file from it at the same time.")
if not extract_path:
fail("Missing extract_path.")
if not extract_output:
fail("Missing extract_output.")

load_statements = []
tag_statements = []
extract_statements = []
run_statements = []

# TODO(mattmoor): Consider adding cleanup_statements.
Expand All @@ -227,7 +262,7 @@ def incremental_load(
# First load the legacy base image, if it exists.
if image.get("legacy"):
load_statements += [
"load_legacy '%s'" % _get_runfile_path(ctx, image["legacy"]),
"load_legacy '%s'" % (image["legacy"].path if action_run else _get_runfile_path(ctx, image["legacy"])),
]

pairs = zip(image["diff_id"], image["unzipped_layer"])
Expand All @@ -236,11 +271,11 @@ def incremental_load(
# in the daemon.
load_statements += [
"import_config '%s' %s" % (
_get_runfile_path(ctx, image["config"]),
image["config"].path if action_run else _get_runfile_path(ctx, image["config"]),
" ".join([
"'%s' '%s'" % (
_get_runfile_path(ctx, diff_id),
_get_runfile_path(ctx, unzipped_layer),
diff_id.path if action_run else _get_runfile_path(ctx, diff_id),
unzipped_layer.path if action_run else _get_runfile_path(ctx, unzipped_layer),
)
for (diff_id, unzipped_layer) in pairs
]),
Expand All @@ -255,10 +290,13 @@ def incremental_load(
# It is notable that the only legal use of '{' in a
# tag would be for stamp variables, '$' is not allowed.
tag_reference,
_get_runfile_path(ctx, image["config_digest"]),
image["config_digest"].path if action_run else _get_runfile_path(ctx, image["config_digest"]),
),
]
if run:
if commit or extract:
if not "-d" in run_flags and not "--detach" in run_flags:
run_flags += " -d"
if run or commit or extract:
# Args are embedded into the image, so omitted here.
run_statements += [
"\"${DOCKER}\" ${DOCKER_FLAGS} run %s %s" % (run_flags, tag_reference),
Expand All @@ -269,14 +307,22 @@ def incremental_load(
substitutions = {
"%{docker_flags}": " ".join(toolchain_info.docker_flags),
"%{docker_tool_path}": toolchain_info.tool_path,
"%{action_run}": str(action_run),
"%{load_statements}": "\n".join(load_statements),
"%{run_statements}": "\n".join(run_statements),
"%{run}": str(run),
"%{commit}": str(commit),
"%{commit_base_config}": commit_base_config.path if commit_base_config else "",
"%{output_image}": commit_name if commit_name else "",
"%{output_tar}": commit_output.path if commit_output else "",
"%{extract_path}": extract_path if extract_path else "",
"%{extract_output}": extract_output.path if extract_output else "",
"%{extract}": str(extract),
# If this rule involves stamp variables than load them as bash
# variables, and turn references to them into bash variable
# references.
"%{stamp_statements}": "\n".join([
"read_variables %s" % _get_runfile_path(ctx, f)
"read_variables %s" % (f.path if action_run else _get_runfile_path(ctx, f))
for f in stamp_files
]),
"%{tag_statements}": "\n".join(tag_statements),
Expand Down
2 changes: 0 additions & 2 deletions docker/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@ py_binary(
)

exports_files([
"commit.sh.tpl",
"extract.sh.tpl",
"image_util.sh.tpl",
])

Expand Down
Loading

0 comments on commit 27643b2

Please sign in to comment.