From 2531be5d5d833a91dd63435879b9d16df4ecb173 Mon Sep 17 00:00:00 2001 From: Edward Hartwell Goose Date: Mon, 22 Apr 2024 16:18:31 +0100 Subject: [PATCH] Update based on a comment I found --- Dockerfile | 92 ++++++++++++------ README.md | 277 +++++------------------------------------------------ common.py | 2 +- 3 files changed, 89 insertions(+), 282 deletions(-) diff --git a/Dockerfile b/Dockerfile index d0ba7c3b..a149feab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,54 +1,86 @@ -FROM amazonlinux:2023 as builder - -# Set up working directories -RUN mkdir -p /opt/python - -# Install packages -RUN dnf update -y -RUN dnf install -y gcc openssl-devel bzip2-devel libffi-devel zlib-devel wget make tar xz - -# Download and install Python 3.12 -WORKDIR /opt -RUN wget https://www.python.org/ftp/python/3.12.1/Python-3.12.1.tar.xz -RUN tar xvf Python-3.12.1.tar.xz -WORKDIR /opt/Python-3.12.1 -RUN ./configure --enable-optimizations --prefix=/opt/python -RUN make -j -RUN make install - -FROM amazonlinux:2023 +FROM --platform=linux/x86_64 public.ecr.aws/lambda/python:3.9 # Set up working directories RUN mkdir -p /opt/app RUN mkdir -p /opt/app/build RUN mkdir -p /opt/app/bin/ -# Copy over the python binaries -COPY --from=builder /opt/python /opt/python - # Copy in the lambda source WORKDIR /opt/app COPY ./*.py /opt/app/ COPY requirements.txt /opt/app/requirements.txt # Install packages -RUN dnf update -y -RUN dnf install -y cpio openssl bzip2 libffi yum-utils zip unzip less +# update security +RUN : \ + && yum -y update --security \ + && yum clean all \ + && rm -rf /var/cache/yum \ + && : # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN /opt/python/bin/pip3 install -r requirements.txt -RUN rm -rf /root/.cache/pip +# Install required packages +RUN : \ + && yum update -y \ + && yum install -y \ + cpio \ + python3 \ + python3-pip \ + yum-utils \ + zip \ + unzip \ + less \ + libtool-ltdl \ + binutils \ + && yum install -y 
https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \ + && pip3 install -r /opt/app/requirements.txt \ + && yum clean all \ + && rm -rf /var/cache/yum \ + && : # Download libraries we need to run in lambda WORKDIR /tmp -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update libtool-ltdl +RUN yumdownloader -x \*i686 --archlist=x86_64 \ + clamav \ + clamav-lib \ + clamav-scanner-systemd \ + clamav-update \ + elfutils-libs \ + json-c \ + lz4 \ + pcre2 \ + systemd-libs \ + libprelude \ + gnutls \ + libtasn1 \ + lib64nettle \ + nettle \ + libtool-ltdl \ + libxml2 \ + xz-libs \ + xz-devel + RUN rpm2cpio clamav-0*.rpm | cpio -idmv RUN rpm2cpio clamav-lib*.rpm | cpio -idmv RUN rpm2cpio clamav-update*.rpm | cpio -idmv +RUN rpm2cpio clamd-0*.rpm | cpio -idmv +RUN rpm2cpio elfutils-libs*.rpm | cpio -idmv +RUN rpm2cpio json-c*.rpm | cpio -idmv +RUN rpm2cpio lz4*.rpm | cpio -idmv +RUN rpm2cpio pcre*.rpm | cpio -idmv +RUN rpm2cpio systemd-libs*.rpm | cpio -idmv +RUN rpm2cpio gnutls* | cpio -idmv +RUN rpm2cpio nettle* | cpio -idmv +RUN rpm2cpio libtasn1* | cpio -idmv RUN rpm2cpio libtool-ltdl* | cpio -idmv +RUN rpm2cpio libxml2* | cpio -idmv +RUN rpm2cpio xz-libs* | cpio -idmv +RUN rpm2cpio xz-devel* | cpio -idmv +RUN rpm2cpio lib* | cpio -idmv +RUN rpm2cpio *.rpm | cpio -idmv # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ +RUN cp -r /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf @@ -58,7 +90,7 @@ RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf WORKDIR /opt/app RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin -WORKDIR /opt/python/lib/python3.12/site-packages +WORKDIR /var/lang/lib/python3.9/site-packages RUN zip -r9 /opt/app/build/lambda.zip * -WORKDIR /opt/app +WORKDIR /opt/app \ No newline 
at end of file diff --git a/README.md b/README.md index 8fd09279..e3413bc2 100644 --- a/README.md +++ b/README.md @@ -1,279 +1,54 @@ # bucket-antivirus-function -[![CI](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/ci.yaml/badge.svg)](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/ci.yaml) [![pre-commit](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/pre-commit.yaml/badge.svg)](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/pre-commit.yaml) [![Test with LocalStack](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/lambda.yaml/badge.svg)](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/lambda.yaml) +This function is inspired by https://github.com/bluesentry/bucket-antivirus-function. -Scan new objects added to any s3 bucket using AWS Lambda. - -## Features - -- Easy to install -- Send events from an unlimited number of S3 buckets -- Prevent reading of infected files using S3 bucket policies -- Accesses the end-user’s separate installation of -open source antivirus engine [ClamAV](http://www.clamav.net/) - -## How It Works - -![architecture-diagram](../master/images/bucket-antivirus-function.png) - -- Each time a new object is added to a bucket, S3 invokes the Lambda -function to scan the object -- The function package will download (if needed) current antivirus -definitions from a S3 bucket. Transfer speeds between a S3 bucket and -Lambda are typically faster and more reliable than another source -- The object is scanned for viruses and malware. Archive files are -extracted and the files inside scanned also -- The objects tags are updated to reflect the result of the scan, CLEAN -or INFECTED, along with the date and time of the scan. 
-- Object metadata is updated to reflect the result of the scan (optional) -- Metrics are sent to [DataDog](https://www.datadoghq.com/) (optional) -- Scan results are published to a SNS topic (optional) (Optionally choose to only publish INFECTED results) -- Files found to be INFECTED are automatically deleted (optional) - -## Installation - -### Build from Source - -To build the archive to upload to AWS Lambda, run `make all`. The build process is completed using -the [amazonlinux](https://hub.docker.com/_/amazonlinux/) [Docker](https://www.docker.com) - image. The resulting archive will be built at `build/lambda.zip`. This file will be - uploaded to AWS for both Lambda functions below. - -### Create Relevant AWS Infra via CloudFormation - -Use CloudFormation with the `cloudformation.yaml` located in the `deploy/` directory to quickly spin up the AWS infra needed to run this project. CloudFormation will create: +That repository is long out of date, but we've kept it going. There are many forks of it, but they have varying levels of quality and maintanability. -- An S3 bucket that will store AntiVirus definitions. -- A Lambda Function called `avUpdateDefinitions` that will update the AV Definitions in the S3 Bucket every 3 hours. -This function accesses the user’s above S3 Bucket to download updated definitions using `freshclam`. -- A Lambda Function called `avScanner` that is triggered on each new S3 object creation which scans the object and tags it appropriately. It is created with `1600mb` of memory which should be enough, however if you start to see function timeouts, this memory may have to be bumped up. In the past, we recommended using `1024mb`, but that has started causing Lambda timeouts and bumping this memory has resolved it. - -Running CloudFormation, it will ask for 2 inputs for this stack: - -1. BucketType: `private` (default) or `public`. This is applied to the S3 bucket that stores the AntiVirus definitions. 
We recommend to only use `public` when other AWS accounts need access to this bucket. -2. SourceBucket: [a non-empty string]. The name (do not include `s3://`) of the S3 bucket that will have its objects scanned. _Note - this is just used to create the IAM Policy, you can add/change source buckets later via the IAM Policy that CloudFormation outputs_ - -After the Stack has successfully created, there are 3 manual processes that still have to be done: - -1. Upload the `build/lambda.zip` file that was created by running `make all` to the `avUpdateDefinitions` and `avScanner` Lambda functions via the Lambda Console. -2. To trigger the Scanner function on new S3 objects, go to the `avScanner` Lambda function console, navigate to `Configuration` -> `Trigger` -> `Add Trigger` -> Search for S3, and choose your bucket(s) and select `All object create events`, then click `Add`. _Note - if you chose more than 1 bucket as the source, or chose a different bucket than the Source Bucket in the CloudFormation parameter, you will have to also edit the IAM Role to reflect these new buckets (see "Adding or Changing Source Buckets")_ -3. Navigate to the `avUpdateDefinitions` Lambda function and manually trigger the function to get the initial Clam definitions in the bucket (instead of waiting for the 3 hour trigger to happen). Do this by clicking the `Test` section, and then clicking the orange `test` button. The function should take a few seconds to execute, and when finished you should see the `clam_defs` in the `av-definitions` S3 bucket. - -#### Adding or Changing Source Buckets - -Changing or adding Source Buckets is done by editing the `AVScannerLambdaRole` IAM Role. More specifically, the `S3AVScan` and `KmsDecrypt` parts of that IAM Role's policy. - -### S3 Events +Scan new objects added to any s3 bucket using AWS Lambda. -Configure scanning of additional buckets by adding a new S3 event to -invoke the Lambda function. 
This is done from the properties of any -bucket in the AWS console. +## Overall Structure -![s3-event](../master/images/s3-event.png) +Read the [README for bluesentry](https://github.com/bluesentry/bucket-antivirus-function/blob/master/README.md) on the overall structure. -Note: If configured to update object metadata, events must only be -configured for `PUT` and `POST`. Metadata is immutable, which requires -the function to copy the object over itself with updated metadata. This -can cause a continuous loop of scanning if improperly configured. +The below explains some things that might be relevant for debugging/troubleshooting. -## Configuration +### Dependencies -Runtime configuration is accomplished using environment variables. See -the table below for reference. +`clamav` is installed in a docker image, along with its dependencies. The `Dockerfile` puts all the dynamically linked dependencies in the `/tmp/usr/lib64/` folder before copying them to the `/opt/app/bin` folder. -| Variable | Description | Default | Required | -| --- | --- | --- | --- | -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| 
AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | -| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | -| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | -| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | +The `/opt/app/bin` folder is what is eventually deployed. -## S3 Bucket Policy Examples +If `clamav` is failing, it might have logs in Cloudwatch like: -### Deny to download the object if not "CLEAN" +> error while loading shared libraries: libjson-c.so.5: cannot open shared object file: No such file or directory -This policy doesn't allow to download the object until: +This basically means the dynamically linked library can't be found. This probably means you -1. The lambda that run Clam-AV is finished (so the object has a tag) -2. The file is not CLEAN +#### Debugging code for testing dependencies -Please make sure to check cloudtrail for the arn:aws:sts, just find the event open it and copy the sts. 
-It should be in the format provided below: +I found it helpful to run: -```json - { - "Effect": "Deny", - "NotPrincipal": { - "AWS": [ - "arn:aws:iam::<>:role/<>", - "arn:aws:sts::<>:assumed-role/<>/<>", - "arn:aws:iam::<>:root" - ] - }, - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::<>/*", - "Condition": { - "StringNotEquals": { - "s3:ExistingObjectTag/av-status": "CLEAN" - } - } -} ``` - -### Deny to download and re-tag "INFECTED" object - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Deny", - "Action": ["s3:GetObject", "s3:PutObjectTagging"], - "Principal": "*", - "Resource": ["arn:aws:s3:::<>/*"], - "Condition": { - "StringEquals": { - "s3:ExistingObjectTag/av-status": "INFECTED" - } - } - } - ] -} +docker run -it amazonlinux:2023 /bin/sh ``` -## Manually Scanning Buckets - -You may want to scan all the objects in a bucket that have not previously been scanned or were created -prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility. +Then: -```sh -pip install boto3 -scan_bucket.py --lambda-function-name= --s3-bucket-name= ``` - -This tool will scan all objects that have not been previously scanned in the bucket and invoke the lambda function -asynchronously. As such you'll have to go to your cloudwatch logs to see the scan results or failures. Additionally, -the script uses the same environment variables you'd use in your lambda so you can configure them similarly. - -## Testing - -There are two types of tests in this repository. The first is pre-commit tests and the second are python tests. All of -these tests are run by CircleCI. - -### pre-commit Tests - -The pre-commit tests ensure that code submitted to this repository meet the standards of the repository. To get started -with these tests run `make pre_commit_install`. This will install the pre-commit tool and then install it in this -repository. Then the github pre-commit hook will run these tests before you commit your code. 
- -To run the tests manually run `make pre_commit_tests` or `pre-commit run -a`. - -### Python Tests - -The python tests in this repository use `unittest` and are run via the `nose` utility. To run them you will need -to install the developer resources and then run the tests: - -```sh -pip install -r requirements.txt -pip install -r requirements-dev.txt -make test +yum install cpio yum-utils -y ``` -## Testing with Localstack - -You can test the lambda functions locally using [localstack](https://www.localstack.cloud/). This will run the lambda functions in docker containers. - -To get started you will need to install [Docker](https://docs.docker.com/install/) and [Docker Compose](https://docs.docker.com/compose/install/). - -Then you can run: - -```sh -make archive -docker compose up localstack -d # start localstack -aws s3 mb s3://antivirus-definitions --profile localstack # bucket name must match AV_DEFINITION_S3_BUCKET -aws s3 mb s3://test-bucket --profile localstack # bucket name must match TEST_BUCKET -wget https://secure.eicar.org/eicar_com.zip -aws s3 cp eicar_com.zip s3://test-bucket/eicar_com.zip --profile localstack -aws --endpoint-url=http://localhost:4566 lambda create-function \ - --function-name update-clamav \ - --runtime python3.12 \ - --handler update.lambda_handler \ - --role arn:aws:iam::123456789012:role/lambda-role \ - --zip-file fileb://./build/lambda.zip \ - --timeout 120 \ - --profile localstack \ - --environment "Variables={AV_DEFINITION_S3_BUCKET=antivirus-definitions}" -aws --endpoint-url=http://localhost:4566 lambda invoke \ - --function-name update-clamav --profile localstack \ - --invocation-type RequestResponse \ - --log-type Tail \ - --payload '{}' \ - response.json \ - --query 'LogResult' | tr -d '"' | base64 -d -aws --endpoint-url=http://localhost:4566 lambda create-function \ - --function-name scan-clamav \ - --runtime python3.12 \ - --handler scan.lambda_handler \ - --role arn:aws:iam::123456789012:role/lambda-role \ - 
--zip-file fileb://./build/lambda.zip \ - --timeout 120 \ - --profile localstack \ - --environment "Variables={AV_DEFINITION_S3_BUCKET=antivirus-definitions,AV_DELETE_INFECTED_FILES=True}" -aws --endpoint-url=http://localhost:4566 lambda invoke \ - --function-name scan-clamav --profile localstack \ - --invocation-type RequestResponse \ - --log-type Tail \ - --payload '{"Records": [{"s3": {"bucket": {"name": "test-bucket"}, "object": {"key": "eicar_com.zip"}}}]}' \ - response.json \ - --query 'LogResult' | tr -d '"' | base64 -d -aws s3 ls s3://test-bucket --profile localstack # should be empty +Then: +``` +cd /tmp +yumdownloader -x \*i686 --archlist=x86_64 json-c +rpm2cpio json-c*.rpm | cpio -idmv ``` -Note1: The `--profile localstack` is only needed if you have a profile named `localstack` in your `~/.aws/config` and `~/.aws/credentials` file. See [localstack docs](https://docs.localstack.cloud/user-guide/integrations/aws-cli/#aws-cli) for more info. - -Note2: The `--endpoint-url` is only needed if you are not running localstack on the default port of `4566`. - -Note3: The `--query 'LogResult' | tr -d '"' | base64 -d` is only needed if you want to see the logs from the lambda function. - -Note4: localstack will drop all file when it is stopped. If you want to keep the files you will need to copy them to a real s3 bucket. - -## License - -```text -Upside Travel, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +Then look in `/tmp/usr/lib64/` to see what is in there. If your file, e.g. `libjson-c.so` is in there then it'll be included. If it isn't, you need to figure out the right incantations to add it. -http://www.apache.org/licenses/LICENSE-2.0 +Once you run `DOCKER_BUILDKIT=0 make all` (I find it easier to do debugging if BUILDKIT is off) a zip file will be produced. 
-Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -``` +You can also check in that zip to see if any of the files you expect to see (e.g. `libjson-c.so`) are missing/present. -ClamAV is released under the [GPL Version 2 License](https://github.com/vrtadmin/clamav-devel/blob/master/COPYING) -and all [source for ClamAV](https://github.com/vrtadmin/clamav-devel) is available -for download on Github. +Finally, you can upload the `deploy/lambda.zip` into Lambda's console to get it running. \ No newline at end of file diff --git a/common.py b/common.py index eeacb156..ea05c826 100644 --- a/common.py +++ b/common.py @@ -59,4 +59,4 @@ def create_dir(path): def get_timestamp(): - return datetime.datetime.now(datetime.UTC).strftime("%Y/%m/%d %H:%M:%S UTC") + return datetime.datetime.now(datetime.timezone.utc).strftime("%Y/%m/%d %H:%M:%S UTC")