From 2531be5d5d833a91dd63435879b9d16df4ecb173 Mon Sep 17 00:00:00 2001 From: Edward Hartwell Goose Date: Mon, 22 Apr 2024 16:18:31 +0100 Subject: [PATCH] Update based on a comment I found --- Dockerfile | 92 ++++++++++++------ README.md | 277 +++++------------------------------------------------ common.py | 2 +- 3 files changed, 89 insertions(+), 282 deletions(-) diff --git a/Dockerfile b/Dockerfile index d0ba7c3b..a149feab 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,54 +1,86 @@ -FROM amazonlinux:2023 as builder - -# Set up working directories -RUN mkdir -p /opt/python - -# Install packages -RUN dnf update -y -RUN dnf install -y gcc openssl-devel bzip2-devel libffi-devel zlib-devel wget make tar xz - -# Download and install Python 3.12 -WORKDIR /opt -RUN wget https://www.python.org/ftp/python/3.12.1/Python-3.12.1.tar.xz -RUN tar xvf Python-3.12.1.tar.xz -WORKDIR /opt/Python-3.12.1 -RUN ./configure --enable-optimizations --prefix=/opt/python -RUN make -j -RUN make install - -FROM amazonlinux:2023 +FROM --platform=linux/x86_64 public.ecr.aws/lambda/python:3.9 # Set up working directories RUN mkdir -p /opt/app RUN mkdir -p /opt/app/build RUN mkdir -p /opt/app/bin/ -# Copy over the python binaries -COPY --from=builder /opt/python /opt/python - # Copy in the lambda source WORKDIR /opt/app COPY ./*.py /opt/app/ COPY requirements.txt /opt/app/requirements.txt # Install packages -RUN dnf update -y -RUN dnf install -y cpio openssl bzip2 libffi yum-utils zip unzip less +# update security +RUN : \ + && yum -y update --security \ + && yum clean all \ + && rm -rf /var/cache/yum \ + && : # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN /opt/python/bin/pip3 install -r requirements.txt -RUN rm -rf /root/.cache/pip +# Install required packages +RUN : \ + && yum update -y \ + && yum install -y \ + cpio \ + python3 \ + python3-pip \ + yum-utils \ + zip \ + unzip \ + less \ + libtool-ltdl \ + binutils \ + && yum install -y 
https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \ + && pip3 install -r /opt/app/requirements.txt \ + && yum clean all \ + && rm -rf /var/cache/yum \ + && : # Download libraries we need to run in lambda WORKDIR /tmp -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update libtool-ltdl +RUN yumdownloader -x \*i686 --archlist=x86_64 \ + clamav \ + clamav-lib \ + clamav-scanner-systemd \ + clamav-update \ + elfutils-libs \ + json-c \ + lz4 \ + pcre2 \ + systemd-libs \ + libprelude \ + gnutls \ + libtasn1 \ + lib64nettle \ + nettle \ + libtool-ltdl \ + libxml2 \ + xz-libs \ + xz-devel + RUN rpm2cpio clamav-0*.rpm | cpio -idmv RUN rpm2cpio clamav-lib*.rpm | cpio -idmv RUN rpm2cpio clamav-update*.rpm | cpio -idmv +RUN rpm2cpio clamd-0*.rpm | cpio -idmv +RUN rpm2cpio elfutils-libs*.rpm | cpio -idmv +RUN rpm2cpio json-c*.rpm | cpio -idmv +RUN rpm2cpio lz4*.rpm | cpio -idmv +RUN rpm2cpio pcre*.rpm | cpio -idmv +RUN rpm2cpio systemd-libs*.rpm | cpio -idmv +RUN rpm2cpio gnutls* | cpio -idmv +RUN rpm2cpio nettle* | cpio -idmv +RUN rpm2cpio libtasn1* | cpio -idmv RUN rpm2cpio libtool-ltdl* | cpio -idmv +RUN rpm2cpio libxml2* | cpio -idmv +RUN rpm2cpio xz-libs* | cpio -idmv +RUN rpm2cpio xz-devel* | cpio -idmv +RUN rpm2cpio lib* | cpio -idmv +RUN rpm2cpio *.rpm | cpio -idmv # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ +RUN cp -r /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf @@ -58,7 +90,7 @@ RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf WORKDIR /opt/app RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin -WORKDIR /opt/python/lib/python3.12/site-packages +WORKDIR /var/lang/lib/python3.9/site-packages RUN zip -r9 /opt/app/build/lambda.zip * -WORKDIR /opt/app +WORKDIR /opt/app \ No newline 
at end of file diff --git a/README.md b/README.md index 8fd09279..e3413bc2 100644 --- a/README.md +++ b/README.md @@ -1,279 +1,54 @@ # bucket-antivirus-function -[![CI](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/ci.yaml/badge.svg)](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/ci.yaml) [![pre-commit](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/pre-commit.yaml/badge.svg)](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/pre-commit.yaml) [![Test with LocalStack](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/lambda.yaml/badge.svg)](https://github.com/Georepublic/bucket-antivirus-function/actions/workflows/lambda.yaml) +This function is inspired by https://github.com/bluesentry/bucket-antivirus-function. -Scan new objects added to any s3 bucket using AWS Lambda. - -## Features - -- Easy to install -- Send events from an unlimited number of S3 buckets -- Prevent reading of infected files using S3 bucket policies -- Accesses the end-user’s separate installation of -open source antivirus engine [ClamAV](http://www.clamav.net/) - -## How It Works - -![architecture-diagram](../master/images/bucket-antivirus-function.png) - -- Each time a new object is added to a bucket, S3 invokes the Lambda -function to scan the object -- The function package will download (if needed) current antivirus -definitions from a S3 bucket. Transfer speeds between a S3 bucket and -Lambda are typically faster and more reliable than another source -- The object is scanned for viruses and malware. Archive files are -extracted and the files inside scanned also -- The objects tags are updated to reflect the result of the scan, CLEAN -or INFECTED, along with the date and time of the scan. 
-- Object metadata is updated to reflect the result of the scan (optional) -- Metrics are sent to [DataDog](https://www.datadoghq.com/) (optional) -- Scan results are published to a SNS topic (optional) (Optionally choose to only publish INFECTED results) -- Files found to be INFECTED are automatically deleted (optional) - -## Installation - -### Build from Source - -To build the archive to upload to AWS Lambda, run `make all`. The build process is completed using -the [amazonlinux](https://hub.docker.com/_/amazonlinux/) [Docker](https://www.docker.com) - image. The resulting archive will be built at `build/lambda.zip`. This file will be - uploaded to AWS for both Lambda functions below. - -### Create Relevant AWS Infra via CloudFormation - -Use CloudFormation with the `cloudformation.yaml` located in the `deploy/` directory to quickly spin up the AWS infra needed to run this project. CloudFormation will create: +That repository is long out of date, but we've kept it going. There are many forks of it, but they have varying levels of quality and maintanability. -- An S3 bucket that will store AntiVirus definitions. -- A Lambda Function called `avUpdateDefinitions` that will update the AV Definitions in the S3 Bucket every 3 hours. -This function accesses the user’s above S3 Bucket to download updated definitions using `freshclam`. -- A Lambda Function called `avScanner` that is triggered on each new S3 object creation which scans the object and tags it appropriately. It is created with `1600mb` of memory which should be enough, however if you start to see function timeouts, this memory may have to be bumped up. In the past, we recommended using `1024mb`, but that has started causing Lambda timeouts and bumping this memory has resolved it. - -Running CloudFormation, it will ask for 2 inputs for this stack: - -1. BucketType: `private` (default) or `public`. This is applied to the S3 bucket that stores the AntiVirus definitions. 
We recommend to only use `public` when other AWS accounts need access to this bucket. -2. SourceBucket: [a non-empty string]. The name (do not include `s3://`) of the S3 bucket that will have its objects scanned. _Note - this is just used to create the IAM Policy, you can add/change source buckets later via the IAM Policy that CloudFormation outputs_ - -After the Stack has successfully created, there are 3 manual processes that still have to be done: - -1. Upload the `build/lambda.zip` file that was created by running `make all` to the `avUpdateDefinitions` and `avScanner` Lambda functions via the Lambda Console. -2. To trigger the Scanner function on new S3 objects, go to the `avScanner` Lambda function console, navigate to `Configuration` -> `Trigger` -> `Add Trigger` -> Search for S3, and choose your bucket(s) and select `All object create events`, then click `Add`. _Note - if you chose more than 1 bucket as the source, or chose a different bucket than the Source Bucket in the CloudFormation parameter, you will have to also edit the IAM Role to reflect these new buckets (see "Adding or Changing Source Buckets")_ -3. Navigate to the `avUpdateDefinitions` Lambda function and manually trigger the function to get the initial Clam definitions in the bucket (instead of waiting for the 3 hour trigger to happen). Do this by clicking the `Test` section, and then clicking the orange `test` button. The function should take a few seconds to execute, and when finished you should see the `clam_defs` in the `av-definitions` S3 bucket. - -#### Adding or Changing Source Buckets - -Changing or adding Source Buckets is done by editing the `AVScannerLambdaRole` IAM Role. More specifically, the `S3AVScan` and `KmsDecrypt` parts of that IAM Role's policy. - -### S3 Events +Scan new objects added to any s3 bucket using AWS Lambda. -Configure scanning of additional buckets by adding a new S3 event to -invoke the Lambda function. 
This is done from the properties of any -bucket in the AWS console. +## Overall Structure -![s3-event](../master/images/s3-event.png) +Read the [README for bluesentry](https://github.com/bluesentry/bucket-antivirus-function/blob/master/README.md) on the overall structure. -Note: If configured to update object metadata, events must only be -configured for `PUT` and `POST`. Metadata is immutable, which requires -the function to copy the object over itself with updated metadata. This -can cause a continuous loop of scanning if improperly configured. +The below explains some things that might be relevant for debugging/troubleshooting. -## Configuration +### Dependencies -Runtime configuration is accomplished using environment variables. See -the table below for reference. +`clamav` is installed in a docker image, along with its dependencies. The `Dockerfile` puts all the dynamically linked dependencies in the `/tmp/usr/lib64/` folder before copying them to the `/opt/app/bin` folder. -| Variable | Description | Default | Required | -| --- | --- | --- | --- | -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| 
AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | -| S3_ENDPOINT | The Endpoint to use when interacting wth S3 | None | No | -| SNS_ENDPOINT | The Endpoint to use when interacting wth SNS | None | No | -| LAMBDA_ENDPOINT | The Endpoint to use when interacting wth Lambda | None | No | +The `/opt/app/bin` folder is what is eventually deployed. -## S3 Bucket Policy Examples +If `clamav` is failing, it might have logs in Cloudwatch like: -### Deny to download the object if not "CLEAN" +> error while loading shared libraries: libjson-c.so.5: cannot open shared object file: No such file or directory -This policy doesn't allow to download the object until: +This basically means the dynamically linked library can't be found. This probably means you -1. The lambda that run Clam-AV is finished (so the object has a tag) -2. The file is not CLEAN +#### Debugging code for testing dependencies -Please make sure to check cloudtrail for the arn:aws:sts, just find the event open it and copy the sts. 
-It should be in the format provided below: +I found it helpful to run: -```json - { - "Effect": "Deny", - "NotPrincipal": { - "AWS": [ - "arn:aws:iam::<>:role/<>", - "arn:aws:sts::<>:assumed-role/<>/<>", - "arn:aws:iam::<>:root" - ] - }, - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::<>/*", - "Condition": { - "StringNotEquals": { - "s3:ExistingObjectTag/av-status": "CLEAN" - } - } -} ``` - -### Deny to download and re-tag "INFECTED" object - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Deny", - "Action": ["s3:GetObject", "s3:PutObjectTagging"], - "Principal": "*", - "Resource": ["arn:aws:s3:::<>/*"], - "Condition": { - "StringEquals": { - "s3:ExistingObjectTag/av-status": "INFECTED" - } - } - } - ] -} +docker run -it amazonlinux:2023 /bin/sh ``` -## Manually Scanning Buckets - -You may want to scan all the objects in a bucket that have not previously been scanned or were created -prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility. +Then: -```sh -pip install boto3 -scan_bucket.py --lambda-function-name= --s3-bucket-name= ``` - -This tool will scan all objects that have not been previously scanned in the bucket and invoke the lambda function -asynchronously. As such you'll have to go to your cloudwatch logs to see the scan results or failures. Additionally, -the script uses the same environment variables you'd use in your lambda so you can configure them similarly. - -## Testing - -There are two types of tests in this repository. The first is pre-commit tests and the second are python tests. All of -these tests are run by CircleCI. - -### pre-commit Tests - -The pre-commit tests ensure that code submitted to this repository meet the standards of the repository. To get started -with these tests run `make pre_commit_install`. This will install the pre-commit tool and then install it in this -repository. Then the github pre-commit hook will run these tests before you commit your code. 
- -To run the tests manually run `make pre_commit_tests` or `pre-commit run -a`. - -### Python Tests - -The python tests in this repository use `unittest` and are run via the `nose` utility. To run them you will need -to install the developer resources and then run the tests: - -```sh -pip install -r requirements.txt -pip install -r requirements-dev.txt -make test +yum install cpio yum-utils -y ``` -## Testing with Localstack - -You can test the lambda functions locally using [localstack](https://www.localstack.cloud/). This will run the lambda functions in docker containers. - -To get started you will need to install [Docker](https://docs.docker.com/install/) and [Docker Compose](https://docs.docker.com/compose/install/). - -Then you can run: - -```sh -make archive -docker compose up localstack -d # start localstack -aws s3 mb s3://antivirus-definitions --profile localstack # bucket name must match AV_DEFINITION_S3_BUCKET -aws s3 mb s3://test-bucket --profile localstack # bucket name must match TEST_BUCKET -wget https://secure.eicar.org/eicar_com.zip -aws s3 cp eicar_com.zip s3://test-bucket/eicar_com.zip --profile localstack -aws --endpoint-url=http://localhost:4566 lambda create-function \ - --function-name update-clamav \ - --runtime python3.12 \ - --handler update.lambda_handler \ - --role arn:aws:iam::123456789012:role/lambda-role \ - --zip-file fileb://./build/lambda.zip \ - --timeout 120 \ - --profile localstack \ - --environment "Variables={AV_DEFINITION_S3_BUCKET=antivirus-definitions}" -aws --endpoint-url=http://localhost:4566 lambda invoke \ - --function-name update-clamav --profile localstack \ - --invocation-type RequestResponse \ - --log-type Tail \ - --payload '{}' \ - response.json \ - --query 'LogResult' | tr -d '"' | base64 -d -aws --endpoint-url=http://localhost:4566 lambda create-function \ - --function-name scan-clamav \ - --runtime python3.12 \ - --handler scan.lambda_handler \ - --role arn:aws:iam::123456789012:role/lambda-role \ - 
--zip-file fileb://./build/lambda.zip \ - --timeout 120 \ - --profile localstack \ - --environment "Variables={AV_DEFINITION_S3_BUCKET=antivirus-definitions,AV_DELETE_INFECTED_FILES=True}" -aws --endpoint-url=http://localhost:4566 lambda invoke \ - --function-name scan-clamav --profile localstack \ - --invocation-type RequestResponse \ - --log-type Tail \ - --payload '{"Records": [{"s3": {"bucket": {"name": "test-bucket"}, "object": {"key": "eicar_com.zip"}}}]}' \ - response.json \ - --query 'LogResult' | tr -d '"' | base64 -d -aws s3 ls s3://test-bucket --profile localstack # should be empty +Then: +``` +cd /tmp +yumdownloader -x \*i686 --archlist=x86_64 json-c +rpm2cpio json-c*.rpm | cpio -idmv ``` -Note1: The `--profile localstack` is only needed if you have a profile named `localstack` in your `~/.aws/config` and `~/.aws/credentials` file. See [localstack docs](https://docs.localstack.cloud/user-guide/integrations/aws-cli/#aws-cli) for more info. - -Note2: The `--endpoint-url` is only needed if you are not running localstack on the default port of `4566`. - -Note3: The `--query 'LogResult' | tr -d '"' | base64 -d` is only needed if you want to see the logs from the lambda function. - -Note4: localstack will drop all file when it is stopped. If you want to keep the files you will need to copy them to a real s3 bucket. - -## License - -```text -Upside Travel, Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +Then look in `/tmp/usr/lib64/` to see what is in there. If your file, e.g. `libjson-c.so` is in there then it'll be included. If it isn't, you need to figure out the right incantations to add it. -http://www.apache.org/licenses/LICENSE-2.0 +Once you run `DOCKER_BUILDKIT=0 make all` (I find it easier to do debugging if BUILDKIT is off) a zip file will be produced. 
-Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -``` +You can also check in that zip to see if any of the files you expect to see (e.g. `libjson-c.so`) are missing/present. -ClamAV is released under the [GPL Version 2 License](https://github.com/vrtadmin/clamav-devel/blob/master/COPYING) -and all [source for ClamAV](https://github.com/vrtadmin/clamav-devel) is available -for download on Github. +Finally, you can upload the `deploy/lambda.zip` into Lambda's console to get it running. \ No newline at end of file diff --git a/common.py b/common.py index eeacb156..ea05c826 100644 --- a/common.py +++ b/common.py @@ -59,4 +59,4 @@ def create_dir(path): def get_timestamp(): - return datetime.datetime.now(datetime.UTC).strftime("%Y/%m/%d %H:%M:%S UTC") + return datetime.datetime.now(datetime.timezone.utc).strftime("%Y/%m/%d %H:%M:%S UTC")