diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index 477a8e7b..00000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,96 +0,0 @@ -############ -# -# Caches -# -# Caches may require a prefix, since caches in CircleCI 2.0 are immutable. -# A prefix provides an easy way to invalidate a cache. See https://circleci.com/docs/2.0/caching/#clearing-cache -# -############ - -version: "2.1" - -executors: - primary: - docker: - - image: docker:17.05.0-ce - python_test: - docker: - - image: circleci/python:3.7-stretch - pre_commit_test: - docker: - - image: circleci/python:3.7-stretch - -jobs: - - # `build` is used for building the archive - build: - executor: primary - working_directory: ~/src - steps: - - setup_remote_docker: - reusable: true # default - false - exclusive: true # default - true - - run: - name: Dependencies - command: apk add --no-cache make git curl openssh - - checkout - - run: - name: Build - command: make archive - - # `pre_commit_deps` is used for cache pre-commit sources - pre_commit_deps: - executor: pre_commit_test - steps: - - checkout - - - restore_cache: - keys: - - pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} - - run: sudo pip install pre-commit==1.18.3 - - run: pre-commit install-hooks - - - save_cache: - key: pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} - paths: - - ~/.cache/pre-commit - - # `pre_commit_test` is used to run pre-commit hooks on all files - pre_commit_test: - executor: pre_commit_test - steps: - - checkout - - restore_cache: - keys: - - pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} - - run: sudo pip install pre-commit==1.18.3 - - run: - name: Run pre-commit tests - command: pre-commit run --all-files - - # `test` is used to run python tests - test: - executor: python_test - steps: - - checkout - - restore_cache: - keys: - - pre-commit-dot-cache-{{ checksum ".pre-commit-config.yaml" }} - - run: sudo pip install -r requirements.txt - - run: sudo pip install -r requirements-dev.txt - - run: nosetests - -workflows: - version: 2 - - main: - jobs: - - pre_commit_deps - - pre_commit_test: - requires: - - pre_commit_deps - - test - - build: - requires: - - pre_commit_test - - test diff --git a/.coveragerc b/.coveragerc index 023d3bb6..bd4102d1 100644 --- a/.coveragerc +++ b/.coveragerc @@ -6,3 +6,6 @@ omit = */python?.?/* */site-packages/nose/* show_missing = true + +[html] +directory = coverage_html_report \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 6a8d16fe..00000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1 +0,0 @@ -* @UpsideTravel/full-access diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..3619caab --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,29 @@ +name: CI + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.12' + - name: Create virtualenv + run: | + python -m venv venv + source venv/bin/activate + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Run pytest + run: | + pytest -v + - name: Run pre-commit + run: | + pip install pre-commit + pre-commit install-hooks + pre-commit run --all-files \ No newline at end of file diff --git a/.github/workflows/lambda.yaml b/.github/workflows/lambda.yaml new file mode 100644 index 00000000..57acf3a2 --- 
/dev/null +++ b/.github/workflows/lambda.yaml @@ -0,0 +1,63 @@ +name: Test with LocalStack + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install dependencies + run: sudo apt update && sudo apt install -y make wget + - name: build docker image + run: make archive + - name: Start LocalStack + uses: LocalStack/setup-localstack@main + with: + image-tag: 'latest' + install-awslocal: 'true' + - name: Run tests against localstack + run: | + awslocal s3 mb s3://antivirus-definitions + awslocal s3 mb s3://test-bucket + wget https://secure.eicar.org/eicar_com.zip + awslocal s3 cp eicar_com.zip s3://test-bucket/eicar_com.zip + awslocal lambda create-function \ + --function-name update-clamav \ + --runtime python3.12 \ + --handler update.lambda_handler \ + --role arn:aws:iam::123456789012:role/lambda-role \ + --zip-file fileb://./build/lambda.zip \ + --timeout 120 \ + --environment "Variables={AV_DEFINITION_S3_BUCKET=antivirus-definitions}" + sleep 30 + awslocal lambda invoke \ + --function-name update-clamav \ + --invocation-type RequestResponse \ + --log-type Tail \ + --payload '{}' \ + response.json \ + --query 'LogResult' | tr -d '"' | base64 -d + awslocal lambda create-function \ + --function-name scan-clamav \ + --runtime python3.12 \ + --handler scan.lambda_handler \ + --role arn:aws:iam::123456789012:role/lambda-role \ + --zip-file fileb://./build/lambda.zip \ + --timeout 120 \ + --environment "Variables={AV_DEFINITION_S3_BUCKET=antivirus-definitions,AV_DELETE_INFECTED_FILES=True}" + sleep 30 + awslocal lambda invoke \ + --function-name scan-clamav \ + --invocation-type RequestResponse \ + --log-type Tail \ + --payload '{"Records": [{"s3": {"bucket": {"name": "test-bucket"}, "object": {"key": "eicar_com.zip"}}}]}' \ + response.json \ + --query 'LogResult' | tr -d '"' | base64 -d + result=$(awslocal s3 ls s3://test-bucket) + if [ -z "$result" ]; then + echo "Bucket is empty" + else + echo "Bucket is not empty" + exit 1 + fi diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml new file mode 100644 index 00000000..cd32dcbf --- /dev/null +++ b/.github/workflows/pre-commit.yaml @@ -0,0 +1,26 @@ +name: pre-commit + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: '3.12' + - name: Create virtualenv + run: | + python -m venv venv + source venv/bin/activate + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-dev.txt + - name: Run pre-commit + run: | + pip install pre-commit + pre-commit install-hooks + pre-commit run --all-files \ No newline at end of file diff --git a/.gitignore b/.gitignore index da9084e7..22505e42 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,9 @@ tmp/ # EICAR Files *eicar* + +# response.json +response.json + +# coverage report +coverage_html_report/ \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 806c3de2..13d6cf07 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,30 +1,41 @@ repos: - repo: https://github.com/ambv/black - rev: stable + rev: 23.12.1 hooks: - id: black - language_version: python3.7 + language_version: python3.12 exclude: > (?x)^( scripts/gen-docs-index| )$ - - repo: git://github.com/pre-commit/pre-commit-hooks - rev: v2.2.3 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 hooks: - id: check-ast - id: 
check-json - id: check-merge-conflict - id: check-yaml + exclude: deploy/ - id: debug-statements - id: detect-private-key - id: fix-encoding-pragma - - id: flake8 - id: trailing-whitespace - - repo: git://github.com/igorshubovych/markdownlint-cli - rev: v0.17.0 + - repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.38.0 hooks: - id: markdownlint entry: markdownlint --ignore .github/*.md + + - repo: https://github.com/aws-cloudformation/cfn-python-lint + rev: v0.84.0 + hooks: + - id: cfn-python-lint + files: deploy/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 96ee3646..2ae508d7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM amazonlinux:2 +FROM --platform=linux/x86_64 public.ecr.aws/lambda/python:3.9 WORKDIR /opt/app @@ -12,30 +12,76 @@ COPY ./*.py /opt/app/ COPY requirements.txt /opt/app/requirements.txt # Install packages -RUN yum update -y -RUN yum install -y cpio python3-pip yum-utils zip unzip less -RUN yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm +# update security +RUN : \ + && yum -y update --security \ + && yum clean all \ + && rm -rf /var/cache/yum \ + && : # This had --no-cache-dir, tracing through multiple tickets led to a problem in wheel -RUN pip3 install -r requirements.txt -RUN rm -rf /root/.cache/pip +# Install required packages +RUN : \ + && yum update -y \ + && yum install -y \ + cpio \ + python3 \ + python3-pip \ + yum-utils \ + zip \ + unzip \ + less \ + libtool-ltdl \ + binutils \ + && yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \ + && pip3 install -r /opt/app/requirements.txt \ + && yum clean all \ + && rm -rf /var/cache/yum \ + && : # Download libraries we need to run in lambda WORKDIR /tmp -RUN yumdownloader -x \*i686 --archlist=x86_64 clamav clamav-lib clamav-update json-c pcre2 libprelude gnutls libtasn1 lib64nettle nettle +RUN yumdownloader -x \*i686 --archlist=x86_64 \ + clamav \ + clamav-lib \ + clamav-scanner-systemd \ + clamav-update \ + elfutils-libs \ + json-c \ + lz4 \ + pcre2 \ + systemd-libs \ + libprelude \ + gnutls \ + libtasn1 \ + lib64nettle \ + nettle \ + libtool-ltdl \ + libxml2 \ + xz-libs \ + xz-devel + RUN rpm2cpio clamav-0*.rpm | cpio -idmv RUN rpm2cpio clamav-lib*.rpm | cpio -idmv RUN rpm2cpio clamav-update*.rpm | cpio -idmv +RUN rpm2cpio clamd-0*.rpm | cpio -idmv +RUN rpm2cpio elfutils-libs*.rpm | cpio -idmv RUN rpm2cpio json-c*.rpm | cpio -idmv +RUN rpm2cpio lz4*.rpm | cpio -idmv RUN rpm2cpio pcre*.rpm | cpio -idmv +RUN rpm2cpio systemd-libs*.rpm | cpio -idmv RUN rpm2cpio gnutls* | cpio -idmv RUN rpm2cpio nettle* | cpio -idmv +RUN rpm2cpio libtasn1* | cpio -idmv +RUN rpm2cpio libtool-ltdl* | cpio -idmv +RUN rpm2cpio libxml2* | cpio -idmv +RUN rpm2cpio xz-libs* | cpio -idmv +RUN rpm2cpio xz-devel* | cpio -idmv RUN rpm2cpio lib* | cpio -idmv RUN rpm2cpio *.rpm | cpio -idmv -RUN rpm2cpio libtasn1* | cpio -idmv # Copy over the binaries and libraries -RUN cp /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ +RUN cp -r /tmp/usr/bin/clamscan /tmp/usr/bin/freshclam /tmp/usr/lib64/* /opt/app/bin/ # Fix the freshclam.conf settings RUN echo "DatabaseMirror database.clamav.net" > /opt/app/bin/freshclam.conf @@ -45,7 +91,7 @@ RUN echo "CompressLocalDatabase yes" >> /opt/app/bin/freshclam.conf WORKDIR /opt/app RUN zip -r9 --exclude="*test*" /opt/app/build/lambda.zip *.py bin -WORKDIR 
/usr/local/lib/python3.7/site-packages
+WORKDIR /var/lang/lib/python3.9/site-packages

 RUN zip -r9 /opt/app/build/lambda.zip *

-WORKDIR /opt/app
+WORKDIR /opt/app
\ No newline at end of file
diff --git a/Makefile b/Makefile
index a527b641..cb69f6d8 100644
--- a/Makefile
+++ b/Makefile
@@ -29,7 +29,8 @@ clean: ## Clean build artifacts
 	rm -rf build/
 	rm -rf tmp/
 	rm -f .coverage
-	find ./ -type d -name '__pycache__' -delete
+	rm -rf .pytest_cache/
+	find ./ -type d -name '__pycache__' | xargs rm -rf
 	find ./ -type f -name '*.pyc' -delete

 .PHONY: archive
@@ -40,8 +41,8 @@ archive: clean ## Create the archive for AWS lambda

 .PHONY: pre_commit_install ## Ensure that pre-commit hook is installed and kept up to date
 pre_commit_install: .git/hooks/pre-commit ## Ensure pre-commit is installed
-.git/hooks/pre-commit: /usr/local/bin/pre-commit
-	pip install pre-commit==1.18.3
+.git/hooks/pre-commit: venv ## Ensure venv is created first
+	pip install pre-commit
 	pre-commit install
 	pre-commit install-hooks

@@ -51,16 +52,8 @@ pre_commit_tests: ## Run pre-commit tests

 .PHONY: test
 test: clean ## Run python tests
-	nosetests
+	pytest --no-cov

 .PHONY: coverage
 coverage: clean ## Run python tests with coverage
-	nosetests --with-coverage
-
-.PHONY: scan
-scan: ./build/lambda.zip ## Run scan function locally
-	scripts/run-scan-lambda $(TEST_BUCKET) $(TEST_KEY)
-
-.PHONY: update
-update: ./build/lambda.zip ## Run update function locally
-	scripts/run-update-lambda
diff --git a/README.md b/README.md
index e0ce5cb6..e3413bc2 100644
--- a/README.md
+++ b/README.md
@@ -1,416 +1,54 @@
 # bucket-antivirus-function

-[![CircleCI](https://circleci.com/gh/upsidetravel/bucket-antivirus-function.svg?style=svg)](https://circleci.com/gh/upsidetravel/bucket-antivirus-function)
+This function is inspired by https://github.com/bluesentry/bucket-antivirus-function.

-Scan new objects added to any s3 bucket using AWS Lambda. [more details in this post](https://engineering.upside.com/s3-antivirus-scanning-with-lambda-and-clamav-7d33f9c5092e)
+That repository is long out of date, but we've kept it going. There are many forks of it, but they have varying levels of quality and maintainability.

-## Features
+Scan new objects added to any S3 bucket using AWS Lambda.

-- Easy to install
-- Send events from an unlimited number of S3 buckets
-- Prevent reading of infected files using S3 bucket policies
-- Accesses the end-user’s separate installation of
-open source antivirus engine [ClamAV](http://www.clamav.net/)
+## Overall Structure

-## How It Works
+Read the [README for bluesentry](https://github.com/bluesentry/bucket-antivirus-function/blob/master/README.md) on the overall structure.

-![architecture-diagram](../master/images/bucket-antivirus-function.png)
+The below explains some things that might be relevant for debugging/troubleshooting.

-- Each time a new object is added to a bucket, S3 invokes the Lambda
-function to scan the object
-- The function package will download (if needed) current antivirus
-definitions from a S3 bucket. Transfer speeds between a S3 bucket and
-Lambda are typically faster and more reliable than another source
-- The object is scanned for viruses and malware. Archive files are
-extracted and the files inside scanned also
-- The objects tags are updated to reflect the result of the scan, CLEAN
-or INFECTED, along with the date and time of the scan.
-- Object metadata is updated to reflect the result of the scan (optional)
-- Metrics are sent to [DataDog](https://www.datadoghq.com/) (optional)
-- Scan results are published to a SNS topic (optional) (Optionally choose to only publish INFECTED results)
-- Files found to be INFECTED are automatically deleted (optional)
+### Dependencies

-## Installation
+`clamav` is installed in a Docker image, along with its dependencies. The `Dockerfile` puts all the dynamically linked dependencies in the `/tmp/usr/lib64/` folder before copying them to the `/opt/app/bin` folder.

-### Build from Source
+The `/opt/app/bin` folder is what is eventually deployed.

-To build the archive to upload to AWS Lambda, run `make`. The build process is completed using
-the [amazonlinux](https://hub.docker.com/_/amazonlinux/) [Docker](https://www.docker.com)
- image. The resulting archive will be built at `build/lambda.zip`. This file will be
- uploaded to AWS for both Lambda functions below.
+If `clamav` is failing, it might have logs in CloudWatch like:

-### AV Definition Bucket
+> error while loading shared libraries: libjson-c.so.5: cannot open shared object file: No such file or directory

-Create an s3 bucket to store current antivirus definitions. This
-provides the fastest download speeds for the scanner. This bucket can
-be kept as private.
+This basically means the dynamically linked library can't be found. This probably means you need to add the missing library to the `yumdownloader` list in the `Dockerfile` so that it gets copied into `/opt/app/bin`.

-To allow public access, useful for other accounts,
-add the following policy to the bucket.
+#### Debugging code for testing dependencies

-```json
-{
-    "Version": "2012-10-17",
-    "Statement": [
-        {
-            "Sid": "AllowPublic",
-            "Effect": "Allow",
-            "Principal": "*",
-            "Action": [
-                "s3:GetObject",
-                "s3:GetObjectTagging"
-            ],
-            "Resource": "arn:aws:s3:::<bucket-name>/*"
-        }
-    ]
-}
-```
-
-### Definition Update Lambda
-
-This function accesses the user’s ClamAV instance to download
-updated definitions using `freshclam`. It is recommended to run
-this every 3 hours to stay protected from the latest threats.
-
-1. Create the archive using the method in the
-   [Build from Source](#build-from-source) section.
-2. From the AWS Lambda Dashboard, click **Create function**
-3. Choose **Author from scratch** on the *Create function* page
-4. Name your function `bucket-antivirus-update` when prompted on the
-*Configure function* step.
-5. Set *Runtime* to `Python 3.7`
-6. Create a new role name `bucket-antivirus-update` that uses the
-following policy document
-
-    ```json
-    {
-       "Version":"2012-10-17",
-       "Statement":[
-          {
-             "Sid":"WriteCloudWatchLogs",
-             "Effect":"Allow",
-             "Action":[
-                "logs:CreateLogGroup",
-                "logs:CreateLogStream",
-                "logs:PutLogEvents"
-             ],
-             "Resource":"*"
-          },
-          {
-             "Sid":"s3GetAndPutWithTagging",
-             "Action":[
-                "s3:GetObject",
-                "s3:GetObjectTagging",
-                "s3:PutObject",
-                "s3:PutObjectTagging",
-                "s3:PutObjectVersionTagging"
-             ],
-             "Effect":"Allow",
-             "Resource":[
-                "arn:aws:s3:::<av-definition-s3-bucket>/*"
-             ]
-          },
-          {
-             "Sid": "s3HeadObject",
-             "Effect": "Allow",
-             "Action": "s3:ListBucket",
-             "Resource": [
-                "arn:aws:s3:::<av-definition-s3-bucket>/*",
-                "arn:aws:s3:::<av-definition-s3-bucket>"
-             ]
-          }
-       ]
-    }
-    ```
-
-7. Click next to go to the Configuration page
-8. Add a trigger from the left of **CloudWatch Event** using `rate(3 hours)`
-for the **Schedule expression**. Be sure to check **Enable trigger**
-9. Choose **Upload a ZIP file** for *Code entry type* and select the archive
-downloaded in step 1.
-10.
Add a single environment variable named `AV_DEFINITION_S3_BUCKET` -and set its value to the name of the bucket created to store your AV -definitions. -11. Set *Lambda handler* to `update.lambda_handler` -12. Under *Basic Settings*, set *Timeout* to **5 minutes** and *Memory* to -**1024** -13. Save and test your function. If prompted for test data, just use -the default provided. - -### AV Scanner Lambda - -1. Create the archive using the method in the - [Build from Source](#build-from-source) section. -2. From the AWS Lambda Dashboard, click **Create function** -3. Choose **Author from scratch** on the *Create function* page -4. Name your function `bucket-antivirus-function` -5. Set *Runtime* to `Python 3.7` -6. Create a new role name `bucket-antivirus-function` that uses the -following policy document - - ```json - { - "Version":"2012-10-17", - "Statement":[ - { - "Sid":"WriteCloudWatchLogs", - "Effect":"Allow", - "Action":[ - "logs:CreateLogGroup", - "logs:CreateLogStream", - "logs:PutLogEvents" - ], - "Resource":"*" - }, - { - "Sid":"s3AntiVirusScan", - "Action":[ - "s3:GetObject", - "s3:GetObjectTagging", - "s3:GetObjectVersion", - "s3:PutObjectTagging", - "s3:PutObjectVersionTagging" - ], - "Effect":"Allow", - "Resource": [ - "arn:aws:s3:::<bucket-name-1>/*", - "arn:aws:s3:::<bucket-name-2>/*" - ] - }, - { - "Sid":"s3AntiVirusDefinitions", - "Action":[ - "s3:GetObject", - "s3:GetObjectTagging" - ], - "Effect":"Allow", - "Resource": [ - "arn:aws:s3:::<av-definition-s3-bucket>/*" - ] - }, - { - "Sid":"kmsDecrypt", - "Action":[ - "kms:Decrypt" - ], - "Effect":"Allow", - "Resource": [ - "arn:aws:s3:::<bucket-name-1>/*", - "arn:aws:s3:::<bucket-name-2>/*" - ] - }, - { - "Sid":"snsPublish", - "Action": [ - "sns:Publish" - ], - "Effect":"Allow", - "Resource": [ - "arn:aws:sns:::<av-scan-start>", - "arn:aws:sns:::<av-status>" - ] - }, - { - "Sid":"s3HeadObject", - "Effect":"Allow", - "Action":"s3:ListBucket", - "Resource":[ - "arn:aws:s3:::<av-definition-s3-bucket>/*", - "arn:aws:s3:::<av-definition-s3-bucket>" - ] - } - ] - } - ``` - -7. Click *next* to head to the Configuration page -8. Add a new trigger of type **S3 Event** using `ObjectCreate(all)`. -9. Choose **Upload a ZIP file** for *Code entry type* and select the archive -created in step 1. -10. Set *Lambda handler* to `scan.lambda_handler` -11. Add a single environment variable named `AV_DEFINITION_S3_BUCKET` -and set its value to the name of the bucket created to store your AV -definitions. If your bucket is `s3://my-bucket`, the value should be `my-bucket`. -12. Under *Basic settings*, set *Timeout* to **5 minutes** and *Memory* to -**1024** -13. Save the function. Testing is easiest performed by uploading a -file to the bucket configured as the trigger in step 4. - -### S3 Events - -Configure scanning of additional buckets by adding a new S3 event to -invoke the Lambda function. This is done from the properties of any -bucket in the AWS console. - -![s3-event](../master/images/s3-event.png) - -Note: If configured to update object metadata, events must only be -configured for `PUT` and `POST`. Metadata is immutable, which requires -the function to copy the object over itself with updated metadata. This -can cause a continuous loop of scanning if improperly configured. - -## Configuration - -Runtime configuration is accomplished using environment variables. See -the table below for reference. 
- -| Variable | Description | Default | Required | -| --- | --- | --- | --- | -| AV_DEFINITION_S3_BUCKET | Bucket containing antivirus definition files | | Yes | -| AV_DEFINITION_S3_PREFIX | Prefix for antivirus definition files | clamav_defs | No | -| AV_DEFINITION_PATH | Path containing files at runtime | /tmp/clamav_defs | No | -| AV_SCAN_START_SNS_ARN | SNS topic ARN to publish notification about start of scan | | No | -| AV_SCAN_START_METADATA | The tag/metadata indicating the start of the scan | av-scan-start | No | -| AV_SIGNATURE_METADATA | The tag/metadata name representing file's AV type | av-signature | No | -| AV_STATUS_CLEAN | The value assigned to clean items inside of tags/metadata | CLEAN | No | -| AV_STATUS_INFECTED | The value assigned to clean items inside of tags/metadata | INFECTED | No | -| AV_STATUS_METADATA | The tag/metadata name representing file's AV status | av-status | No | -| AV_STATUS_SNS_ARN | SNS topic ARN to publish scan results (optional) | | No | -| AV_STATUS_SNS_PUBLISH_CLEAN | Publish AV_STATUS_CLEAN results to AV_STATUS_SNS_ARN | True | No | -| AV_STATUS_SNS_PUBLISH_INFECTED | Publish AV_STATUS_INFECTED results to AV_STATUS_SNS_ARN | True | No | -| AV_TIMESTAMP_METADATA | The tag/metadata name representing file's scan time | av-timestamp | No | -| CLAMAVLIB_PATH | Path to ClamAV library files | ./bin | No | -| CLAMSCAN_PATH | Path to ClamAV clamscan binary | ./bin/clamscan | No | -| FRESHCLAM_PATH | Path to ClamAV freshclam binary | ./bin/freshclam | No | -| DATADOG_API_KEY | API Key for pushing metrics to DataDog (optional) | | No | -| AV_PROCESS_ORIGINAL_VERSION_ONLY | Controls that only original version of an S3 key is processed (if bucket versioning is enabled) | False | No | -| AV_DELETE_INFECTED_FILES | Controls whether infected files should be automatically deleted | False | No | -| EVENT_SOURCE | The source of antivirus scan event "S3" or "SNS" (optional) | S3 | No | - -## S3 Bucket Policy Examples - -### Deny to download the object if not "CLEAN" - -This policy doesn't allow to download the object until: - -1. The lambda that run Clam-AV is finished (so the object has a tag) -2. The file is not CLEAN - -Please make sure to check cloudtrail for the arn:aws:sts, just find the event open it and copy the sts. -It should be in the format provided below: +I found it helpful to run: -```json - { - "Effect": "Deny", - "NotPrincipal": { - "AWS": [ - "arn:aws:iam::<<aws-account-number>>:role/<<bucket-antivirus-role>>", - "arn:aws:sts::<<aws-account-number>>:assumed-role/<<bucket-antivirus-role>>/<<bucket-antivirus-role>>", - "arn:aws:iam::<<aws-account-number>>:root" - ] - }, - "Action": "s3:GetObject", - "Resource": "arn:aws:s3:::<<bucket-name>>/*", - "Condition": { - "StringNotEquals": { - "s3:ExistingObjectTag/av-status": "CLEAN" - } - } -} ``` - -### Deny to download and re-tag "INFECTED" object - -```json -{ - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Deny", - "Action": ["s3:GetObject", "s3:PutObjectTagging"], - "Principal": "*", - "Resource": ["arn:aws:s3:::<<bucket-name>>/*"], - "Condition": { - "StringEquals": { - "s3:ExistingObjectTag/av-status": "INFECTED" - } - } - } - ] -} +docker run -it amazonlinux:2023 /bin/sh ``` -## Manually Scanning Buckets - -You may want to scan all the objects in a bucket that have not previously been scanned or were created -prior to setting up your lambda functions. To do this you can use the `scan_bucket.py` utility. 
+Then: -```sh -pip install boto3 -scan_bucket.py --lambda-function-name=<lambda_function_name> --s3-bucket-name=<s3-bucket-to-scan> ``` - -This tool will scan all objects that have not been previously scanned in the bucket and invoke the lambda function -asynchronously. As such you'll have to go to your cloudwatch logs to see the scan results or failures. Additionally, -the script uses the same environment variables you'd use in your lambda so you can configure them similarly. - -## Testing - -There are two types of tests in this repository. The first is pre-commit tests and the second are python tests. All of -these tests are run by CircleCI. - -### pre-commit Tests - -The pre-commit tests ensure that code submitted to this repository meet the standards of the repository. To get started -with these tests run `make pre_commit_install`. This will install the pre-commit tool and then install it in this -repository. Then the github pre-commit hook will run these tests before you commit your code. - -To run the tests manually run `make pre_commit_tests` or `pre-commit run -a`. - -### Python Tests - -The python tests in this repository use `unittest` and are run via the `nose` utility. To run them you will need -to install the developer resources and then run the tests: - -```sh -pip install -r requirements.txt -pip install -r requirements-dev.txt -make test +yum install cpio yum-utils -y ``` -### Local lambdas - -You can run the lambdas locally to test out what they are doing without deploying to AWS. This is accomplished -by using docker containers that act similarly to lambda. You will need to have set up some local variables in your -`.envrc.local` file and modify them appropriately first before running `direnv allow`. If you do not have `direnv` -it can be installed with `brew install direnv`. - -For the Scan lambda you will need a test file uploaded to S3 and the variables `TEST_BUCKET` and `TEST_KEY` -set in your `.envrc.local` file. Then you can run: - -```sh -direnv allow -make archive scan +Then: ``` - -If you want a file that will be recognized as a virus you can download a test file from the [EICAR](https://www.eicar.org/?page_id=3950) -website and uploaded to your bucket. - -For the Update lambda you can run: - -```sh -direnv allow -make archive update +cd /tmp +yumdownloader -x \*i686 --archlist=x86_64 json-c +rpm2cpio json-c*.rpm | cpio -idmv ``` -## License - -```text -Upside Travel, Inc. +Then look in `/tmp/usr/lib64/` to see what is in there. If your file, e.g. `libjson-c.so` is in there then it'll be included. If it isn't, you need to figure out the right incantations to add it. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at +Once you run `DOCKER_BUILDKIT=0 make all` (I find it easier to do debugging if BUILDKIT is off) a zip file will be produced. -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -``` +You can also check in that to see if any of the files you expect to see (e.g. `libjson-c.so`) are missing/present. 
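+
+For example, a small Python sketch along these lines can check the built archive for a given shared library. The `check_zip_libs.py` name is hypothetical (it is not a script that ships with this repo), and the `bin/` layout is an assumption based on how the `Dockerfile` zips things up:
+
+```python
+# check_zip_libs.py -- sanity-check that shared libraries made it into
+# the archive. Assumes the layout produced by the Dockerfile: *.py at
+# the zip root and the ClamAV binaries/libraries under bin/.
+import sys
+import zipfile
+
+
+def missing_libs(zip_path, expected):
+    """Return the expected library names with no matching file under bin/."""
+    with zipfile.ZipFile(zip_path) as zf:
+        names = {n.split("/")[-1] for n in zf.namelist() if n.startswith("bin/")}
+    return [lib for lib in expected if not any(n.startswith(lib) for n in names)]
+
+
+if __name__ == "__main__":
+    # e.g. python check_zip_libs.py build/lambda.zip libjson-c.so
+    zip_path, libs = sys.argv[1], sys.argv[2:]
+    missing = missing_libs(zip_path, libs)
+    print("missing:", ", ".join(missing) if missing else "none")
+    sys.exit(1 if missing else 0)
+```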
-ClamAV is released under the [GPL Version 2 License](https://github.com/vrtadmin/clamav-devel/blob/master/COPYING) -and all [source for ClamAV](https://github.com/vrtadmin/clamav-devel) is available -for download on Github. +Finally, you can upload the `deploy/lambda.zip` into Lambda's console to get it running. \ No newline at end of file diff --git a/clamav.py b/clamav.py index 083370d2..2c033951 100644 --- a/clamav.py +++ b/clamav.py @@ -24,7 +24,7 @@ import botocore from pytz import utc -from common import AV_DEFINITION_S3_PREFIX +from common import AV_DEFINITION_S3_PREFIX, S3_ENDPOINT from common import AV_DEFINITION_PATH from common import AV_DEFINITION_FILE_PREFIXES from common import AV_DEFINITION_FILE_SUFFIXES @@ -90,7 +90,7 @@ def upload_defs_to_s3(s3_client, bucket, prefix, local_path): "Uploading %s to s3://%s" % (local_file_path, os.path.join(bucket, prefix, filename)) ) - s3 = boto3.resource("s3") + s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) s3_object = s3.Object(bucket, os.path.join(prefix, filename)) s3_object.upload_file(os.path.join(local_path, filename)) s3_client.put_object_tagging( @@ -112,7 +112,7 @@ def update_defs_from_freshclam(path, library_path=""): fc_env = os.environ.copy() if library_path: fc_env["LD_LIBRARY_PATH"] = "%s:%s" % ( - ":".join(current_library_search_path()), + fc_env["LD_LIBRARY_PATH"], CLAMAVLIB_PATH, ) print("Starting freshclam with defs in %s." % path) diff --git a/clamav_test.py b/clamav_test.py index 0ffbbf96..ee8027cb 100644 --- a/clamav_test.py +++ b/clamav_test.py @@ -132,7 +132,7 @@ def test_md5_from_s3_tags_no_md5(self): md5_hash = md5_from_s3_tags( self.s3_client, self.s3_bucket_name, self.s3_key_name ) - self.assertEquals("", md5_hash) + self.assertEqual("", md5_hash) def test_md5_from_s3_tags_has_md5(self): expected_md5_hash = "d41d8cd98f00b204e9800998ecf8427e" @@ -153,10 +153,9 @@ def test_md5_from_s3_tags_has_md5(self): md5_hash = md5_from_s3_tags( self.s3_client, self.s3_bucket_name, self.s3_key_name ) - self.assertEquals(expected_md5_hash, md5_hash) + self.assertEqual(expected_md5_hash, md5_hash) def test_time_from_s3(self): - expected_s3_time = datetime.datetime(2019, 1, 1) s3_stubber = Stubber(self.s3_client) @@ -172,7 +171,7 @@ def test_time_from_s3(self): s3_time = time_from_s3( self.s3_client, self.s3_bucket_name, self.s3_key_name ) - self.assertEquals(expected_s3_time, s3_time) + self.assertEqual(expected_s3_time, s3_time) @mock.patch("clamav.md5_from_file") @mock.patch("common.os.path.exists") @@ -234,7 +233,7 @@ def test_update_defs_from_s3(self, mock_exists, mock_md5_from_file): to_download = update_defs_from_s3( self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX ) - self.assertEquals(expected_to_download, to_download) + self.assertEqual(expected_to_download, to_download) @mock.patch("clamav.md5_from_file") @mock.patch("common.os.path.exists") @@ -283,7 +282,7 @@ def test_update_defs_from_s3_same_hash(self, mock_exists, mock_md5_from_file): to_download = update_defs_from_s3( self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX ) - self.assertEquals(expected_to_download, to_download) + self.assertEqual(expected_to_download, to_download) @mock.patch("clamav.md5_from_file") @mock.patch("common.os.path.exists") @@ -349,4 +348,4 @@ def test_update_defs_from_s3_old_files(self, mock_exists, mock_md5_from_file): to_download = update_defs_from_s3( self.s3_client, self.s3_bucket_name, AV_DEFINITION_S3_PREFIX ) - self.assertEquals(expected_to_download, to_download) + self.assertEqual(expected_to_download, 
to_download) diff --git a/common.py b/common.py index 9e95af96..ea05c826 100644 --- a/common.py +++ b/common.py @@ -43,6 +43,9 @@ AV_DEFINITION_FILE_PREFIXES = ["main", "daily", "bytecode"] AV_DEFINITION_FILE_SUFFIXES = ["cld", "cvd"] +SNS_ENDPOINT = os.getenv("SNS_ENDPOINT", None) +S3_ENDPOINT = os.getenv("S3_ENDPOINT", None) +LAMBDA_ENDPOINT = os.getenv("LAMBDA_ENDPOINT", None) def create_dir(path): @@ -56,4 +59,4 @@ def create_dir(path): def get_timestamp(): - return datetime.datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S UTC") + return datetime.datetime.now(datetime.timezone.utc).strftime("%Y/%m/%d %H:%M:%S UTC") diff --git a/deploy/cloudformation.yaml b/deploy/cloudformation.yaml new file mode 100644 index 00000000..a29e33ae --- /dev/null +++ b/deploy/cloudformation.yaml @@ -0,0 +1,286 @@ +--- + AWSTemplateFormatVersion: '2010-09-09' + + Description: Bucket Antivirus Quickstart Template + + Parameters: + + AVBucketType: + Type: String + Description: Specifies if the bucket to hold the AV deinitions should be "public" or "private". Only choose "public" if other accounts need to access this bucket." + Default: "private" + AllowedValues: + - "public" + - "private" + + SourceBucket: + Type: String + Description: Name of the source bucket whose objects will be scanned. If more than one source bucket, the others will have to be manually added to the AV Scanner Policy after creation. + Default: "<source-bucket>" + AllowedPattern : ".+" + + Conditions: + publicBucket: !Equals [ !Ref AVBucketType, "public" ] + + Resources: + + S3BucketAVDefinitions: + Type: AWS::S3::Bucket + Properties: + BucketName: !Join # Append the CloudFormation StackId for unique bucket naming + - "-" + - - "antivirus-definitions" + - !Select + - 0 + - !Split + - "-" + - !Select + - 2 + - !Split + - "/" + - !Ref "AWS::StackId" + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: AES256 + PublicAccessBlockConfiguration: + BlockPublicAcls: !If [ publicBucket, false, true ] + BlockPublicPolicy: !If [ publicBucket, false, true ] + IgnorePublicAcls: !If [ publicBucket, false, true ] + RestrictPublicBuckets: !If [ publicBucket, false, true ] + Tags: + - Key: Service + Value: bucket-antivirus + VersioningConfiguration: + Status: Suspended + + S3BucketPolicyAVDefinitions: + Type: AWS::S3::BucketPolicy + Condition: publicBucket + Properties: + Bucket: !Ref S3BucketAVDefinitions + PolicyDocument: + Statement: + - Sid: AllowPublic + Action: + - s3:GetObject + - s3:GetObjectTagging + Effect: Allow + Principal: + AWS: + - "*" + Resource: + - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] + + IamRoleAVDefinitions: + Type: 'AWS::IAM::Role' + Properties: + RoleName: AVDefinitionsLambdaRole + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Tags: + - Key: Service + Value: bucket-antivirus + + IamRoleAVScanner: + Type: 'AWS::IAM::Role' + Properties: + RoleName: AVScannerLambdaRole + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Tags: + - Key: Service + Value: bucket-antivirus + + IamPolicyAVDefinitions: + Type: AWS::IAM::Policy + Properties: + PolicyName: AVDefinitionsLambdaPolicy + Roles: + - !Ref IamRoleAVDefinitions + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: WriteCloudWatchLogs + Effect: Allow + 
Action: + - "logs:CreateLogGroup" + - "logs:CreateLogStream" + - "logs:PutLogEvents" + Resource: "*" + - Sid: S3GetAndPutWithTagging + Effect: Allow + Action: + - "s3:GetObject" + - "s3:GetObjectTagging" + - "s3:PutObject" + - "s3:PutObjectTagging" + - "s3:PutObjectVersionTagging" + Resource: + - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] + - Sid: S3HeadObject + Effect: Allow + Action: + - "s3:ListBucket" + Resource: + - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] + - !Sub [ "arn:aws:s3:::${BucketName}", { BucketName: !Ref S3BucketAVDefinitions } ] + + IamPolicyAVScanner: + Type: AWS::IAM::Policy + Properties: + PolicyName: AVScannerLambdaPolicy + Roles: + - !Ref IamRoleAVScanner + PolicyDocument: + Version: "2012-10-17" + Statement: + - Sid: WriteCloudWatchLogs + Effect: Allow + Action: + - "logs:CreateLogGroup" + - "logs:CreateLogStream" + - "logs:PutLogEvents" + Resource: "*" + - Sid: S3AVScan + Effect: Allow + Action: + - "s3:GetObject" + - "s3:GetObjectTagging" + - "s3:GetObjectVersion" + - "s3:PutObjectTagging" + - "s3:PutObjectVersionTagging" + Resource: + - !Sub [ "arn:aws:s3:::${SourceBucketName}/*", { SourceBucketName: !Ref SourceBucket } ] + - Sid: S3AVDefinitions + Effect: Allow + Action: + - "s3:GetObject" + - "s3:GetObjectTagging" + Resource: + - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] + - Sid: KmsDecrypt + Effect: Allow + Action: + - "kms:Decrypt" + Resource: + - !Sub [ "arn:aws:s3:::${SourceBucketName}/*", { SourceBucketName: !Ref SourceBucket } ] + - Sid: SNSPublic + Effect: Allow + Action: + - "sns:Publish" + Resource: + - "arn:aws:sns:::<av-scan-start>" + - "arn:aws:sns:::<av-status>" + - Sid: S3HeadObject + Effect: Allow + Action: + - "s3:ListBucket" + Resource: + - !Sub [ "arn:aws:s3:::${BucketName}/*", { BucketName: !Ref S3BucketAVDefinitions } ] + - !Sub [ "arn:aws:s3:::${BucketName}", { BucketName: !Ref S3BucketAVDefinitions } ] + + LambdaAVUpdateDefinitions: + Type: AWS::Lambda::Function + Properties: + FunctionName: avUpdateDefinitions + Description: LambdaFunction to update the AntiVirus definitions in the AV Definitions bucket. + Runtime: python3.7 + Code: + ZipFile: | + import json + def lambda_handler(event, context): + return { + 'statusCode': 200, 'body': json.dumps('Hello from Lambda!') + } + Handler: "update.lambda_handler" + MemorySize: 1024 + Timeout: 300 + Role: !GetAtt [ IamRoleAVDefinitions, Arn ] + Environment: + Variables: + AV_DEFINITION_S3_BUCKET: !Ref S3BucketAVDefinitions + Tags: + - Key: Service + Value: bucket-antivirus + + LambdaAVUpdateDefinitionsSchedule: + Type: "AWS::Events::Rule" + DependsOn: + - LambdaAVUpdateDefinitions + Properties: + Name: LambdaAVUpdateDefinitionsSchedule + Description: A schedule for the AV Update Definitions Lambda function. + ScheduleExpression: rate(3 hours) + State: ENABLED + Targets: + - Arn: !Sub ${LambdaAVUpdateDefinitions.Arn} + Id: LambdaAVUpdateDefinitionsSchedule + + LambdaAVUpdateDefinitionsSchedulePermission: + Type: "AWS::Lambda::Permission" + DependsOn: + - LambdaAVUpdateDefinitionsSchedule + Properties: + Action: 'lambda:InvokeFunction' + FunctionName: !Sub ${LambdaAVUpdateDefinitions.Arn} + Principal: 'events.amazonaws.com' + SourceArn: !Sub ${LambdaAVUpdateDefinitionsSchedule.Arn} + + LambdaAVScanner: + Type: AWS::Lambda::Function + Properties: + FunctionName: avScanner + Description: LambdaFunction to scan newly uploaded objects in S3. 
+ Runtime: python3.7 + Code: + ZipFile: | + import json + def lambda_handler(event, context): + return { + 'statusCode': 200, 'body': json.dumps('Hello from Lambda!') + } + Handler: "scan.lambda_handler" + MemorySize: 1500 + Timeout: 300 + Role: !GetAtt [ IamRoleAVScanner, Arn ] + Environment: + Variables: + AV_DEFINITION_S3_BUCKET: !Ref S3BucketAVDefinitions + Tags: + - Key: Service + Value: bucket-antivirus + + + + Outputs: + + S3BucketAvDefinitions: + Value: !Ref S3BucketAVDefinitions + Description: S3 Bucket for the AV Definitions + + LambdaAVUpdateDefinitions: + Value: !Ref LambdaAVUpdateDefinitions + Description: Lambda function to update the Antivirus Definitions in its respective bucket + + LambdaAVScanner: + Value: !Ref LambdaAVScanner + Description: Lambda function to scan newly created S3 objects + + IamRoleAVScanner: + Value: !Ref IamRoleAVScanner + Description: IAM Role used by the Lambda Scanner function. Edit its policy to add/change source S3 buckets, and also to enable SNS functionality if desired \ No newline at end of file diff --git a/display_infected.py b/display_infected.py index 0c40bc98..dfb07077 100755 --- a/display_infected.py +++ b/display_infected.py @@ -20,7 +20,7 @@ import boto3 -from common import AV_SIGNATURE_METADATA +from common import AV_SIGNATURE_METADATA, S3_ENDPOINT from common import AV_SIGNATURE_OK from common import AV_SIGNATURE_UNKNOWN from common import AV_STATUS_METADATA @@ -30,7 +30,6 @@ # Get all objects in an S3 bucket that are infected def get_objects_and_sigs(s3_client, s3_bucket_name): - s3_object_list = [] s3_list_objects_result = {"IsTruncated": True} @@ -76,9 +75,8 @@ def object_infected(s3_client, s3_bucket_name, key_name): def main(s3_bucket_name): - # Verify the S3 bucket exists - s3_client = boto3.client("s3") + s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) try: s3_client.head_bucket(Bucket=s3_bucket_name) except Exception: @@ -87,7 +85,7 @@ def main(s3_bucket_name): # Scan the objects in the bucket s3_object_and_sigs_list = get_objects_and_sigs(s3_client, s3_bucket_name) - for (key_name, av_signature) in s3_object_and_sigs_list: + for key_name, av_signature in s3_object_and_sigs_list: print("Infected: {}/{}, {}".format(s3_bucket_name, key_name, av_signature)) diff --git a/display_infected_test.py b/display_infected_test.py index 86328c14..cd99ca1b 100644 --- a/display_infected_test.py +++ b/display_infected_test.py @@ -129,7 +129,6 @@ def test_get_objects_and_sigs_infected_with_sig_ok(self): self.assertEqual(s3_object_list, expected_object_list) def test_get_objects_and_sigs_clean(self): - get_object_tagging_response = { "VersionId": "abc123", "TagSet": [{"Key": AV_STATUS_METADATA, "Value": AV_STATUS_CLEAN}], @@ -150,7 +149,6 @@ def test_get_objects_and_sigs_clean(self): self.assertEqual(s3_object_list, expected_object_list) def test_get_objects_and_sigs_unscanned(self): - get_object_tagging_response = {"VersionId": "abc123", "TagSet": []} get_object_tagging_expected_params = { "Bucket": self.s3_bucket_name, diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 00000000..4f98214a --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,13 @@ +version: "3.8" + +services: + localstack: + container_name: localstack + image: localstack/localstack + ports: + - "127.0.0.1:4566:4566" + - "127.0.0.1:4510-4559:4510-4559" + environment: + - DOCKER_HOST=unix:///var/run/docker.sock + volumes: + - "/var/run/docker.sock:/var/run/docker.sock" \ No newline at end of file diff --git a/requirements-dev.txt 
b/requirements-dev.txt index b6f1ae9a..82c4c5aa 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,11 @@ # boto3 available by default in AWS but not locally boto3 +# awscli available by default in AWS but not locally +awscli + # Test requirements coverage -mock==3.0.5 -nose +mock==5.1.0 +pytest +pytest-cov diff --git a/requirements.txt b/requirements.txt index d221f3e1..345c2e8b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -certifi==2023.7.22 -chardet==3.0.4 -datadog==0.26.0 -decorator==4.3 -idna==2.8 +certifi==2023.11.17 +chardet==5.2.0 +datadog==0.47.0 +decorator==5.1.1 +idna==3.6 requests==2.31.0 -simplejson==3.16 -urllib3==1.26.18 -pytz==2019.3 +simplejson==3.19.2 +urllib3==2.0.7 +pytz==2023.3.post1 diff --git a/scan.py b/scan.py index 865674ab..a9fe1bf7 100644 --- a/scan.py +++ b/scan.py @@ -17,7 +17,6 @@ import json import os from urllib.parse import unquote_plus -from distutils.util import strtobool import boto3 import botocore @@ -38,12 +37,13 @@ from common import AV_STATUS_SNS_PUBLISH_CLEAN from common import AV_STATUS_SNS_PUBLISH_INFECTED from common import AV_TIMESTAMP_METADATA +from common import SNS_ENDPOINT +from common import S3_ENDPOINT from common import create_dir from common import get_timestamp def event_object(event, event_source="s3"): - # SNS events are slightly different if event_source.upper() == "SNS": event = json.loads(event["Records"][0]["Sns"]["Message"]) @@ -74,7 +74,7 @@ def event_object(event, event_source="s3"): raise Exception("Unable to retrieve object from event.\n{}".format(event)) # Create and return the object - s3 = boto3.resource("s3") + s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) return s3.Object(bucket_name, key_name) @@ -200,9 +200,9 @@ def sns_scan_results( def lambda_handler(event, context): - s3 = boto3.resource("s3") - s3_client = boto3.client("s3") - sns_client = boto3.client("sns") + s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) + s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) + sns_client = boto3.client("sns", endpoint_url=SNS_ENDPOINT) # Get some environment variables ENV = os.getenv("ENV", "") @@ -293,3 +293,18 @@ def lambda_handler(event, context): def str_to_bool(s): return bool(strtobool(str(s))) + + +def strtobool(val): + """Convert a string representation of truth to true (1) or false (0). + True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values + are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if + 'val' is anything else. 
+ """ + val = val.lower() + if val in ("y", "yes", "t", "true", "on", "1"): + return 1 + elif val in ("n", "no", "f", "false", "off", "0"): + return 0 + else: + raise ValueError("invalid truth value %r" % (val,)) diff --git a/scan_bucket.py b/scan_bucket.py index 6043ffb0..d0cb7632 100755 --- a/scan_bucket.py +++ b/scan_bucket.py @@ -21,13 +21,13 @@ import boto3 -from common import AV_STATUS_METADATA +from common import AV_STATUS_METADATA, LAMBDA_ENDPOINT from common import AV_TIMESTAMP_METADATA +from common import S3_ENDPOINT # Get all objects in an S3 bucket that have not been previously scanned def get_objects(s3_client, s3_bucket_name): - s3_object_list = [] s3_list_objects_result = {"IsTruncated": True} @@ -62,7 +62,6 @@ def object_previously_scanned(s3_client, s3_bucket_name, key_name): # Scan an S3 object for viruses by invoking the lambda function # Skip any objects that have already been scanned def scan_object(lambda_client, lambda_function_name, s3_bucket_name, key_name): - print("Scanning: {}/{}".format(s3_bucket_name, key_name)) s3_event = format_s3_event(s3_bucket_name, key_name) lambda_invoke_result = lambda_client.invoke( @@ -87,7 +86,7 @@ def format_s3_event(s3_bucket_name, key_name): def main(lambda_function_name, s3_bucket_name, limit): # Verify the lambda exists - lambda_client = boto3.client("lambda") + lambda_client = boto3.client("lambda", endpoint_url=LAMBDA_ENDPOINT) try: lambda_client.get_function(FunctionName=lambda_function_name) except Exception: @@ -95,7 +94,7 @@ def main(lambda_function_name, s3_bucket_name, limit): sys.exit(1) # Verify the S3 bucket exists - s3_client = boto3.client("s3") + s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) try: s3_client.head_bucket(Bucket=s3_bucket_name) except Exception: diff --git a/scan_bucket_test.py b/scan_bucket_test.py index d191e87e..33b3e85b 100644 --- a/scan_bucket_test.py +++ b/scan_bucket_test.py @@ -56,7 +56,6 @@ def setUp(self): ) def test_get_objects_previously_scanned_status(self): - get_object_tagging_response = { "VersionId": "abc123", "TagSet": [{"Key": AV_STATUS_METADATA, "Value": AV_STATUS_INFECTED}], @@ -77,7 +76,6 @@ def test_get_objects_previously_scanned_status(self): self.assertEqual(s3_object_list, expected_object_list) def test_get_objects_previously_scanned_timestamp(self): - get_object_tagging_response = { "VersionId": "abc123", "TagSet": [{"Key": AV_TIMESTAMP_METADATA, "Value": get_timestamp()}], @@ -98,7 +96,6 @@ def test_get_objects_previously_scanned_timestamp(self): self.assertEqual(s3_object_list, expected_object_list) def test_get_objects_unscanned(self): - get_object_tagging_response = {"VersionId": "abc123", "TagSet": []} get_object_tagging_expected_params = { "Bucket": self.s3_bucket_name, @@ -128,4 +125,4 @@ def test_format_s3_event(self): } ] } - self.assertEquals(s3_event, expected_s3_event) + self.assertEqual(s3_event, expected_s3_event) diff --git a/scan_test.py b/scan_test.py index 0a18132a..d24e25e8 100644 --- a/scan_test.py +++ b/scan_test.py @@ -64,7 +64,7 @@ def test_sns_event_object(self): sns_event = {"Records": [{"Sns": {"Message": json.dumps(event)}}]} s3_obj = event_object(sns_event, event_source="sns") expected_s3_object = self.s3.Object(self.s3_bucket_name, self.s3_key_name) - self.assertEquals(s3_obj, expected_s3_object) + self.assertEqual(s3_obj, expected_s3_object) def test_s3_event_object(self): event = { @@ -79,25 +79,25 @@ def test_s3_event_object(self): } s3_obj = event_object(event) expected_s3_object = self.s3.Object(self.s3_bucket_name, 
self.s3_key_name) - self.assertEquals(s3_obj, expected_s3_object) + self.assertEqual(s3_obj, expected_s3_object) def test_s3_event_object_missing_bucket(self): event = {"Records": [{"s3": {"object": {"key": self.s3_key_name}}}]} with self.assertRaises(Exception) as cm: event_object(event) - self.assertEquals(cm.exception.message, "No bucket found in event!") + self.assertEqual(cm.exception.message, "No bucket found in event!") def test_s3_event_object_missing_key(self): event = {"Records": [{"s3": {"bucket": {"name": self.s3_bucket_name}}}]} with self.assertRaises(Exception) as cm: event_object(event) - self.assertEquals(cm.exception.message, "No key found in event!") + self.assertEqual(cm.exception.message, "No key found in event!") def test_s3_event_object_bucket_key_missing(self): event = {"Records": [{"s3": {"bucket": {}, "object": {}}}]} with self.assertRaises(Exception) as cm: event_object(event) - self.assertEquals( + self.assertEqual( cm.exception.message, "Unable to retrieve object from event.\n{}".format(event), ) @@ -106,7 +106,7 @@ def test_s3_event_object_no_records(self): event = {"Records": []} with self.assertRaises(Exception) as cm: event_object(event) - self.assertEquals(cm.exception.message, "No records found in event!") + self.assertEqual(cm.exception.message, "No records found in event!") def test_verify_s3_object_version(self): s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) @@ -165,7 +165,7 @@ def test_verify_s3_object_versioning_not_enabled(self): with self.assertRaises(Exception) as cm: with s3_stubber_resource: verify_s3_object_version(self.s3, s3_obj) - self.assertEquals( + self.assertEqual( cm.exception.message, "Object versioning is not enabled in bucket {}".format( self.s3_bucket_name @@ -220,7 +220,7 @@ def test_verify_s3_object_version_multiple_versions(self): with self.assertRaises(Exception) as cm: with s3_stubber_resource: verify_s3_object_version(self.s3, s3_obj) - self.assertEquals( + self.assertEqual( cm.exception.message, "Detected multiple object versions in {}.{}, aborting processing".format( self.s3_bucket_name, self.s3_key_name @@ -267,7 +267,7 @@ def test_get_local_path(self): s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) file_path = get_local_path(s3_obj, local_prefix) expected_file_path = "/tmp/test_bucket/test_key" - self.assertEquals(file_path, expected_file_path) + self.assertEqual(file_path, expected_file_path) def test_set_av_metadata(self): scan_result = "CLEAN" @@ -424,7 +424,7 @@ def test_delete_s3_object_exception(self): with s3_stubber: s3_obj = self.s3.Object(self.s3_bucket_name, self.s3_key_name) delete_s3_object(s3_obj) - self.assertEquals( + self.assertEqual( cm.exception.message, "Failed to delete infected file: {}.{}".format( self.s3_bucket_name, self.s3_key_name diff --git a/scripts/run-scan-lambda b/scripts/run-scan-lambda deleted file mode 100755 index c70e1e41..00000000 --- a/scripts/run-scan-lambda +++ /dev/null @@ -1,52 +0,0 @@ -#! /usr/bin/env bash - -set -eu -o pipefail - -# -# Run the scan.lambda_handler locally in a docker container -# - -if [ $# -lt 2 ]; then - echo 1>&2 "$0: not enough arguments. 
Please provide BUCKET and KEY" - exit 1 -fi - -BUCKET=$1 -KEY=$2 -EVENT="{\"Records\": [{\"s3\": {\"bucket\": {\"name\": \"${BUCKET}\"}, \"object\": {\"key\": \"${KEY}\"}}}]}" -echo "Sending S3 event: ${EVENT}" - -# Verify that the file exists first -aws s3 ls "s3://${BUCKET}/${KEY}" - -rm -rf tmp/ -unzip -qq -d ./tmp build/lambda.zip - -NAME="antivirus-scan" - -docker run --rm \ - -v "$(pwd)/tmp/:/var/task" \ - -e AV_DEFINITION_S3_BUCKET \ - -e AV_DEFINITION_S3_PREFIX \ - -e AV_DELETE_INFECTED_FILES \ - -e AV_PROCESS_ORIGINAL_VERSION_ONLY \ - -e AV_SCAN_START_METADATA \ - -e AV_SCAN_START_SNS_ARN \ - -e AV_SIGNATURE_METADATA \ - -e AV_STATUS_CLEAN \ - -e AV_STATUS_INFECTED \ - -e AV_STATUS_METADATA \ - -e AV_STATUS_SNS_ARN \ - -e AV_STATUS_SNS_PUBLISH_CLEAN \ - -e AV_STATUS_SNS_PUBLISH_INFECTED \ - -e AV_TIMESTAMP_METADATA \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_DEFAULT_REGION \ - -e AWS_REGION \ - -e AWS_SECRET_ACCESS_KEY \ - -e AWS_SESSION_TOKEN \ - --memory="${MEM}" \ - --memory-swap="${MEM}" \ - --cpus="${CPUS}" \ - --name="${NAME}" \ - lambci/lambda:python3.7 scan.lambda_handler "${EVENT}" diff --git a/scripts/run-update-lambda b/scripts/run-update-lambda deleted file mode 100755 index 3d24defa..00000000 --- a/scripts/run-update-lambda +++ /dev/null @@ -1,29 +0,0 @@ -#! /usr/bin/env bash - -set -eu -o pipefail - -# -# Run the update.lambda_handler locally in a docker container -# - -rm -rf tmp/ -unzip -qq -d ./tmp build/lambda.zip - -NAME="antivirus-update" - -docker run --rm \ - -v "$(pwd)/tmp/:/var/task" \ - -e AV_DEFINITION_PATH \ - -e AV_DEFINITION_S3_BUCKET \ - -e AV_DEFINITION_S3_PREFIX \ - -e AWS_ACCESS_KEY_ID \ - -e AWS_DEFAULT_REGION \ - -e AWS_REGION \ - -e AWS_SECRET_ACCESS_KEY \ - -e AWS_SESSION_TOKEN \ - -e CLAMAVLIB_PATH \ - --memory="${MEM}" \ - --memory-swap="${MEM}" \ - --cpus="${CPUS}" \ - --name="${NAME}" \ - lambci/lambda:python3.7 update.lambda_handler diff --git a/update.py b/update.py index e1e9ea07..671f7657 100644 --- a/update.py +++ b/update.py @@ -22,13 +22,14 @@ from common import AV_DEFINITION_S3_BUCKET from common import AV_DEFINITION_S3_PREFIX from common import CLAMAVLIB_PATH +from common import S3_ENDPOINT from common import get_timestamp import shutil def lambda_handler(event, context): - s3 = boto3.resource("s3") - s3_client = boto3.client("s3") + s3 = boto3.resource("s3", endpoint_url=S3_ENDPOINT) + s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT) print("Script starting at %s\n" % (get_timestamp()))
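A note on the `*_ENDPOINT` variables this diff adds in `common.py`: they default to `None`, so `boto3.client("s3", endpoint_url=None)` behaves exactly as before against real AWS, while setting them reroutes every client to LocalStack. Below is a minimal sketch of a local test session using the override; the port matches the `docker-compose.yml` above, and the bucket name and dummy credentials are assumptions for illustration:

```python
# Minimal sketch: exercising the endpoint overrides against LocalStack.
# Assumes the LocalStack container from docker-compose.yml is listening
# on 127.0.0.1:4566; the dummy credentials satisfy boto3's client setup.
import os

import boto3

os.environ.setdefault("AWS_ACCESS_KEY_ID", "test")
os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "test")
os.environ.setdefault("AWS_DEFAULT_REGION", "us-east-1")
os.environ.setdefault("S3_ENDPOINT", "http://127.0.0.1:4566")

# Mirrors common.py: an unset variable falls back to None, i.e. real AWS.
S3_ENDPOINT = os.getenv("S3_ENDPOINT", None)

s3_client = boto3.client("s3", endpoint_url=S3_ENDPOINT)
s3_client.create_bucket(Bucket="antivirus-definitions")
print([b["Name"] for b in s3_client.list_buckets()["Buckets"]])
```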