diff --git a/.private-env b/.private-env
new file mode 100644
index 0000000..b88d651
--- /dev/null
+++ b/.private-env
@@ -0,0 +1,4 @@
+# fscrawler
+# NOTE(review): the FSCrawler tag ends in "-es6" while ELASTIC_VERSION is 7.x; confirm this image tag is the intended one.
+export ELASTIC_VERSION=7.17.0
+export FSCRAWLER_VERSION=2.10-SNAPSHOT-ocr-es6
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..484fdae
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+# docker-compose-fscrawler
+
+> Mostly inspired by [fscrawler docs](https://fscrawler.readthedocs.io/en/latest/dev/doc.html)
+
+
+## What
+> You can build a basic search engine using elasticsearch & fscrawler. Start it up quickly with Docker Compose.
+
+
+## How to use
+### Source version env file
+
+```
+# export ELASTIC_VERSION=7.17.0
+# export FSCRAWLER_VERSION=2.10-SNAPSHOT-ocr-es6
+source .private-env
+```
+
+### Run elasticsearch.
+
+```
+docker-compose up -d elasticsearch
+docker-compose logs -f elasticsearch
+```
+
+### Run fscrawler
+
+```
+docker-compose up fscrawler
+```
diff --git a/config/job_name/_settings.yaml b/config/job_name/_settings.yaml
new file mode 100644
index 0000000..ccf7a2e
--- /dev/null
+++ b/config/job_name/_settings.yaml
@@ -0,0 +1,4 @@
+name: "job_name"
+elasticsearch:
+  nodes:
+    - url: "http://elasticsearch:9200"
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..10c7b1b
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,45 @@
+version: '3'
+services:
+  # Elasticsearch Cluster
+  elasticsearch:
+    image: docker.elastic.co/elasticsearch/elasticsearch:$ELASTIC_VERSION
+    container_name: elasticsearch
+    environment:
+      - bootstrap.memory_lock=true
+      - discovery.type=single-node
+    restart: always
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    volumes:
+      - data:/usr/share/elasticsearch/data
+    ports:
+      - 9200:9200
+    networks:
+      - fscrawler_net
+
+  # FSCrawler
+  fscrawler:
+    image: dadoonet/fscrawler:$FSCRAWLER_VERSION
+    container_name: fscrawler
+    restart: always
+    volumes:
+      - ${PWD}/config:/root/.fscrawler
+      - ${PWD}/logs:/usr/share/fscrawler/logs
+      # NOTE(review): confirm this relative path exists next to this compose file; it is mounted read-only at /tmp/es.
+      - ../../test-documents/src/main/resources/documents/:/tmp/es:ro
+    depends_on:
+      - elasticsearch
+    # The job name must match the directory under ./config (config/job_name/_settings.yaml).
+    command: fscrawler --rest job_name
+    networks:
+      - fscrawler_net
+
+volumes:
+  data:
+    driver: local
+
+networks:
+  fscrawler_net:
+    driver: bridge
\ No newline at end of file