From 46c737cec179a81c57833830c6d74ca05bd8f55b Mon Sep 17 00:00:00 2001 From: igor santos Date: Wed, 23 Jul 2014 13:54:16 -0300 Subject: [PATCH] config scrapy-mongodb pipeline --- .gitignore | 5 +++++ pyjobs/settings.py | 12 ++++++++++-- requirements.txt | 1 + scrapy.cfg | 2 +- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 8da6246..8c026c8 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,8 @@ coverage.xml # Sphinx documentation docs/_build/ +# Scrapyd files +dbs/ +items/ +logs/ +*.pid diff --git a/pyjobs/settings.py b/pyjobs/settings.py index 95a8a87..26a37e4 100644 --- a/pyjobs/settings.py +++ b/pyjobs/settings.py @@ -14,8 +14,16 @@ NEWSPIDER_MODULE = 'pyjobs.spiders' ITEM_PIPELINES = { - 'pyjobs.pipelines.MongoPipeline': 0, + 'scrapy_mongodb.MongoDBPipeline': 0, } -# Crawl responsibly by identifying yourself (and your website) on the user-agent +# Config MongoDB +MONGODB_URI = 'mongodb://localhost:27017' +MONGODB_DATABASE = 'pyjobs' +MONGODB_COLLECTION = 'jobs' +MONGODB_UNIQUE_KEY = 'uid' + + +# Crawl responsibly by identifying yourself (and your website) +# on the user-agent #USER_AGENT = 'pyjobs (+http://www.yourdomain.com)' diff --git a/requirements.txt b/requirements.txt index 8424498..9e57fff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ pyasn1-modules==0.0.5 pycparser==2.10 pymongo==2.7.1 queuelib==1.1.1 +scrapy-mongodb==0.7.1 service-identity==1.0.0 six==1.7.3 w3lib==1.6 diff --git a/scrapy.cfg b/scrapy.cfg index 21d1577..ad41fb4 100644 --- a/scrapy.cfg +++ b/scrapy.cfg @@ -7,5 +7,5 @@ default = pyjobs.settings [deploy] -#url = http://localhost:6800/ +url = http://localhost:6800/ project = pyjobs