Here is my spider, which works fine until I update settings.py.
import scrapy


class AwesomeSpider(scrapy.Spider):
    name = "awesome"

    def start_requests(self):
        # GET request
        yield scrapy.Request("https://httpbin.org/get", meta={"nodriver": True})

    def parse(self, response, **kwargs):
        # 'response' contains the page as seen by the browser
        return {"url": response.url}
Errors
2024-12-16 14:41:42 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://httpbin.org/robots.txt> (referer: None)
2024-12-16 14:41:42 [scrapy.core.scraper] ERROR: Error downloading <GET https://httpbin.org/get>
Traceback (most recent call last):
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/twisted/internet/defer.py", line 2013, in _inlineCallbacks
result = context.run(
^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/twisted/python/failure.py", line 467, in throwExceptionIntoGenerator
return g.throw(self.value.with_traceback(self.tb))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/scrapy/core/downloader/middleware.py", line 68, in process_request
return (yield download_func(request, spider))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/twisted/internet/defer.py", line 1253, in adapt
extracted: _SelfResultT | Failure = result.result()
^^^^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/scrapy_nodriver/handler.py", line 142, in _download_request
page = await self._create_page(request=request, spider=spider)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/scrapy_nodriver/handler.py", line 95, in _create_page
browser = await uc.start(headless=self.config.headless)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/nodriver/core/util.py", line 95, in start
return await Browser.create(config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/nodriver/core/browser.py", line 90, in create
await instance.start()
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/nodriver/core/browser.py", line 393, in start
await self.connection.send(cdp.target.set_discover_targets(discover=True))
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/nodriver/core/connection.py", line 413, in send
await self._prepare_headless()
File "/home/simon/envs/scrapyNodriver/venv/lib/python3.12/site-packages/nodriver/core/connection.py", line 492, in _prepare_headless
response, error = await self._send_oneshot(
^^^^^^^^^^^^^^^
TypeError: cannot unpack non-iterable NoneType object
2024-12-16 14:41:42 [scrapy.core.engine] INFO: Closing spider (finished)
When I run the following test, everything works fine:
import asyncio
import nodriver as uc

async def main():
    browser = await uc.start()
    page = await browser.get("https://example.org")
    await page.save_screenshot(filename="example.jpeg", full_page=True)
    await page.close()

if __name__ == '__main__':
    uc.loop().run_until_complete(main())
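One difference worth noting: this standalone test calls uc.start() with its default arguments, while the Scrapy handler passes headless=self.config.headless, and the failing frame is Connection._prepare_headless. A minimal sketch to check whether headless mode alone reproduces the error (the headless=True argument mirrors the handler's code path; everything else is the same test):

import nodriver as uc

async def main_headless():
    # Force headless mode to exercise the Connection._prepare_headless
    # path that fails inside the Scrapy handler.
    browser = await uc.start(headless=True)
    page = await browser.get("https://example.org")
    await page.close()

if __name__ == '__main__':
    uc.loop().run_until_complete(main_headless())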
Thanks in advance for any help.
Simon