From de107782daf4a57f4b7e6fcb36ed4b43d094489d Mon Sep 17 00:00:00 2001 From: Nathan Day Date: Mon, 18 May 2020 13:57:22 -0400 Subject: [PATCH 1/6] bring back shell script for docker run; use list() to wrap generator in indexTmdb.py --- docker.sh | 3 +++ indexTmdb.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100755 docker.sh diff --git a/docker.sh b/docker.sh new file mode 100755 index 0000000..0a8c49d --- /dev/null +++ b/docker.sh @@ -0,0 +1,3 @@ +#!/bin/bash +# Mount ./solr_home as SOLR_HOME. Use $(pwd), not $(PWD): the latter tries to execute a program named PWD and expands to nothing on case-sensitive systems. +docker run -p 8983:8983 -v "$(pwd)/solr_home:/opt/mysolrhome" -e SOLR_HOME=/opt/mysolrhome -e INIT_SOLR_HOME=yes solr:8.4.1 diff --git a/indexTmdb.py b/indexTmdb.py index c35e668..f1d754d 100644 --- a/indexTmdb.py +++ b/indexTmdb.py @@ -30,4 +30,4 @@ def indexableMovies(): if __name__ == "__main__": solr = pysolr.Solr('http://localhost:8983/solr/tmdb', timeout=100) - solr.add(indexableMovies(), commit=True) + solr.add(list(indexableMovies()), commit=True) From 291d9ce9de086ae363b5801c9ab5422537278dc7 Mon Sep 17 00:00:00 2001 From: Nathan Day Date: Wed, 20 May 2020 11:11:41 -0400 Subject: [PATCH 2/6] new tmdb data --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 414e953..6ff4c3b 100644 --- a/README.md +++ b/README.md @@ -45,10 +45,10 @@ Regardless of the option you choose, navigate to [http://localhost:8983/solr/](h # Index TMDB movies -1. Download [tmdb.json](http://es-learn-to-rank.labs.o19s.com/tmdb.json) +1. Download [tmdb.json](https://o19s-public-datasets.s3.amazonaws.com/tmdb_2020-05-20.json) ``` -curl -o tmdb.json http://es-learn-to-rank.labs.o19s.com/tmdb.json +curl -o tmdb.json https://o19s-public-datasets.s3.amazonaws.com/tmdb_2020-05-20.json ``` 2.
Install the [pysolr](https://github.com/django-haystack/pysolr) library From a3e1bcd59c70e5acc27e5cdc10e5fcb5620872ef Mon Sep 17 00:00:00 2001 From: Nathan Day Date: Wed, 20 May 2020 11:28:11 -0400 Subject: [PATCH 3/6] new data indexed --- README.md | 5 ----- ...E-postman_collection.json => solr_postman_collection.json | 0 2 files changed, 5 deletions(-) rename solr-TLRE-postman_collection.json => solr_postman_collection.json (100%) diff --git a/README.md b/README.md index 6ff4c3b..b31083e 100644 --- a/README.md +++ b/README.md @@ -92,8 +92,3 @@ If you want to use Postman during the TLRE class: 4. Tinker with the base URL, Params or JSON Body (optional) 5. Press 'Send' (blue rectangle button right of URL bar) -This collection is also valuable for testing examples against new versions of Elasticsearch. Using Postman's command line tool [Newman](https://github.com/postmanlabs/newman) you can check all of the requests in the collection: - -``` -newman run --global-var "solr_host=localhost:8983" solr-TLRE-postman_collection.json -``` diff --git a/solr-TLRE-postman_collection.json b/solr_postman_collection.json similarity index 100% rename from solr-TLRE-postman_collection.json rename to solr_postman_collection.json From d839bdb5c1441495889506e8d90a37781ce79b40 Mon Sep 17 00:00:00 2001 From: Nathan Day Date: Wed, 20 May 2020 11:29:30 -0400 Subject: [PATCH 4/6] refresh reqs --- requirements.txt | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3d5149e..83e62f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,6 @@ -certifi==2019.9.11 +certifi==2020.4.5.1 chardet==3.0.4 -idna==2.8 -pipenv==2018.11.26 -pysolr==3.8.1 -requests==2.22.0 -urllib3==1.25.6 -virtualenv==16.7.7 -virtualenv-clone==0.5.3 +idna==2.9 +pysolr==3.9.0 +requests==2.23.0 +urllib3==1.25.9 From 7f93eeb278f33ce8093d12e4ec23b67061cd703e Mon Sep 17 00:00:00 2001 From: Nathan Day Date: Wed, 20 May 2020 11:40:46 -0400 Subject: 
[PATCH 5/6] testing added --- .testing/README.md | 28 ++++++++++++++++++++++++++++ .testing/requirements.txt | 15 +++++++++++++++ .testing/splainer_links_solr.csv | 20 ++++++++++++++++++++ .testing/splainer_puppet_solr.py | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 95 insertions(+) create mode 100644 .testing/README.md create mode 100644 .testing/requirements.txt create mode 100644 .testing/splainer_links_solr.csv create mode 100644 .testing/splainer_puppet_solr.py diff --git a/.testing/README.md b/.testing/README.md new file mode 100644 index 0000000..cc8834d --- /dev/null +++ b/.testing/README.md @@ -0,0 +1,28 @@ +## Testing TLRE examples + +TLRE examples are vulnerable to changes in external tooling (Splainer) and Solr itself. So to ensure things are ready to go for training we've scripted these "tests" to check all of the examples. + +#### Splainer + +These tests check that changes to Splainer don't damage TLRE examples. + +Splainer links from the slides are stored in `splainer_links_solr.csv`. The script `splainer_puppet_solr.py` will visit each one of the links and report the HTTP status code back. + +``` +# pip install -r requirements.txt + +python splainer_puppet_solr.py +``` + +This will record the status code in the CSV file and print the number of failed queries to console. + +#### Newman + +These tests check that version change in Elasticsearch don't damage TLRE examples. + +[Newman](https://github.com/postmanlabs/newman) is the command line tool for managing Postman collections.
All examples from the class, beyond just the links to Splainer, are included in the collection `../es-postman-collection.json` + +``` +newman run --global-var "solr_host=localhost:8983" ../solr_postman_collection.json +``` + diff --git a/.testing/requirements.txt b/.testing/requirements.txt new file mode 100644 index 0000000..211772c --- /dev/null +++ b/.testing/requirements.txt @@ -0,0 +1,15 @@ +appdirs==1.4.4 +certifi==2020.4.5.1 +chardet==3.0.4 +idna==2.9 +numpy==1.18.4 +pandas==1.0.3 +pyee==7.0.2 +pyppeteer==0.2.2 +python-dateutil==2.8.1 +pytz==2020.1 +requests==2.23.0 +six==1.14.0 +tqdm==4.46.0 +urllib3==1.25.9 +websockets==8.1 diff --git a/.testing/splainer_links_solr.csv b/.testing/splainer_links_solr.csv new file mode 100644 index 0000000..2f04a1f --- /dev/null +++ b/.testing/splainer_links_solr.csv @@ -0,0 +1,20 @@ +Unnamed: 0,Location (Day.Module.Slide),URL,Code +0,2.1.15,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dbasketball%20cartoon%20aliens%26tie%3D1.0%26fl%3Dtitle%20id%20overview&fieldSpec=title%20id%20overview,200 +1,2.2.4,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dcast:(will%20smith)%26fl%3Dtitle%20overview%20cast&fieldSpec=title%20overview%20cast,200 +2,2.2.8,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people:%22william%20shatner%22%26fl%3Dtitle%20overview%20cast%20directors&fieldSpec=title%20overview%20cast%20directors,200 +3,2.2.9,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people:%22william%20shatner%22%26fl%3Dtitle%20overview%20cast%20directors&fieldSpec=title%20overview%20cast%20directors,200 +4,2.2.13,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people_notf:%22william%20shatner%22%26fl%3Dtitle%20overview%20cast%20directors&fieldSpec=title%20overview%20cast%20directors,200 
+5,2.2.17,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people_notf:%22william%20shatner%22%26defType%3Dedismax%26bf%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26fl%3Dtitle%20overview%20cast%20directors%20release_date&fieldSpec=title%20overview%20cast%20directors%20release_date",200 +6,2.3.7,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dwill%20smith%26bq%3D%7B!edismax%20mm%3D100%2525%20bq%3D''%20qf%3Dtext_people%20pf%3Dcast%20v%3D$q%7D%26fl%3Dtitle%20overview%20release_date%20cast%20directors&fieldSpec=title%20overview%20release_date%20cast%20directors,200 +7,2.3.8,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fs olr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dharrison%20ford%26peopleQuery%3D%7B!edismax%20bf%3D''%20bq%3D''%20qf%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26bf%3Dif(query($peopleQuery),$dateBoost,0)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost",404 
+8,2.3.9,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dharrison%20ford%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dif(query($peopleQuery),$prodDateBoost,0)%26bf%3D$totalDateBoost%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 +9,2.3.10,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 
+10,2.3.11,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 +11,2.3.12,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 
+12,2.3.13,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 +13,2.3.14,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26bf%3Dif(query($peopleQuery),product($dateBoost,100),0)%26bf%3Dproduct($titleSearch,pow(vote_average,2))%26tie%3D1.0%26titleSearch%3D%7B!edismax%20bf%3D''%20pf%3Dtitle%20bq%3D''%20qf%3Dtitle%20mm%3D100%2525%20v%3D$q%7D%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost",400 +14,2.4.8,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdefType%3Dedismax%26qf%3Dtitle_bidirect_syn%26indent%3Don%26q%3Dbride%26fl%3Dtitle%20tagline&fieldSpec=title%20tagline,200 
+15,2.4.8.b,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdefType%3Dedismax%26qf%3Dtitle_directed_syn%26indent%3Don%26q%3Dbride%26fl%3Dtitle%20tagline&fieldSpec=title%20tagline,200 +16,2.4.17,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtitle_ws_syn%20overview_ws_syn%20tagline_ws_syn%26indent%3Don%26q%3Dsci%20fi%26tie%3D1.0%26sow%3Dfalse%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count,200 +17,2.4.18,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtitle_auto_phrase_syn%20overview_auto_phrase_syn%26indent%3Don%26q%3Dbest%20sci%20fi%20movie%26tie%3D1.0%26sow%3Dfalse%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count,200 +18,2.4.21,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26pf%3Dtext_all_idioms%26q%3Dsci%20fi%20movie%26qf%3Dtext_all_idioms%26rows%3D10%26stopwords%3Dtrue%26fl%3Did%20title%20overview%20tagline&fieldSpec=id%20title%20overview%20tagline,200 diff --git a/.testing/splainer_puppet_solr.py b/.testing/splainer_puppet_solr.py new file mode 100644 index 0000000..286ac15 --- /dev/null +++ b/.testing/splainer_puppet_solr.py @@ -0,0 +1,32 @@ +import asyncio +from pyppeteer import launch +import requests +import pandas as pd +# Path is relative to the working directory and must match the committed CSV name exactly (case matters on Linux filesystems). +file = 'splainer_links_solr.csv' +dat = pd.read_csv(file) +codes = [] + +# async def main(): +# browser = await launch() +# for url in dat["URL"]: +# page = await browser.newPage() +# await page.goto(url, {'waitUntil': 'networkidle0'}) +# solr_link = await page.querySelector('div.alert:nth-child(2) > div:nth-child(1) >
a:nth-child(1)') +# content = await page.evaluate('(element) => element.getAttribute("href")', solr_link) +# r = requests.get(content) +# code = r.status_code +# codes.append(code) +# await browser.close() +# dat["Code"] = codes +# dat.to_csv(file, index=False) +# print(f"{sum([x != 200 for x in codes])} of {len(codes)} of splainer links failed.") + +async def main(): + browser = await launch() + page = await browser.newPage() + await page.goto('http://splainer.io/#/es_?esUrl=http:%2F%2Flocalhost:9200%2Ftmdb%2F_search&esQuery=%7B%0A%20%20%22query%22:%20%7B%0A%20%20%20%20%20%20%22multi_match%22:%20%7B%0A%20%20%20%20%20%20%20%20%20%20%22query%22:%20%22will%20smith%22,%0A%20%20%20%20%20%20%20%20%20%20%22type%22:%20%22best_fields%22,%0A%20%20%20%20%20%20%20%20%20%20%22fields%22:%20%5B%22title%22,%20%22tagline%22,%20%22overview%22,%20%22cast%22%5D%0A%20%20%20%20%20%20%7D%0A%20%20%20%20%0A%20%20%7D%0A%7D%20%20%20%20%0A&fieldSpec=title%20cast%20directors%20release_date') + await browser.close() + + +asyncio.get_event_loop().run_until_complete(main()) From 2ba42f187585d4d8c4153a26ed039e2c64b27046 Mon Sep 17 00:00:00 2001 From: Nathan Day Date: Wed, 20 May 2020 11:43:57 -0400 Subject: [PATCH 6/6] typos in testing readme --- .testing/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.testing/README.md b/.testing/README.md index cc8834d..797e46a 100644 --- a/.testing/README.md +++ b/.testing/README.md @@ -18,7 +18,7 @@ This will record the status code in the CSV file and print the number of failed #### Newman -These tests check that version change in Elasticsearch don't damage TLRE examples. +These tests check that version changes in Solr don't damage TLRE examples. [Newman](https://github.com/postmanlabs/newman) is the command line tool for managing Postman collections. All examples from the class, beyond just the links to Splainer, are included in the collection `../es-postman-collection.json`