diff --git a/.testing/README.md b/.testing/README.md new file mode 100644 index 0000000..797e46a --- /dev/null +++ b/.testing/README.md @@ -0,0 +1,28 @@ +## Testing TLRE examples + +TLRE examples are vulnerable to changes in external tooling (Splainer) and Solr itself. So to ensure things are ready to go for training we've scripted these "tests" to check all of the examples. + +#### Splainer + +These tests check that changes to Splainer don't damage TLRE examples. + +Splainer links from the slides are stored in `splainer_links_solr.csv`. The script `splainer_puppet_solr.py` will visit each one of the links and report the HTTP status code back. + +``` +# pip install -r requirements.txt + +python splainer_puppet_solr.py +``` + +This will record the status code in the CSV file and print the number of failed queries to console. + +#### Newman + +These tests check that version changes in Solr don't damage TLRE examples. + +[Newman](https://github.com/postmanlabs/newman) is the command line tool for managing Postman collections. 
All examples from the class, beyond just the links to Splainer, are included in the collection `../solr_postman_collection.json` + +``` +newman run --global-var "solr_host=localhost:8983" ../solr_postman_collection.json +``` + diff --git a/.testing/requirements.txt b/.testing/requirements.txt new file mode 100644 index 0000000..211772c --- /dev/null +++ b/.testing/requirements.txt @@ -0,0 +1,15 @@ +appdirs==1.4.4 +certifi==2020.4.5.1 +chardet==3.0.4 +idna==2.9 +numpy==1.18.4 +pandas==1.0.3 +pyee==7.0.2 +pyppeteer==0.2.2 +python-dateutil==2.8.1 +pytz==2020.1 +requests==2.23.0 +six==1.14.0 +tqdm==4.46.0 +urllib3==1.25.9 +websockets==8.1 diff --git a/.testing/splainer_links_solr.csv b/.testing/splainer_links_solr.csv new file mode 100644 index 0000000..2f04a1f --- /dev/null +++ b/.testing/splainer_links_solr.csv @@ -0,0 +1,20 @@ +Unnamed: 0,Location (Day.Module.Slide),URL,Code +0,2.1.15,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dbasketball%20cartoon%20aliens%26tie%3D1.0%26fl%3Dtitle%20id%20overview&fieldSpec=title%20id%20overview,200 +1,2.2.4,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dcast:(will%20smith)%26fl%3Dtitle%20overview%20cast&fieldSpec=title%20overview%20cast,200 +2,2.2.8,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people:%22william%20shatner%22%26fl%3Dtitle%20overview%20cast%20directors&fieldSpec=title%20overview%20cast%20directors,200 +3,2.2.9,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people:%22william%20shatner%22%26fl%3Dtitle%20overview%20cast%20directors&fieldSpec=title%20overview%20cast%20directors,200 +4,2.2.13,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people_notf:%22william%20shatner%22%26fl%3Dtitle%20overview%20cast%20directors&fieldSpec=title%20overview%20cast%20directors,200 
+5,2.2.17,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3Fq%3Dtext_people_notf:%22william%20shatner%22%26defType%3Dedismax%26bf%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26fl%3Dtitle%20overview%20cast%20directors%20release_date&fieldSpec=title%20overview%20cast%20directors%20release_date",200 +6,2.3.7,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dwill%20smith%26bq%3D%7B!edismax%20mm%3D100%2525%20bq%3D''%20qf%3Dtext_people%20pf%3Dcast%20v%3D$q%7D%26fl%3Dtitle%20overview%20release_date%20cast%20directors&fieldSpec=title%20overview%20release_date%20cast%20directors,200 +7,2.3.8,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fs olr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dharrison%20ford%26peopleQuery%3D%7B!edismax%20bf%3D''%20bq%3D''%20qf%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26bf%3Dif(query($peopleQuery),$dateBoost,0)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost",404 
+8,2.3.9,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dharrison%20ford%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dif(query($peopleQuery),$prodDateBoost,0)%26bf%3D$totalDateBoost%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 +9,2.3.10,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 
+10,2.3.11,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 +11,2.3.12,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 
+12,2.3.13,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26titleQuery%3D%7B!edismax%20mm%3D100%2525%20qf%3Dtitle%20bq%3D''%20bf%3D''%20v%3D$q%7D%26bf%3Dif($titleQuery,product(100,vote_average),0)%26bf%3D$totalDateBoost%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26prodDateBoost%3Dproduct(10000,$dateBoost)%26totalDateBoost%3Dproduct(query($peopleQuery),$prodDateBoost)%26tie%3D1.0%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost%20f:$prodDateBoost%20f:$totalDateBoost",400 +13,2.3.14,"http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtext_all%26indent%3Don%26q%3Dstar%20wars%26peopleQuery%3D%7B!field%20bf%3D''%20bq%3D''%20f%3Dtext_people%20mm%3D100%2525%20v%3D$q%7D%26dateBoost%3Drecip(ms(NOW,release_date),3.16e-11,10,1)%26bf%3Dif(query($peopleQuery),product($dateBoost,100),0)%26bf%3Dproduct($titleSearch,pow(vote_average,2))%26tie%3D1.0%26titleSearch%3D%7B!edismax%20bf%3D''%20pf%3Dtitle%20bq%3D''%20qf%3Dtitle%20mm%3D100%2525%20v%3D$q%7D%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count%20f:$dateBoost",400 +14,2.4.8,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdefType%3Dedismax%26qf%3Dtitle_bidirect_syn%26indent%3Don%26q%3Dbride%26fl%3Dtitle%20tagline&fieldSpec=title%20tagline,200 
+15,2.4.8.b,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdefType%3Dedismax%26qf%3Dtitle_directed_syn%26indent%3Don%26q%3Dbride%26fl%3Dtitle%20tagline&fieldSpec=title%20tagline,200
+16,2.4.17,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtitle_ws_syn%20overview_ws_syn%20tagline_ws_syn%26indent%3Don%26q%3Dsci%20fi%26tie%3D1.0%26sow%3Dfalse%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count,200
+17,2.4.18,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26qf%3Dtitle_auto_phrase_syn%20overview_auto_phrase_syn%26indent%3Don%26q%3Dbest%20sci%20fi%20movie%26tie%3D1.0%26sow%3Dfalse%26fl%3Dtitle%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count&fieldSpec=title:title%20tagline%20text_people%20overview%20release_date%20vote_average%20vote_count,200
+18,2.4.21,http://splainer.io/#?solr=http:%2F%2Flocalhost:8983%2Fsolr%2Ftmdb%2Fselect%3FdebugQuery%3Don%26defType%3Dedismax%26pf%3Dtext_all_idioms%26q%3Dsci%20fi%20movie%26qf%3Dtext_all_idioms%26rows%3D10%26stopwords%3Dtrue%26fl%3Did%20title%20overview%20tagline&fieldSpec=id%20title%20overview%20tagline,200
diff --git a/.testing/splainer_puppet_solr.py b/.testing/splainer_puppet_solr.py
new file mode 100644
index 0000000..286ac15
--- /dev/null
+++ b/.testing/splainer_puppet_solr.py
@@ -0,0 +1,58 @@
+"""Check that every Splainer link in splainer_links_solr.csv still works.
+
+For each URL, loads the Splainer page with pyppeteer, reads the Solr query link
+the page renders, requests it, and stores the HTTP status in the "Code" column.
+Run from the .testing directory: python splainer_puppet_solr.py
+"""
+import asyncio
+from pyppeteer import launch
+import requests
+import pandas as pd
+
+# Must match the file name on disk exactly; case matters on Linux filesystems.
+file = 'splainer_links_solr.csv'
+dat = pd.read_csv(file)
+codes = []
+
+# Selector for the link to the raw Solr query on a loaded Splainer page.
+SOLR_LINK_SELECTOR = 'div.alert:nth-child(2) > div:nth-child(1) > a:nth-child(1)'
+# Recorded when no HTTP response was obtained; keeps the Code column integer.
+NO_RESPONSE = 0
+
+
+async def solr_status(browser, url):
+    """Return the HTTP status of the Solr query behind one Splainer URL."""
+    page = await browser.newPage()
+    try:
+        await page.goto(url, {'waitUntil': 'networkidle0'})
+        solr_link = await page.querySelector(SOLR_LINK_SELECTOR)
+        if solr_link is None:
+            # Splainer rendered no query link, so there is nothing to request.
+            return NO_RESPONSE
+        content = await page.evaluate('(element) => element.getAttribute("href")', solr_link)
+    finally:
+        # Close the tab so a long link list does not accumulate open pages.
+        await page.close()
+    try:
+        return requests.get(content).status_code
+    except requests.RequestException:
+        # Solr unreachable: record a failure and keep checking the other links.
+        return NO_RESPONSE
+
+
+async def main():
+    """Check every link, write the codes to the CSV, and print the failure count."""
+    browser = await launch()
+    try:
+        for url in dat["URL"]:
+            codes.append(await solr_status(browser, url))
+    finally:
+        # Always shut the browser down, even when a page load raises.
+        await browser.close()
+    dat["Code"] = codes
+    dat.to_csv(file, index=False)
+    print(f"{sum([x != 200 for x in codes])} of {len(codes)} of splainer links failed.")
+
+
+if __name__ == "__main__":
+    asyncio.get_event_loop().run_until_complete(main())
diff --git a/README.md b/README.md
index 7c29cbd..bc00322 100644
--- a/README.md
+++ b/README.md
@@ -45,10 +45,10 @@ Regardless of the option you choose, navigate to [http://localhost:8983/solr/](h
 
 # Index TMDB movies
 
-1. Download [tmdb.json](http://es-learn-to-rank.labs.o19s.com/tmdb.json)
+1. Download [tmdb.json](https://o19s-public-datasets.s3.amazonaws.com/tmdb_2020-05-20.json)
 
 ```
-curl -o tmdb.json http://es-learn-to-rank.labs.o19s.com/tmdb.json
+curl -o tmdb.json https://o19s-public-datasets.s3.amazonaws.com/tmdb_2020-05-20.json
 ```
 
 2. Install the [pysolr](https://github.com/django-haystack/pysolr) library
@@ -92,8 +92,3 @@ If you want to use Postman during the TLRE class:
 4. Tinker with the base URL, Params or JSON Body (optional)
 5. Press 'Send' (blue rectangle button right of URL bar)
 
-This collection is also valuable for testing examples against new versions of Solr. 
Using Postman's command line tool [Newman](https://github.com/postmanlabs/newman) you can check all of the requests in the collection:
-
-```
-newman run --global-var "solr_host=localhost:8983" solr-TLRE-postman_collection.json
-```
diff --git a/docker.sh b/docker.sh
new file mode 100755
index 0000000..0a8c49d
--- /dev/null
+++ b/docker.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Start Solr 8.4.1 on port 8983 with ./solr_home mounted as SOLR_HOME.
+# Use "$(pwd)": bash runs $(PWD) as a command named PWD, which normally does not exist.
+docker run -p 8983:8983 -v "$(pwd)/solr_home:/opt/mysolrhome" -e SOLR_HOME=/opt/mysolrhome -e INIT_SOLR_HOME=yes solr:8.4.1
diff --git a/indexTmdb.py b/indexTmdb.py
index c35e668..f1d754d 100644
--- a/indexTmdb.py
+++ b/indexTmdb.py
@@ -30,4 +30,4 @@ def indexableMovies():
 
 if __name__ == "__main__":
     solr = pysolr.Solr('http://localhost:8983/solr/tmdb', timeout=100)
-    solr.add(indexableMovies(), commit=True)
+    solr.add(list(indexableMovies()), commit=True)
diff --git a/requirements.txt b/requirements.txt
index 3d5149e..83e62f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,6 @@
-certifi==2019.9.11
+certifi==2020.4.5.1
 chardet==3.0.4
-idna==2.8
-pipenv==2018.11.26
-pysolr==3.8.1
-requests==2.22.0
-urllib3==1.25.6
-virtualenv==16.7.7
-virtualenv-clone==0.5.3
+idna==2.9
+pysolr==3.9.0
+requests==2.23.0
+urllib3==1.25.9
diff --git a/solr-TLRE-postman_collection.json b/solr_postman_collection.json
similarity index 100%
rename from solr-TLRE-postman_collection.json
rename to solr_postman_collection.json