Skip to content

Commit

Permalink
Merge pull request #28 from wrigleyDan/master
Browse files Browse the repository at this point in the history
Integrate Querqy & add missing index time synonym field and fieldType
  • Loading branch information
wrigleyDan authored Apr 19, 2022
2 parents de8f248 + 702be89 commit 422672b
Show file tree
Hide file tree
Showing 8 changed files with 163 additions and 4 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ Windows:

2. Navigate into the newly unzipped directory.

3. Run Solr pointing at the TMDB Solr Home directory included in this repo.
3. Open `/path/to/solr-tmdb-master/solr_home/tmdb/conf/solrconfig.xml` and change the `dir` of the `<lib dir="/opt/querqy/lib/" />` entry so that it points to the extra libraries (the Querqy jars) located in `/path/to/solr-tmdb-master/docker/lib`.

4. Run Solr pointing at the TMDB Solr Home directory included in this repo.

Linux/OSX:
>bin/solr start -f -s /path/to/solr-tmdb-master/solr_home/
Expand All @@ -53,7 +55,7 @@ Windows:
Regardless of the option you choose, navigate to [http://localhost:8983/solr/](http://localhost:8983/solr/) to confirm Solr is running.

# Index TMDB movies
We have a movie data corpus sourced from The Movie Database (similar data to IMDB (Internet Movie Database).
We have a movie data corpus sourced from The Movie Database, similar data to IMDB (Internet Movie Database).

Linux/OSX:
> ./index.sh
Expand Down
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Compose definition for the TMDB Solr instance.
# Builds the image from ./docker (Solr plus the Querqy jars) and runs it with
# the repository's solr_home mounted as the Solr home directory.
version: '3'
services:
  solr:
    container_name: solr
    # Build from ./docker/Dockerfile instead of pulling a stock Solr image.
    build: ./docker
    ports:
      # Expose Solr on http://localhost:8983/solr/
      - "8983:8983"
    environment:
      # Disable the JVM's use of large memory pages.
      - SOLR_OPTS=-XX:-UseLargePages
      - SOLR_HEAP=800m
      # Use the mounted directory below as Solr home.
      - SOLR_HOME=/opt/mysolrhome
      - INIT_SOLR_HOME=yes
    volumes:
      # Host ./solr_home (contains the tmdb core configuration) -> container Solr home.
      - ./solr_home:/opt/mysolrhome
2 changes: 1 addition & 1 deletion docker.ps1
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

docker run -p 8983:8983 -v ${PWD}/solr_home:/opt/mysolrhome -e SOLR_HOME=/opt/mysolrhome -e INIT_SOLR_HOME=yes solr:8.11.1
docker-compose up -d
2 changes: 1 addition & 1 deletion docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ if [ "$(uname -s)" == 'Linux' ]; then
sudo chown -R 8983:8983 solr_home
fi

docker run -p 8983:8983 -v "${PWD}/solr_home:/opt/mysolrhome" -e SOLR_HOME=/opt/mysolrhome -e INIT_SOLR_HOME=yes solr:8.11.1
docker-compose up -d
14 changes: 14 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Solr image for the TMDB example, extended with the Querqy jars.
FROM solr:8.11.1

# Alternative base image: produced by building Solr locally.
#FROM apache/solr:9.0.0-SNAPSHOT

# Alternative base image: published by the Apache Solr project.
#FROM apache/solr-nightly:9.0.0-SNAPSHOT

# This is required by Solr 9, but not in Solr 8!
#RUN mkdir /var/solr/data


# Copy the Querqy jars from ./lib into /opt/querqy/lib/, the directory that
# solr_home/tmdb/conf/solrconfig.xml loads through its <lib dir="/opt/querqy/lib/" /> entry.
COPY ./lib/querqy-solr-5.1.lucene810.0-jar-with-dependencies.jar /opt/querqy/lib/
COPY ./lib/querqy-regex-filter-1.1.0-SNAPSHOT.jar /opt/querqy/lib/
36 changes: 36 additions & 0 deletions solr-postman-collection.json
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,42 @@
},
"response": []
},
{
"name": "4-Add Querqy boost rule",
"request": {
"method": "POST",
"header": [],
"body": {
"mode": "raw",
"raw": "{\n \"class\": \"querqy.solr.rewriter.commonrules.CommonRulesRewriterFactory\",\n \"config\": {\n \"rules\" : \"action =>\\nUP(100): * genres:action\"\n }\n}\n",
"options": {
"raw": {
"language": "json"
}
}
},
"url": {
"raw": "{{solr_host}}/solr/tmdb/querqy/rewriter/common_rules?action=save",
"host": [
"{{solr_host}}"
],
"path": [
"solr",
"tmdb",
"querqy",
"rewriter",
"common_rules"
],
"query": [
{
"key": "action",
"value": "save"
}
]
}
},
"response": []
},
{
"name": "6-LTR",
"request": {
Expand Down
22 changes: 22 additions & 0 deletions solr_home/tmdb/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@
<copyField source="overview" dest="overview_multiterm_syn"/>
<copyField source="tagline" dest="tagline_multiterm_syn"/>

<!-- Copy overview into a field matched by the *_multiterm_index_syn dynamic field,
whose type expands multi-term synonyms at index time. -->
<copyField source="overview" dest="overview_multiterm_index_syn"/>

<copyField source="overview" dest="overview_idioms"/>
<copyField source="title" dest="title_idioms"/>
<copyField source="tagline" dest="tagline_idioms"/>
Expand Down Expand Up @@ -484,6 +486,26 @@
</analyzer>
</fieldType>

<!-- Fields ending in _multiterm_index_syn: text whose multi-term synonyms are
expanded at index time only (no synonym filter in the query analyzer). -->
<dynamicField name="*_multiterm_index_syn" type="text_general_multiterm_index_syn" indexed="true" stored="true"/>
<fieldType name="text_general_multiterm_index_syn" class="solr.TextField" autoGeneratePhraseQueries="true"
enableGraphQueries="true" positionIncrementGap="100" multiValued="true">
<!-- Index time: tokenize, remove stopwords, add synonyms from synonyms_multiterm.txt,
flatten the token graph produced by the synonym filter, then lowercase.
The Solr filter documentation asks for FlattenGraphFilterFactory after
SynonymGraphFilterFactory in index analyzers. -->
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.SynonymGraphFilterFactory" ignoreCase="true" synonyms="synonyms_multiterm.txt"/>
<filter class="solr.FlattenGraphFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<!-- Query time: same tokenizer, stopword and lowercase steps, but no synonym expansion. -->
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!-- In this example synonyms are applied at index time only, so the query-time
synonym filter stays disabled:
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_multiterm.txt"/>
-->
</analyzer>
</fieldType>

<dynamicField name="*_ws_syn" type="text_general_ws_syn" indexed="true" stored="true"/>
<fieldType name="text_general_ws_syn" class="solr.TextField" autoGeneratePhraseQueries="false"
enableGraphQueries="false" positionIncrementGap="100" multiValued="true">
Expand Down
71 changes: 71 additions & 0 deletions solr_home/tmdb/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
<lib dir="./lib" />
-->
<!-- Loads the Querqy jars. The Docker image copies them to this directory; when
running Solr without Docker, point dir at the repository's docker/lib folder instead. -->
<lib dir="/opt/querqy/lib/" />

<!-- A 'dir' option by itself adds any files found in the directory
to the classpath, this is useful for including all jars in a
Expand Down Expand Up @@ -1427,6 +1428,76 @@
<queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
-->

<!--
Registers the Querqy query parser under the name 'querqy' and configures which
rewriters send their logging information to which sink.
-->
<queryParser name="querqy" class="querqy.solr.QuerqyDismaxQParserPlugin">
<!-- NOTE(review): per the Querqy documentation this makes requests tolerate
rewriter names that have not been created yet; confirm against the Querqy
version in use. -->
<bool name="skipUnknownRewriters">true</bool>
<!-- NOTE(review): of the rewriter names mapped below, only 'common_rules' is
created by the Postman collection shipped with this change; the others
presumably come from an upstream example configuration. Confirm they are needed. -->
<lst name="infoLogging">
<!--
Define a 'sink' named 'responseSink' to which the logging information
will be sent:
-->

<lst name="sink">
<str name="id">responseSink</str>
<str name="class">querqy.solr.ResponseSink</str>
</lst>

<!--
Send the logging information from rewriter 'replace_prelive' to sink
'responseSink':
-->
<lst name="mapping">
<str name="rewriter">replace_prelive</str>
<str name="sink">responseSink</str>
</lst>

<!--
Send the logging information from rewriter 'replace' to sink
'responseSink':
-->
<lst name="mapping">
<str name="rewriter">replace</str>
<str name="sink">responseSink</str>
</lst>

<!--
Send the logging information from rewriter 'common_rules_prelive' to sink
'responseSink' too:
-->
<lst name="mapping">
<str name="rewriter">common_rules_prelive</str>
<str name="sink">responseSink</str>
</lst>

<!--
Send the logging information from rewriter 'common_rules' to sink
'responseSink' too:
-->
<lst name="mapping">
<str name="rewriter">common_rules</str>
<str name="sink">responseSink</str>
</lst>

<!--
Send the logging information from rewriter 'regex_screen_protectors' to sink
'responseSink' as well:
-->
<lst name="mapping">
<str name="rewriter">regex_screen_protectors</str>
<str name="sink">responseSink</str>
</lst>

</lst>
</queryParser>


<!-- Override the default QueryComponent: the component named "query" is now
Querqy's QuerqyQueryComponent. -->
<searchComponent name="query" class="querqy.solr.QuerqyQueryComponent"/>

<!--
Add the Querqy request handler. Rewriter configurations are saved through this
path, e.g. POST /solr/tmdb/querqy/rewriter/common_rules?action=save.
-->
<requestHandler name="/querqy/rewriter" class="querqy.solr.QuerqyRewriterRequestHandler"/>

<!-- JSON response writer; sets the content-type of its responses to
text/javascript with UTF-8 encoding. -->
<queryResponseWriter name="json" class="solr.JSONResponseWriter">
<str name="content-type">text/javascript; charset=UTF-8</str>
</queryResponseWriter>
Expand Down

0 comments on commit 422672b

Please sign in to comment.