Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate Querqy & add missing index time synonym field and fieldType #28

Merged
merged 4 commits into from
Apr 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ Windows:

2. Navigate into the newly unzipped directory.

3. Run Solr pointing at the TMDB Solr Home directory included in this repo.
3. Open `/path/to/solr-tmdb-master/solr_home/tmdb/conf/solrconfig.xml` and change the `<lib dir="..."/>` path for the Querqy libraries so that it points to the extra libraries located in `/path/to/solr-tmdb-master/docker/lib`.

4. Run Solr pointing at the TMDB Solr Home directory included in this repo.

Linux/OSX:
>bin/solr start -f -s /path/to/solr-tmdb-master/solr_home/
Expand All @@ -53,7 +55,7 @@ Windows:
Regardless of the option you choose, navigate to [http://localhost:8983/solr/](http://localhost:8983/solr/) to confirm Solr is running.

# Index TMDB movies
We have a movie data corpus sourced from The Movie Database (similar data to IMDB (Internet Movie Database).
We have a movie data corpus sourced from The Movie Database (TMDB), which contains data similar to IMDb (the Internet Movie Database).

Linux/OSX:
> ./index.sh
Expand Down
14 changes: 14 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Compose definition for a single Solr container built from ./docker.
version: '3'
services:
  solr:
    container_name: solr
    # Build the image from docker/Dockerfile instead of pulling a stock image.
    build: ./docker
    ports:
      - "8983:8983"
    environment:
      - SOLR_OPTS=-XX:-UseLargePages
      - SOLR_HEAP=800m
      - SOLR_HOME=/opt/mysolrhome
      - INIT_SOLR_HOME=yes
    volumes:
      # Mount the repository's solr_home at the path SOLR_HOME points to.
      - ./solr_home:/opt/mysolrhome
2 changes: 1 addition & 1 deletion docker.ps1
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

docker run -p 8983:8983 -v ${PWD}/solr_home:/opt/mysolrhome -e SOLR_HOME=/opt/mysolrhome -e INIT_SOLR_HOME=yes solr:8.11.1
docker-compose up -d
2 changes: 1 addition & 1 deletion docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ if [ "$(uname -s)" == 'Linux' ]; then
sudo chown -R 8983:8983 solr_home
fi

docker run -p 8983:8983 -v "${PWD}/solr_home:/opt/mysolrhome" -e SOLR_HOME=/opt/mysolrhome -e INIT_SOLR_HOME=yes solr:8.11.1
docker-compose up -d
14 changes: 14 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Base image: Solr 8.11.1.
FROM solr:8.11.1

# Alternative base image: produced by building Solr locally.
#FROM apache/solr:9.0.0-SNAPSHOT

# Alternative base image: produced by the Apache Solr project.
#FROM apache/solr-nightly:9.0.0-SNAPSHOT

# This is required by Solr 9, but not by Solr 8!
#RUN mkdir /var/solr/data


# Copy the Querqy jars from ./lib (relative to the build context) into
# /opt/querqy/lib/ inside the image.
COPY ./lib/querqy-solr-5.1.lucene810.0-jar-with-dependencies.jar /opt/querqy/lib/
COPY ./lib/querqy-regex-filter-1.1.0-SNAPSHOT.jar /opt/querqy/lib/
36 changes: 36 additions & 0 deletions solr-postman-collection.json
Original file line number Diff line number Diff line change
Expand Up @@ -1531,6 +1531,42 @@
},
"response": []
},
{
  "name": "4-Add Querqy boost rule",
  "request": {
    "method": "POST",
    "header": [],
    "body": {
      "mode": "raw",
      "raw": "{\n \"class\": \"querqy.solr.rewriter.commonrules.CommonRulesRewriterFactory\",\n \"config\": {\n \"rules\" : \"action =>\\nUP(100): * genres:action\"\n }\n}\n",
      "options": {
        "raw": {
          "language": "json"
        }
      }
    },
    "url": {
      "raw": "{{solr_host}}/solr/tmdb/querqy/rewriter/common_rules?action=save",
      "host": [
        "{{solr_host}}"
      ],
      "path": [
        "solr",
        "tmdb",
        "querqy",
        "rewriter",
        "common_rules"
      ],
      "query": [
        {
          "key": "action",
          "value": "save"
        }
      ]
    }
  },
  "response": []
},
{
"name": "6-LTR",
"request": {
Expand Down
22 changes: 22 additions & 0 deletions solr_home/tmdb/conf/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@
<copyField source="overview" dest="overview_multiterm_syn"/>
<copyField source="tagline" dest="tagline_multiterm_syn"/>

<copyField source="overview" dest="overview_multiterm_index_syn"/>

<copyField source="overview" dest="overview_idioms"/>
<copyField source="title" dest="title_idioms"/>
<copyField source="tagline" dest="tagline_idioms"/>
Expand Down Expand Up @@ -484,6 +486,26 @@
</analyzer>
</fieldType>

<!-- Fields named *_multiterm_index_syn use the index-time synonym field type below. -->
<dynamicField name="*_multiterm_index_syn"
              type="text_general_multiterm_index_syn"
              indexed="true"
              stored="true"/>
<fieldType name="text_general_multiterm_index_syn"
           class="solr.TextField"
           autoGeneratePhraseQueries="true"
           enableGraphQueries="true"
           positionIncrementGap="100"
           multiValued="true">
  <!-- Index side: stop words are removed, then synonyms from
       synonyms_multiterm.txt are applied, the token graph is flattened,
       and finally tokens are lowercased. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.SynonymGraphFilterFactory" ignoreCase="true" synonyms="synonyms_multiterm.txt"/>
    <filter class="solr.FlattenGraphFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <!-- Query side: same tokenizer, stop-word and lowercase filters, but no
       synonym filter. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- in this example, we will only use synonyms at index time
    <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms_multiterm.txt"/>
    -->
  </analyzer>
</fieldType>

<dynamicField name="*_ws_syn" type="text_general_ws_syn" indexed="true" stored="true"/>
<fieldType name="text_general_ws_syn" class="solr.TextField" autoGeneratePhraseQueries="false"
enableGraphQueries="false" positionIncrementGap="100" multiValued="true">
Expand Down
71 changes: 71 additions & 0 deletions solr_home/tmdb/conf/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@

<lib dir="./lib" />
-->
<lib dir="/opt/querqy/lib/" />

<!-- A 'dir' option by itself adds any files found in the directory
to the classpath, this is useful for including all jars in a
Expand Down Expand Up @@ -1427,6 +1428,76 @@
<queryResponseWriter name="schema.xml" class="solr.SchemaXmlResponseWriter"/>
-->

<!--
  Registers the Querqy query parser under the name 'querqy' and configures
  where the info logging output of each rewriter is sent.
-->
<queryParser name="querqy" class="querqy.solr.QuerqyDismaxQParserPlugin">
  <!-- NOTE(review): judging by its name, this lets requests reference
       rewriters that do not exist; confirm against the Querqy docs. -->
  <bool name="skipUnknownRewriters">true</bool>
  <lst name="infoLogging">
    <!--
      Define a 'sink' named 'responseSink' to which the logging information
      will be sent:
    -->
    <lst name="sink">
      <str name="id">responseSink</str>
      <str name="class">querqy.solr.ResponseSink</str>
    </lst>

    <!--
      Send the logging information from rewriter 'replace_prelive' to sink
      'responseSink':
    -->
    <lst name="mapping">
      <str name="rewriter">replace_prelive</str>
      <str name="sink">responseSink</str>
    </lst>

    <!--
      Send the logging information from rewriter 'replace' to sink
      'responseSink':
    -->
    <lst name="mapping">
      <str name="rewriter">replace</str>
      <str name="sink">responseSink</str>
    </lst>

    <!--
      Send the logging information from rewriter 'common_rules_prelive' to sink
      'responseSink' too:
    -->
    <lst name="mapping">
      <str name="rewriter">common_rules_prelive</str>
      <str name="sink">responseSink</str>
    </lst>

    <!--
      Send the logging information from rewriter 'common_rules' to sink
      'responseSink' too:
    -->
    <lst name="mapping">
      <str name="rewriter">common_rules</str>
      <str name="sink">responseSink</str>
    </lst>

    <!--
      Send the logging information from rewriter 'regex_screen_protectors' to
      sink 'responseSink' as well:
    -->
    <lst name="mapping">
      <str name="rewriter">regex_screen_protectors</str>
      <str name="sink">responseSink</str>
    </lst>

  </lst>
</queryParser>


<!-- Override the default QueryComponent -->
<searchComponent name="query" class="querqy.solr.QuerqyQueryComponent"/>

<!--
Add the Querqy request handler.
-->
<requestHandler name="/querqy/rewriter" class="querqy.solr.QuerqyRewriterRequestHandler"/>

<queryResponseWriter name="json" class="solr.JSONResponseWriter">
<str name="content-type">text/javascript; charset=UTF-8</str>
</queryResponseWriter>
Expand Down