pengovbr · marlinhares · Jan 21, 2025 · Jan 14, 2025
diff --git a/containers/solr-9.6.1/Dockerfile b/containers/solr-9.6.1/Dockerfile
@@ -0,0 +1,23 @@
+################################################################################
+# Dockerfile de construcao do container Solr utilizado pelo SEI
+#
+# Container preparado e configurado para uso em desenvolvimento e testes
+################################################################################
+
+ARG IMAGEM_BASE=processoeletronico/base-rocky93:latest
+
+FROM ${IMAGEM_BASE}
+
+ENV TERM xterm
+
+ADD assets/ \
+   https://archive.apache.org/dist/solr/solr/9.6.1/solr-9.6.1.tgz \
+   https://github.com/spbgovbr/sei-docker-binarios/raw/main/pacoteslinux/msttcore-fonts-2.0-3.noarch.rpm \ 
+   /tmp/
+
+RUN ls -lh /tmp && sh /tmp/install.sh && rm -rf /tmp/*
+
+USER solr
+
+EXPOSE 8983
+CMD ["/command.sh"]
diff --git a/containers/solr-9.6.1/assets/command.sh b/containers/solr-9.6.1/assets/command.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+
+/opt/solr/bin/solr -p 8983 -f
diff --git a/containers/solr-9.6.1/assets/install.sh b/containers/solr-9.6.1/assets/install.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+set -e
+
+yum -y update && yum -y install lsof wget glibc-locale-source procps bc
+
+localedef pt_BR -i pt_BR -f ISO-8859-1
+
+dnf -y install java-17-openjdk
+
+# Instalação de pacote de fontes do windows
+rpm -Uvh /tmp/msttcore-fonts-2.0-3.noarch.rpm
+
+# join splited solr
+# cat /tmp/solr-9.6.1.tgz.part* > /tmp/solr-9.6.1.tgz
+
+mv /tmp/solr9.6.1sei/* /tmp/
+
+useradd solr
+
+chmod +x /tmp/sei-solr-9.6.1.sh
+
+cd /tmp
+
+/tmp/sei-solr-9.6.1.sh
+
+echo "" >> /opt/solr/bin/solr.in.sh
+echo 'SOLR_OPTS="$SOLR_OPTS -Dsolr.allowPaths=/dados"' >> /opt/solr/bin/solr.in.sh
+echo 'SOLR_JETTY_HOST="0.0.0.0"' >> /opt/solr/bin/solr.in.sh
+
+mv /tmp/command.sh /
+
+yum clean all
+rm -rf /var/cache/yum
+
+# Configuração de permissões de execução no script de inicialização do container
+chmod +x /command.sh
diff --git a/containers/solr-9.6.1/assets/solr9.6.1sei/log4j.properties b/containers/solr-9.6.1/assets/solr9.6.1sei/log4j.properties
@@ -0,0 +1,24 @@
+#  Logging level
+solr.log=logs
+log4j.rootLogger=WARN, file, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+
+log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
+
+#- size rotation with log cleanup.
+log4j.appender.file=org.apache.log4j.RollingFileAppender
+log4j.appender.file.MaxFileSize=4MB
+log4j.appender.file.MaxBackupIndex=9
+
+#- File to log to and log format
+log4j.appender.file.File=${solr.log}/solr.log
+log4j.appender.file.layout=org.apache.log4j.EnhancedPatternLayout
+log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss.SSS} %-5p (%t) [%X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m\n
+
+log4j.logger.org.apache.zookeeper=WARN
+log4j.logger.org.apache.hadoop=WARN
+
+# set to INFO to enable infostream log messages
+log4j.logger.org.apache.solr.update.LoggingInfoStream=OFF
diff --git a/containers/solr-9.6.1/assets/solr9.6.1sei/security.json b/containers/solr-9.6.1/assets/solr9.6.1sei/security.json
@@ -0,0 +1,12 @@
+{
+  "authentication":{
+    "blockUnknown": true,
+    "class":"solr.BasicAuthPlugin",
+    "credentials":{"admin":"etj8dxXw64oqYMn1V3FRkIT81TlGQEcdvhm/Y9OOt90= ofmrnvtF35ksk37rQjLd/i6PxAlqFD/24ySenLSQeLA=","sei":"mmiwzGKuZpDvyzG/B9vnBFnHVbblaDr7kw84alKR15U= x6C+p2Oscp7MDXHuImMw7rfy5nvJ32H+Dc+6EL7145Y="}
+  },
+  "authorization":{
+    "class":"solr.RuleBasedAuthorizationPlugin",
+    "permissions":[{"name":"read", "role":"*"},{"name":"update", "role":"*"},{"name":"all", "role":"admin"}],
+    "user-role":{"admin":"admin","sei": "basic"}
+  }
+}
diff --git a/...ers/solr-9.6.1/assets/solr9.6.1sei/sei-cores-9.6.1/sei-bases-conhecimento/conf/schema.xml b/...ers/solr-9.6.1/assets/solr9.6.1sei/sei-cores-9.6.1/sei-bases-conhecimento/conf/schema.xml
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="sei-bases-conhecimento" version="1.6">
+
+  <field name="id_bc" type="int" indexed="true" stored="true" />
+  <field name="desc" type="string" indexed="true" stored="true" />
+  <field name="id_uni" type="int" indexed="true" stored="true" />
+  <field name="dta_ger" type="date" indexed="true" stored="true" />
+  <field name="id_anexo" type="int" indexed="true" stored="true" />
+  <field name="nome_anexo" type="string" indexed="true" stored="true" />
+  <field name="dth_idx" type="date" indexed="true" stored="true" />
+  <field name="aux1" type="string" indexed="true" stored="true" />
+  <field name="aux2" type="string" indexed="true" stored="true" />
+  <field name="aux3" type="int" indexed="true" stored="true" />
+
+  <!-- Common metadata fields, named specifically to match up with
+       SolrCell metadata when parsing rich documents such as Word, PDF.
+       Some fields are multiValued only because Tika currently may return
+       multiple values for them. Some metadata is parsed from the documents,
+       but there are some which come from the client context:
+         "content_type": From the HTTP headers of incoming stream
+         "resourcename": From SolrCell request param resource.name
+     -->
+  <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
+  <field name="subject" type="text_general" indexed="true" stored="true"/>
+  <field name="description" type="text_general" indexed="true" stored="true"/>
+  <field name="comments" type="text_general" indexed="true" stored="true"/>
+  <field name="author" type="text_general" indexed="true" stored="true"/>
+  <field name="keywords" type="text_general" indexed="true" stored="true"/>
+  <field name="category" type="text_general" indexed="true" stored="true"/>
+  <field name="resourcename" type="text_general" indexed="true" stored="true"/>
+  <field name="url" type="text_general" indexed="true" stored="true"/>
+  <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
+  <field name="last_modified" type="date" indexed="true" stored="true"/>
+  <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
+
+  <!-- Main body of document extracted by SolrCell.
+         NOTE: This field is not indexed by default, since it is also copied to "text"
+         using copyField below. This is to save space. Use this field for returning and
+         highlighting document content. Use the "text" field to search the content. -->
+
+  <field name="content" type="text_general" indexed="false" stored="true" multiValued="true" />
+
+  <!-- catchall field, containing all other searchable text fields (implemented
+       via copyField further on in this schema  -->
+
+  <field name="text" type="text_general" indexed="true" stored="false" multiValued="true" termVectors="true" />
+
+  <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
+       or Solr won't start. _version_ and update log are required for SolrCloud
+    -->
+
+  <field name="_version_" type="long" indexed="true" stored="true"/>
+
+  <!-- points to the root document of a block of nested documents. Required for nested
+       document support, may be removed otherwise
+    -->
+  <field name="_root_" type="string" indexed="true" stored="false"/>
+
+  <!-- Only remove the "id" field if you have a very good reason to. While not strictly
+     required, it is highly recommended. A <uniqueKey> is present in almost all Solr
+     installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
+     Do NOT change the type and apply index-time analysis to the <uniqueKey> as it will likely
+     make routing in SolrCloud and document replacement in general fail. Limited _query_ time
+     analysis is possible as long as the indexing process is guaranteed to index the term
+     in a compatible way. Any analysis applied to the <uniqueKey> should _not_ produce multiple
+     tokens
+   -->
+  <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
+
+  <!-- Field to use to determine and enforce document uniqueness.
+         Unless this field is marked with required="false", it will be a required field
+      -->
+  <uniqueKey>id</uniqueKey>
+
+  <!-- copyField commands copy one field to another at the time a document
+          is added to the index.  It's used either to index the same field differently,
+          or to add multiple fields to the same field for easier/faster searching.  -->
+
+  <!-- Text fields from SolrCell to search by default in our catch-all field -->
+
+  <copyField source="title" dest="text"/>
+  <copyField source="author" dest="text"/>
+  <copyField source="description" dest="text"/>
+  <copyField source="keywords" dest="text"/>
+  <copyField source="content" dest="text"/>
+  <copyField source="content_type" dest="text"/>
+  <copyField source="resourcename" dest="text"/>
+  <copyField source="url" dest="text"/>
+  <copyField source="desc" dest="text"/>
+
+
+  <!-- field type definitions. The "name" attribute is
+     just a label to be used by field definitions.  The "class"
+     attribute and any other attributes determine the real
+     behavior of the fieldType.
+       Class names starting with "solr" refer to java classes in a
+     standard package such as org.apache.solr.analysis
+  -->
+  <fieldType name="string" class="solr.StrField" sortMissingLast="true" />
+  <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+  <fieldType name="int" class="solr.IntPointField" docValues="true"/>
+  <fieldType name="float" class="solr.FloatPointField" docValues="true"/>
+  <fieldType name="long" class="solr.LongPointField" docValues="true"/>
+  <fieldType name="double" class="solr.DoublePointField" docValues="true"/>
+  <fieldType name="date" class="solr.DatePointField" docValues="true"/>
+
+  <fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" multiValued="true"/>
+  <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+  <fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
+  <fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
+  <fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
+  <fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
+  <fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>
+
+  <!-- A general text field that has reasonable, generic
+         cross-language defaults: it tokenizes with StandardTokenizer,
+   removes stop words from case-insensitive "stopwords.txt"
+   (empty by default), and down cases.  At query time only, it
+   also applies synonyms. -->
+
+  <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+    <analyzer type="index">
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" />
+      <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" splitOnCaseChange="1" splitOnNumerics="0"  catenateWords="1" catenateNumbers="1" catenateAll="1" />
+      <filter class="solr.FlattenGraphFilterFactory" />
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.ASCIIFoldingFilterFactory"/>
+    </analyzer>
+
+    <analyzer type="query">
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.SynonymGraphFilterFactory" expand="true" ignoreCase="true" synonyms="synonyms.txt"/>
+      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" />
+      <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" splitOnCaseChange="1" splitOnNumerics="0"  catenateWords="1" catenateNumbers="1" catenateAll="1" />
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.ASCIIFoldingFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+</schema>
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#!/usr/bin/env sh

		/opt/solr/bin/solr -p 8983 -f