-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add multilingual support and external linking for Pokémon data
- Loading branch information
1 parent
dd0d4eb
commit dd38d49
Showing
15 changed files
with
20,713 additions
and
7 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package org.example.linking; | ||
|
||
import org.apache.jena.rdf.model.*; | ||
import org.apache.jena.vocabulary.OWL; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.net.URI; | ||
import java.net.http.HttpClient; | ||
import java.net.http.HttpRequest; | ||
import java.net.http.HttpResponse; | ||
import java.time.Duration; | ||
|
||
public class ExternalLinker { | ||
private static final Logger logger = LoggerFactory.getLogger(ExternalLinker.class); | ||
private static final String DBPEDIA_SPARQL_ENDPOINT = "https://dbpedia.org/sparql"; | ||
private final HttpClient httpClient; | ||
|
||
public ExternalLinker() { | ||
this.httpClient = HttpClient.newBuilder() | ||
.connectTimeout(Duration.ofSeconds(30)) | ||
.build(); | ||
} | ||
|
||
public void addExternalLinks(Model model) { | ||
ResIterator pokemonIterator = model.listResourcesWithProperty( | ||
model.createProperty("http://schema.org/name")); | ||
|
||
while (pokemonIterator.hasNext()) { | ||
Resource pokemon = pokemonIterator.next(); | ||
String pokemonName = pokemon.getProperty( | ||
model.createProperty("http://schema.org/name")) | ||
.getString(); | ||
|
||
// Find DBpedia resource | ||
String dbpediaUri = findDBpediaResource(pokemonName); | ||
if (dbpediaUri != null) { | ||
pokemon.addProperty(OWL.sameAs, | ||
model.createResource(dbpediaUri)); | ||
logger.info("Added DBpedia link for {}: {}", | ||
pokemonName, dbpediaUri); | ||
} | ||
} | ||
} | ||
|
||
private String findDBpediaResource(String pokemonName) { | ||
// Simpler query that just looks for resources with matching label | ||
String query = String.format( | ||
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" + | ||
"SELECT DISTINCT ?resource WHERE {\n" + | ||
" ?resource rdfs:label \"%s\"@en .\n" + | ||
" FILTER EXISTS { ?resource ?p ?o }\n" + | ||
"} LIMIT 1", | ||
pokemonName); | ||
|
||
try { | ||
String encodedQuery = java.net.URLEncoder.encode(query, "UTF-8"); | ||
HttpRequest request = HttpRequest.newBuilder() | ||
.uri(URI.create("http://dbpedia.org/sparql" + "?query=" + encodedQuery + "&format=json")) | ||
.header("Accept", "application/sparql-results+json") | ||
.timeout(Duration.ofSeconds(10)) // Reduced timeout | ||
.GET() | ||
.build(); | ||
|
||
logger.debug("Querying DBpedia for {}", pokemonName); | ||
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); | ||
logger.debug("DBpedia response status: {}", response.statusCode()); | ||
|
||
if (response.statusCode() == 200) { | ||
JSONObject json = new JSONObject(response.body()); | ||
JSONArray bindings = json.getJSONObject("results") | ||
.getJSONArray("bindings"); | ||
|
||
if (bindings.length() > 0) { | ||
String uri = bindings.getJSONObject(0) | ||
.getJSONObject("resource") | ||
.getString("value"); | ||
logger.info("Found DBpedia resource for {}: {}", | ||
pokemonName, uri); | ||
return uri; | ||
} | ||
} else { | ||
logger.warn("DBpedia query failed with status code: {} for {}", | ||
response.statusCode(), pokemonName); | ||
logger.debug("Response: {}", response.body()); | ||
} | ||
} catch (Exception e) { | ||
logger.warn("Error finding DBpedia resource for {}: {}", | ||
pokemonName, e.getMessage()); | ||
} | ||
return null; | ||
} | ||
} |
142 changes: 142 additions & 0 deletions
142
src/main/java/org/example/parser/MultilingualDataHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
package org.example.parser; | ||
|
||
import org.apache.jena.rdf.model.*; | ||
import org.apache.jena.vocabulary.RDFS; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.FileReader; | ||
import java.io.IOException; | ||
import java.util.*; | ||
|
||
public class MultilingualDataHandler { | ||
private static final Logger logger = LoggerFactory.getLogger(MultilingualDataHandler.class); | ||
private static final String TSV_FILE = "pokedex-i18n.tsv"; | ||
private final Map<String, Map<String, String>> pokemonLabels = new HashMap<>(); | ||
private final Set<String> supportedLanguages = new HashSet<>(); | ||
|
||
public void loadTSVData() { | ||
try { | ||
// First try to read from the classpath | ||
String content = new String(getClass().getResourceAsStream("/pokedex-i18n.tsv").readAllBytes()); | ||
processContent(content); | ||
} catch (Exception e) { | ||
// If not found in classpath, try as a regular file | ||
try (BufferedReader reader = new BufferedReader(new FileReader(TSV_FILE))) { | ||
StringBuilder content = new StringBuilder(); | ||
String line; | ||
while ((line = reader.readLine()) != null) { | ||
content.append(line).append("\n"); | ||
} | ||
processContent(content.toString()); | ||
} catch (IOException ex) { | ||
logger.error("Error loading multilingual data:", ex); | ||
} | ||
} | ||
} | ||
|
||
private void processContent(String content) { | ||
String[] lines = content.split("\n"); | ||
if (lines.length > 0) { | ||
// Extract language codes from header | ||
String[] headers = lines[0].split("\t"); | ||
for (int i = 1; i < headers.length; i++) { | ||
supportedLanguages.add(headers[i].trim()); | ||
} | ||
|
||
// Process each line | ||
for (int i = 1; i < lines.length; i++) { | ||
processLine(lines[i]); | ||
} | ||
logger.info("Loaded multilingual data for {} Pokemon in {} languages", | ||
pokemonLabels.size(), supportedLanguages.size()); | ||
logger.debug("Supported languages: {}", supportedLanguages); | ||
} | ||
} | ||
|
||
private void processLine(String line) { | ||
String[] parts = line.split("\t"); | ||
if (parts.length > 1) { | ||
String pokemonId = parts[0].trim(); | ||
Map<String, String> labels = new HashMap<>(); | ||
|
||
for (int i = 1; i < parts.length && i <= supportedLanguages.size(); i++) { | ||
String lang = new ArrayList<>(supportedLanguages).get(i-1); | ||
String label = parts[i].trim(); | ||
if (!label.isEmpty()) { | ||
labels.put(lang, label); | ||
} | ||
} | ||
|
||
pokemonLabels.put(pokemonId, labels); | ||
} | ||
} | ||
|
||
public void enrichModelWithLabels(Model model) { | ||
logger.info("Starting model enrichment with multilingual labels"); | ||
boolean hasJapanese = false; | ||
boolean hasFrench = false; | ||
|
||
// First add base English labels from schema:name | ||
ResIterator nameIterator = model.listResourcesWithProperty( | ||
model.createProperty("http://schema.org/name")); | ||
|
||
while (nameIterator.hasNext()) { | ||
Resource pokemon = nameIterator.next(); | ||
String name = pokemon.getProperty( | ||
model.createProperty("http://schema.org/name")).getString(); | ||
String identifier = pokemon.getProperty( | ||
model.createProperty("http://schema.org/identifier")).getString(); | ||
|
||
// Add English label | ||
pokemon.addProperty(RDFS.label, model.createLiteral(name, "en")); | ||
|
||
// Add Japanese label (from wiki data) | ||
Statement jnameStmt = pokemon.getProperty( | ||
model.createProperty("http://example.org/pokemon/japaneseName")); | ||
if (jnameStmt != null) { | ||
pokemon.addProperty(RDFS.label, | ||
model.createLiteral(jnameStmt.getString(), "ja")); | ||
hasJapanese = true; | ||
} | ||
|
||
// Add French label (from pokedex-i18n.tsv) | ||
Map<String, String> translations = pokemonLabels.get(identifier); | ||
if (translations != null && translations.containsKey("fr")) { | ||
pokemon.addProperty(RDFS.label, | ||
model.createLiteral(translations.get("fr"), "fr")); | ||
hasFrench = true; | ||
} | ||
} | ||
|
||
logger.info("Added labels in languages - English: true, Japanese: {}, French: {}", | ||
hasJapanese, hasFrench); | ||
|
||
// Add translations from TSV file | ||
ResIterator pokemonIterator = model.listResourcesWithProperty( | ||
model.createProperty("http://schema.org/identifier")); | ||
|
||
while (pokemonIterator.hasNext()) { | ||
Resource pokemon = pokemonIterator.next(); | ||
String identifier = pokemon.getProperty( | ||
model.createProperty("http://schema.org/identifier")) | ||
.getString(); | ||
|
||
// Add labels for this Pokemon in all available languages | ||
Map<String, String> labels = pokemonLabels.get(identifier); | ||
if (labels != null) { | ||
for (Map.Entry<String, String> entry : labels.entrySet()) { | ||
pokemon.addProperty(RDFS.label, | ||
model.createLiteral(entry.getValue(), entry.getKey())); | ||
} | ||
} | ||
} | ||
|
||
logger.info("Enriched model with multilingual labels"); | ||
} | ||
|
||
public Set<String> getSupportedLanguages() { | ||
return Collections.unmodifiableSet(supportedLanguages); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
id en ja fr | ||
1 Bulbasaur フシギダネ Bulbizarre | ||
2 Ivysaur フシギソウ Herbizarre | ||
3 Venusaur フシギバナ Florizarre |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
id en ja fr | ||
1 Bulbasaur フシギダネ Bulbizarre | ||
2 Ivysaur フシギソウ Herbizarre | ||
3 Venusaur フシギバナ Florizarre |
2 changes: 2 additions & 0 deletions
2
target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
8 changes: 5 additions & 3 deletions
8
target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,14 @@ | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/client/BulbapediaClient.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/server/LinkedDataServer.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/validation/RDFValidator.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/rdf/PokemonRDFConverter.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/server/EndpointTester.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/rdf/PokemonRDFGenerator.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/App.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/client/EvolutionChainFetcher.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/linking/ExternalLinker.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/rdf/PokemonRDFConverter.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/parser/MultilingualDataHandler.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/validation/PokemonShapes.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/server/SPARQLHandler.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/server/PokemonFusekiServer.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/App.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/client/EvolutionChainFetcher.java | ||
/Users/anjolaoluwaadeuyi/Documents/CPS2/Master 2 (M2)/Semantic web/SW Project/bulbapedia-kg/src/main/java/org/example/parser/WikiInfoboxParser.java |
Oops, something went wrong.