Skip to content

Commit

Permalink
[search] Search in downloader by country names.
Browse files Browse the repository at this point in the history
When a request to search in downloader arrives, we used to
only find features on the world map that match the request
and return the mwms that contain these features.

This commit mixes in the results of search directly in
the country tree (countries.txt), or, to be more precise, by
the translations of the names of the countries there (countries_names.txt).

This is not the most efficient implementation, but hopefully
it is isolated enough to make improvements easy. It was also
useful as an exploration of where our current search APIs are lacking, for example:

* The unnecessary std::string<->UniString conversions.
* Indexes such as MemSearchIndex pretending to be generic while in fact being
  tailored to a particular use-case.
* The difficulty of mixing search results from different sources.
  • Loading branch information
mpimenov committed Jan 24, 2021
1 parent 0df8e92 commit c35c388
Show file tree
Hide file tree
Showing 19 changed files with 335 additions and 36 deletions.
1 change: 1 addition & 0 deletions android/assets/countries_names.txt
1 change: 1 addition & 0 deletions android/script/replace_links.bat
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ cp ../data/classificator.txt assets/
cp ../data/colors.txt assets/
cp ../data/copyright.html assets/
cp ../data/countries.txt assets/
cp ../data/countries_names.txt assets/
cp ../data/drules_proto_dark.bin assets/
cp ../data/drules_proto_clear.bin assets/
cp ../data/drules_proto_vehicle_dark.bin assets/
Expand Down
1 change: 1 addition & 0 deletions defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@

#define COUNTRIES_FILE "countries.txt"
#define COUNTRIES_META_FILE "countries_meta.txt"
#define COUNTRIES_NAMES_FILE "countries_names.txt"
#define LEAP_SPEEDS_FILE "leap_speeds.json"

#define WORLD_FILE_NAME "World"
Expand Down
4 changes: 4 additions & 0 deletions iphone/Maps/Maps.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@
34F73FA31E08300E00AC1FD6 /* Images.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 34F73FA11E08300E00AC1FD6 /* Images.xcassets */; };
34F742321E0834F400AC1FD6 /* UIViewController+Navigation.m in Sources */ = {isa = PBXBuildFile; fileRef = 34F742301E0834F400AC1FD6 /* UIViewController+Navigation.m */; };
34FE5A6F1F18F30F00BCA729 /* TrafficButtonArea.swift in Sources */ = {isa = PBXBuildFile; fileRef = 34FE5A6D1F18F30F00BCA729 /* TrafficButtonArea.swift */; };
3970A6A825B64EE400CF5828 /* countries_names.txt in Resources */ = {isa = PBXBuildFile; fileRef = 3970A6A725B64EE300CF5828 /* countries_names.txt */; };
39CDE69123E1B6C8007CDA58 /* libge0.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 39CDE69023E1B6C8007CDA58 /* libge0.a */; };
3D0D2F7623D858BF00945C8D /* IsolinesTutorialBlur.xib in Resources */ = {isa = PBXBuildFile; fileRef = 3D0D2F7523D858BF00945C8D /* IsolinesTutorialBlur.xib */; };
3D15ACEE2155117000F725D5 /* MWMObjectsCategorySelectorDataSource.mm in Sources */ = {isa = PBXBuildFile; fileRef = 3D15ACED2155117000F725D5 /* MWMObjectsCategorySelectorDataSource.mm */; };
Expand Down Expand Up @@ -1416,6 +1417,7 @@
34FE4C431BCC013500066718 /* MWMMapWidgets.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MWMMapWidgets.h; sourceTree = "<group>"; };
34FE4C441BCC013500066718 /* MWMMapWidgets.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MWMMapWidgets.mm; sourceTree = "<group>"; };
34FE5A6D1F18F30F00BCA729 /* TrafficButtonArea.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TrafficButtonArea.swift; sourceTree = "<group>"; };
3970A6A725B64EE300CF5828 /* countries_names.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = countries_names.txt; path = ../../data/countries_names.txt; sourceTree = "<group>"; };
39CDE69023E1B6C8007CDA58 /* libge0.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; path = libge0.a; sourceTree = BUILT_PRODUCTS_DIR; };
3D0D2F7523D858BF00945C8D /* IsolinesTutorialBlur.xib */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = file.xib; path = IsolinesTutorialBlur.xib; sourceTree = "<group>"; };
3D15ACED2155117000F725D5 /* MWMObjectsCategorySelectorDataSource.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MWMObjectsCategorySelectorDataSource.mm; sourceTree = "<group>"; };
Expand Down Expand Up @@ -2360,6 +2362,7 @@
29B97314FDCFA39411CA2CEA /* Maps */ = {
isa = PBXGroup;
children = (
3970A6A725B64EE300CF5828 /* countries_names.txt */,
47AEF83F2231249E00D20538 /* categories_brands.txt */,
471BBD92213038E000EB17C9 /* TipsAndTricks */,
FA36B8011540388B004560CC /* Bookmarks */,
Expand Down Expand Up @@ -5187,6 +5190,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
3970A6A825B64EE400CF5828 /* countries_names.txt in Resources */,
47AEF8402231249E00D20538 /* categories_brands.txt in Resources */,
F6C3A1B221AC22810060EEC8 /* Alert 5.m4a in Resources */,
4560F585213D53C100CC736C /* shaders_metal.metallib in Resources */,
Expand Down
37 changes: 34 additions & 3 deletions map/search_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,29 @@
#include "map/discovery/discovery_search_params.hpp"
#include "map/everywhere_search_params.hpp"

#include "partners_api/booking_api.hpp"

#include "search/bookmarks/processor.hpp"
#include "search/geometry_utils.hpp"
#include "search/hotels_filter.hpp"
#include "search/tracer.hpp"
#include "search/utils.hpp"

#include "partners_api/booking_api.hpp"

#include "storage/downloader_search_params.hpp"
#include "storage/storage_defines.hpp"

#include "platform/platform.hpp"
#include "platform/preferred_languages.hpp"
#include "platform/safe_callback.hpp"

#include "geometry/mercator.hpp"

#include "base/checked_cast.hpp"
#include "base/file_name_utils.hpp"
#include "base/string_utils.hpp"

#include "defines.hpp"

#include <algorithm>
#include <cmath>
#include <iterator>
Expand Down Expand Up @@ -259,8 +264,12 @@ bool SearchAPI::SearchInDownloader(storage::DownloaderSearchParams const & param
p.m_needAddress = false;
p.m_needHighlighting = false;

storage::DownloaderSearchResults resultsFromCountries;
SearchInDownloaderByCountryName(params, resultsFromCountries);
resultsFromCountries.m_query = p.m_query;
p.m_onResults = DownloaderSearchCallback(static_cast<DownloaderSearchCallback::Delegate &>(*this),
m_dataSource, m_infoGetter, m_storage, params);
m_dataSource, m_infoGetter, m_storage, params,
move(resultsFromCountries));

return Search(p, true /* forceSearch */);
}
Expand Down Expand Up @@ -510,6 +519,28 @@ void SearchAPI::Search(SearchIntent & intent)
intent.m_isDelayed = false;
}

// Looks up |params.m_query| in the index of country names and appends at most
// kMaxResultsFromCountriesTree matching countries to |results.m_results|
// (each with an empty matched name). |results.m_query| is set to the query,
// and |params.m_onResults|, if set, is invoked with |results|.
void SearchAPI::SearchInDownloaderByCountryName(storage::DownloaderSearchParams const & params,
                                                storage::DownloaderSearchResults & results)
{
  // The index is created lazily on the first request: it is heavy (several
  // megabytes) and only a small number of user sessions is expected to
  // involve a search in downloader.
  if (!m_countriesNamesIndex)
    m_countriesNamesIndex = make_unique<CountriesNamesIndex>();

  size_t const kMaxResultsFromCountriesTree = 5;

  vector<storage::CountryId> matchedCountries;
  m_countriesNamesIndex->CollectMatchingCountries(params.m_query, matchedCountries);

  results.m_query = params.m_query;

  // Only the first kMaxResultsFromCountriesTree matches are reported.
  size_t numAdded = 0;
  for (auto const & countryId : matchedCountries)
  {
    if (numAdded == kMaxResultsFromCountriesTree)
      break;
    results.m_results.emplace_back(countryId, "" /* matchedName */);
    ++numAdded;
  }

  if (params.m_onResults)
    params.m_onResults(results);
}

void SearchAPI::SetViewportIfPossible(SearchParams & params)
{
if (m_isViewportInitialized)
Expand Down
7 changes: 7 additions & 0 deletions map/search_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "map/viewport_search_callback.hpp"
#include "map/viewport_search_params.hpp"

#include "search/countries_names_index.hpp"
#include "search/downloader_search_callback.hpp"
#include "search/engine.hpp"
#include "search/mode.hpp"
Expand Down Expand Up @@ -189,6 +190,11 @@ class SearchAPI : public search::DownloaderSearchCallback::Delegate,
bool Search(search::SearchParams const & params, bool forceSearch);
void Search(SearchIntent & intent);

// Searches by the names of countries in countries.txt and their translations.
// Does not involve the *.mwm data at all.
void SearchInDownloaderByCountryName(storage::DownloaderSearchParams const & params,
storage::DownloaderSearchResults & results);

void SetViewportIfPossible(search::SearchParams & params);

bool QueryMayBeSkipped(search::SearchParams const & prevParams,
Expand All @@ -200,6 +206,7 @@ class SearchAPI : public search::DownloaderSearchCallback::Delegate,
Delegate & m_delegate;

search::Engine m_engine;
std::unique_ptr<search::CountriesNamesIndex> m_countriesNamesIndex;

search::QuerySaver m_searchQuerySaver;

Expand Down
2 changes: 2 additions & 0 deletions search/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ set(
city_finder.cpp
city_finder.hpp
common.hpp
countries_names_index.cpp
countries_names_index.hpp
cuisine_filter.cpp
cuisine_filter.hpp
displayed_categories.cpp
Expand Down
89 changes: 89 additions & 0 deletions search/countries_names_index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#include "search/countries_names_index.hpp"

#include "platform/platform.hpp"

#include "coding/file_reader.hpp"

#include "base/assert.hpp"

#include <fstream>
#include <set>
#include <sstream>

using namespace std;

namespace search
{
// Fills m_countries from COUNTRIES_NAMES_FILE and then adds every loaded
// country's translations to the search index.
CountriesNamesIndex::CountriesNamesIndex()
{
// The order matters: BuildIndexFromTranslations() iterates over m_countries,
// so the countries must be loaded first.
ReadCountryNamesFromFile(m_countries);
BuildIndexFromTranslations();
}

// Replaces the contents of |results| with the ids of all countries that were
// retrieved from the index for at least one token of |query|.
//
// Every token is looked up with a Levenshtein DFA. The last token is looked up
// as a prefix unless the query ends with a delimiter. Each country appears in
// |results| at most once, ordered by its position in m_countries.
void CountriesNamesIndex::CollectMatchingCountries(string const & query,
                                                   vector<storage::CountryId> & results)
{
  vector<strings::UniString> queryTokens;
  search::NormalizeAndTokenizeString(query, queryTokens);

  search::Delimiters isDelimiter;
  bool const matchLastAsPrefix = !query.empty() && !isDelimiter(strings::LastUniChar(query));

  // Indices into m_countries; the set removes duplicates and keeps them sorted.
  set<size_t> ids;
  auto collectId = [&ids](size_t id, bool /* exactMatch */) { ids.insert(id); };

  size_t const numTokens = queryTokens.size();
  for (size_t tokenIdx = 0; tokenIdx < numTokens; ++tokenIdx)
  {
    bool const isLastToken = (tokenIdx + 1 == numTokens);
    if (isLastToken && matchLastAsPrefix)
      Retrieve<strings::PrefixDFAModifier<strings::LevenshteinDFA>>(queryTokens[tokenIdx], collectId);
    else
      Retrieve<strings::LevenshteinDFA>(queryTokens[tokenIdx], collectId);
  }

  // todo(@m) Do not bother with tf/idf for now.
  results.clear();
  for (auto id : ids)
  {
    CHECK_LESS(id, m_countries.size(), ());
    results.emplace_back(m_countries[id].m_countryId);
  }
}

// Reads COUNTRIES_NAMES_FILE through the platform reader and parses it into
// |countries|, replacing whatever |countries| held before.
//
// As far as this parser is concerned, the file consists of:
//   * lines of the form "[CountryId]", each of which starts a new country;
//   * lines of the form "key = value", whose trimmed value is stored as a
//     translation of the most recently started country.
// Lines that match neither form are skipped, as are "key = value" lines that
// appear before the first "[CountryId]" line.
void CountriesNamesIndex::ReadCountryNamesFromFile(vector<Country> & countries)
{
  string fileContents;
  GetPlatform().GetReader(COUNTRIES_NAMES_FILE)->ReadAsString(fileContents);

  countries.clear();

  istringstream input(fileContents);
  for (string line; getline(input, line);)
  {
    if (line.empty())
      continue;

    strings::Trim(line);

    bool const startsNewCountry = line[0] == '[';
    if (startsNewCountry)
    {
      CHECK_EQUAL(line[line.size() - 1], ']', ());
      // Strip the enclosing brackets to get the country id.
      countries.emplace_back();
      countries.back().m_countryId = line.substr(1, line.size() - 2);
      continue;
    }

    auto const eqPos = line.find('=');
    if (eqPos == string::npos || countries.empty())
      continue;

    // Ignore the language code: the language sets differ for StringUtf8Multilang
    // and for the translations used by this class.
    auto translation = line.substr(eqPos + 1);
    strings::Trim(translation);
    countries.back().m_doc.m_translations.push_back(translation);
  }
}

// Adds the document of every country in m_countries to m_index. The id of a
// document is the position of its country in m_countries.
void CountriesNamesIndex::BuildIndexFromTranslations()
{
  size_t docId = 0;
  for (auto const & country : m_countries)
  {
    m_index.Add(docId, country.m_doc);
    ++docId;
  }
}
} // namespace search
65 changes: 65 additions & 0 deletions search/countries_names_index.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#pragma once

#include "search/base/mem_search_index.hpp"
#include "search/feature_offset_match.hpp"

#include "storage/storage_defines.hpp"

#include "indexer/search_string_utils.hpp"

#include "base/string_utils.hpp"

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

namespace search
{
// An in-memory search index over the names of countries and their
// translations. Given a query, it returns the ids of the matching countries.
class CountriesNamesIndex
{
public:
  // The indexed document of a single country: the list of its translated names.
  struct Doc
  {
    // Calls |fn|(lang, token) once per translation. The language is always
    // StringUtf8Multilang::kDefaultCode and the token is the result of
    // NormalizeAndSimplifyString() applied to the translation.
    template <typename Fn>
    void ForEachToken(Fn && fn) const
    {
      for (auto const & translation : m_translations)
        fn(StringUtf8Multilang::kDefaultCode, NormalizeAndSimplifyString(translation));
    }

    std::vector<std::string> m_translations;
  };

  CountriesNamesIndex();

  // Replaces the contents of |results| with the ids of the countries
  // matching |query|.
  void CollectMatchingCountries(std::string const & query,
                                std::vector<storage::CountryId> & results);

private:
  // A country id together with the document that is indexed for it.
  struct Country
  {
    storage::CountryId m_countryId;
    Doc m_doc;
  };

  // Matches the token |s| against the index using a DFA of type |DFA| and
  // passes |fn| on to MatchFeaturesInTrie as the callback for the matches.
  // No ids are filtered out.
  // todo(@m) Almost the same as in bookmarks/processor.hpp.
  template <typename DFA, typename Fn>
  void Retrieve(strings::UniString const & s, Fn && fn) const
  {
    SearchTrieRequest<DFA> request;
    request.m_langs.insert(StringUtf8Multilang::kDefaultCode);
    request.m_names.emplace_back(BuildLevenshteinDFA(s));

    auto const acceptAll = [](size_t /* id */) { return true; };

    MatchFeaturesInTrie(request, m_index.GetRootIterator(), acceptAll /* filter */,
                        std::forward<Fn>(fn));
  }

  void ReadCountryNamesFromFile(std::vector<Country> & countries);
  void BuildIndexFromTranslations();

  // Countries in the order they were read; a country's position here is its
  // id in |m_index|.
  std::vector<Country> m_countries;
  search_base::MemSearchIndex<size_t> m_index;
};
} // namespace search
Loading

0 comments on commit c35c388

Please sign in to comment.