From 8524fb32038e676d466045e68695f5d92c64078e Mon Sep 17 00:00:00 2001 From: TJ Date: Mon, 21 Oct 2024 19:44:30 +0900 Subject: [PATCH 1/6] feat: similar photos and search photos by photo --- README.md | 4 +- config/runtime.exs | 10 +- docs/worklogs/2024-10-21.md | 24 ++++ lib/save_it/bot.ex | 144 +++++++++++++++++++---- lib/save_it/google_oauth2_device_flow.ex | 24 ++-- lib/save_it/typesense_client.ex | 141 ++++++++++++++++++++++ lib/small_sdk/telegram.ex | 26 ++++ mix.exs | 3 +- mix.lock | 7 ++ 9 files changed, 345 insertions(+), 38 deletions(-) create mode 100644 docs/worklogs/2024-10-21.md create mode 100644 lib/save_it/typesense_client.ex create mode 100644 lib/small_sdk/telegram.ex diff --git a/README.md b/README.md index f3f9865..f47737d 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,8 @@ mix deps.get ```sh # run export TELEGRAM_BOT_TOKEN= -export GOOGLE_OAUTH_CLIENT_ID= -export GOOGLE_OAUTH_CLIENT_SECRET= +# export GOOGLE_OAUTH_CLIENT_ID= +# export GOOGLE_OAUTH_CLIENT_SECRET= iex -S mix run --no-halt ``` diff --git a/config/runtime.exs b/config/runtime.exs index 8b7d630..f2cc286 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -2,5 +2,11 @@ import Config config :save_it, :telegram_bot_token, System.fetch_env!("TELEGRAM_BOT_TOKEN") config :ex_gram, token: System.fetch_env!("TELEGRAM_BOT_TOKEN") -config :save_it, :google_oauth_client_id, System.fetch_env!("GOOGLE_OAUTH_CLIENT_ID") -config :save_it, :google_oauth_client_secret, System.fetch_env!("GOOGLE_OAUTH_CLIENT_SECRET") + +config :save_it, :google_oauth_client_id, System.get_env("GOOGLE_OAUTH_CLIENT_ID") +config :save_it, :google_oauth_client_secret, System.get_env("GOOGLE_OAUTH_CLIENT_SECRET") + +config :save_it, :typesense_url, System.get_env("TYPESENSE_URL", "http://localhost:8108") +config :save_it, :typesense_api_key, System.get_env("TYPESENSE_API_KEY", "xyz") +# optional +config :save_it, :web_url, System.get_env("WEB_URL", "http://localhost:4000") diff --git a/docs/worklogs/2024-10-21.md b/docs/worklogs/2024-10-21.md new file mode 100644 index 0000000..9f4683a --- /dev/null +++ b/docs/worklogs/2024-10-21.md @@ -0,0 +1,24 @@ +# 2024-10-21 + +Telegram API 无法得到长期有效的 URL,设计不好容易泄漏 token + +telegram API file's download_url `https://api.telegram.org/file/bot/` + +直接返回上面 URL 会导致 token 泄漏。 + +解决方案:实现一个 proxy server,实现 `bot` 和 `` 转换,proxy URL `//` + +```elixir +# client +proxy_url = "#{proxy_server}/#{bot_id}/#{file_id}" +``` + +```elixir +# proxy server +bot_token = get_bot_token(bot_id) + +file = get_file(file_id) +file_path = file.file_path + +download_url = "https://api.telegram.org/file/#{bot_ken}/#{file_path}" +``` diff --git a/lib/save_it/bot.ex b/lib/save_it/bot.ex index 74cd855..2e7ea45 100644 --- a/lib/save_it/bot.ex +++ b/lib/save_it/bot.ex @@ -5,6 +5,10 @@ defmodule SaveIt.Bot do alias SaveIt.GoogleDrive alias SaveIt.GoogleOAuth2DeviceFlow + alias SaveIt.TypesenseClient + + alias SmallSdk.Telegram + @bot :save_it_bot @progress [ @@ -19,10 +23,11 @@ defmodule SaveIt.Bot do setup_commands: true command("start") - command("about", description: "About the bot") - command("code", description: "Get code for login") + command("search", description: "Search similar photos by photo") command("login", description: "Login") + command("code", description: "Get code for login") command("folder", description: "Update Google Drive folder ID") + command("about", description: "About the bot") middleware(ExGram.Middleware.IgnoreUsername) @@ -94,25 +99,6 @@ defmodule SaveIt.Bot do end end - defp login_google(chat) do - device_code = FileHelper.get_google_device_code(chat.id) - - case GoogleOAuth2DeviceFlow.exchange_device_code_for_token(device_code) do - {:ok, body} -> - FileHelper.set_google_access_token(chat.id, body["access_token"]) - send_message(chat.id, "Successfully logged in!") - - {:error, error} -> - Logger.error("Failed to log in: #{inspect(error)}") - - send_message(chat.id, """ - Failed to log in. - - Please run `/code` to get a new code, then run `/login` again. - """) - end - end - def handle({:command, :folder, %{chat: chat, text: text}}, _context) do case text do nil -> @@ -127,6 +113,22 @@ defmodule SaveIt.Bot do end end + def handle({:command, :search, %{chat: chat, photo: nil}}, _context) do + send_message(chat.id, "Please send me a photo to search.") + # TODO: ex_gram 是否可以支持连续对话? + end + + def handle({:message, %{chat: chat, caption: caption, photo: photos}}, ctx) do + photo = List.last(photos) + + bot_id = ctx.bot_info.id + + similar_photos = + search_similar_photos_based_on_caption(bot_id, photo, caption) + + answer_similar_photos(chat.id, similar_photos) + end + def handle({:text, text, %{chat: chat, message_id: message_id}}, _context) do urls = extract_urls_from_string(text) @@ -202,12 +204,12 @@ defmodule SaveIt.Bot do ) end - download_file -> + downloaded_file -> Logger.info("👍 File already downloaded, don't need to download again") update_message(chat.id, progress_message.message_id, Enum.slice(@progress, 0..2)) - bot_send_file(chat.id, download_file, {:file, download_file}) + bot_send_file(chat.id, downloaded_file, {:file, downloaded_file}) delete_messages(chat.id, [message_id, progress_message.message_id]) end @@ -237,6 +239,44 @@ defmodule SaveIt.Bot do # {:ok, nil} # end + defp search_similar_photos(bot_id, photo, distance_threshold) do + file = ExGram.get_file!(photo.file_id) + + photo_file_content = Telegram.download_file_content!(file.file_path) + + TypesenseClient.search_photos!( + %{ + url: photo_url(bot_id, file.file_id), + caption: Map.get(photo, "caption", ""), + image: Base.encode64(photo_file_content) + }, + distance_threshold: distance_threshold + ) + end + + defp pick_file_id_from_photo_url(photo_url) do + %{"file_id" => file_id} = + Regex.named_captures(~r"/files/(?\d+)/(?.+)", photo_url) + + file_id + end + + defp answer_similar_photos(chat_id, nil) do + send_message(chat_id, "No similar photos found.") + end + + defp answer_similar_photos(chat_id, similar_photos) do + media = + Enum.map(similar_photos, fn photo -> + %ExGram.Model.InputMediaPhoto{ + type: "photo", + media: pick_file_id_from_photo_url(photo["url"]) + } + end) + + ExGram.send_media_group(chat_id, media) + end + defp extract_urls_from_string(str) do regex = ~r/http[s]?:\/\/[^\s]+/ matches = Regex.scan(regex, str) @@ -298,7 +338,21 @@ defmodule SaveIt.Bot do case file_extension(file_name) do ext when ext in [".png", ".jpg", ".jpeg"] -> - ExGram.send_photo(chat_id, content) + {:ok, msg} = ExGram.send_photo(chat_id, content) + bot_id = msg.from.id + file_id = get_file_id(msg) + + image_base64 = + case file_content do + {:file, file} -> File.read!(file) |> Base.encode64() + {:file_content, file_content, _file_name} -> Base.encode64(file_content) + end + + TypesenseClient.create_photo!(%{ + url: photo_url(bot_id, file_id), + caption: file_name, + image: image_base64 + }) ".mp4" -> ExGram.send_video(chat_id, content, supports_streaming: true) @@ -314,4 +368,46 @@ defmodule SaveIt.Bot do defp file_extension(file_name) do Path.extname(file_name) end + + defp get_file_id(msg) do + photo = + msg.photo + |> List.last() + + photo.file_id + end + + defp login_google(chat) do + device_code = FileHelper.get_google_device_code(chat.id) + + case GoogleOAuth2DeviceFlow.exchange_device_code_for_token(device_code) do + {:ok, body} -> + FileHelper.set_google_access_token(chat.id, body["access_token"]) + send_message(chat.id, "Successfully logged in!") + + {:error, error} -> + Logger.error("Failed to log in: #{inspect(error)}") + + send_message(chat.id, """ + Failed to log in. + + Please run `/code` to get a new code, then run `/login` again. + """) + end + end + + defp photo_url(bot_id, file_id) do + proxy_url = Application.fetch_env!(:save_it, :web_url) <> "/telegram/files" + Logger.debug("bot_id: #{bot_id}, file_id: #{file_id}, proxy_url: #{proxy_url}") + + "#{proxy_url}/#{bot_id}/#{file_id}" + end + + defp search_similar_photos_based_on_caption(bot_id, photo, caption) do + if caption && String.contains?(caption, "/search") do + search_similar_photos(bot_id, photo, 0.5) + else + search_similar_photos(bot_id, photo, 0.1) + end + end end diff --git a/lib/save_it/google_oauth2_device_flow.ex b/lib/save_it/google_oauth2_device_flow.ex index 28223ab..4288084 100644 --- a/lib/save_it/google_oauth2_device_flow.ex +++ b/lib/save_it/google_oauth2_device_flow.ex @@ -9,18 +9,22 @@ defmodule SaveIt.GoogleOAuth2DeviceFlow do {"Content-Type", "application/x-www-form-urlencoded"} ]) - @client_id Application.compile_env(:save_it, :google_oauth_client_id) - @client_secret Application.compile_env(:save_it, :google_oauth_client_secret) - @device_code_url "/device/code" - @token_url "/token" + defp get_env() do + client_id = Application.get_env(:save_it, :google_oauth_client_id) + client_secret = Application.get_env(:save_it, :google_oauth_client_secret) + + {client_id, client_secret} + end def get_device_code do + {client_id, _} = get_env() + body = %{ - client_id: @client_id, + client_id: client_id, scope: "https://www.googleapis.com/auth/drive.file" } - post(@device_code_url, body) + post("/device/code", body) |> handle_response() end @@ -39,14 +43,16 @@ defmodule SaveIt.GoogleOAuth2DeviceFlow do end def exchange_device_code_for_token(device_code) do + {client_id, client_secret} = get_env() + body = %{ - client_id: @client_id, - client_secret: @client_secret, + client_id: client_id, + client_secret: client_secret, device_code: device_code, grant_type: "urn:ietf:params:oauth:grant-type:device_code" } - post(@token_url, body) + post("/token", body) |> handle_response() end end diff --git a/lib/save_it/typesense_client.ex b/lib/save_it/typesense_client.ex new file mode 100644 index 0000000..81549cf --- /dev/null +++ b/lib/save_it/typesense_client.ex @@ -0,0 +1,141 @@ +defmodule SaveIt.TypesenseClient do + require Logger + + defp get_env() do + url = Application.get_env(:save_it, :typesense_url) + api_key = Application.get_env(:save_it, :typesense_api_key) + + {url, api_key} + end + + def create_photo!(photo) do + create_document!("photos", photo) + end + + def update_photo(photo) do + update_document("photos", photo) + end + + def get_photo(photo_id) do + get_document("photos", photo_id) + end + + def search_photos!(photo_params, opts \\ []) do + distance_threshold = Keyword.get(opts, :distance_threshold, 0.40) + photo = create_photo!(photo_params) + + photos = search_similar_photos!(photo["id"], distance_threshold: distance_threshold) + + photos + end + + def search_similar_photos!(photo_id, opts \\ []) when is_binary(photo_id) do + {url, api_key} = get_env() + + distance_threshold = Keyword.get(opts, :distance_threshold, 0.40) + + req = + Req.new( + base_url: url, + url: "/multi_search", + headers: [{"X-TYPESENSE-API-KEY", api_key}, {"Content-Type", "application/json"}] + ) + + {:ok, res} = + Req.post(req, + json: %{ + "searches" => [ + %{ + "collection" => "photos", + "q" => "*", + "vector_query" => + "image_embedding:([], id:#{photo_id}, distance_threshold: #{distance_threshold}, k: 4)", + "exclude_fields" => "image_embedding" + } + ] + } + ) + + res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) + end + + defp get_document(collection_name, document_id) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}/documents/#{document_id}", + headers: [{"X-TYPESENSE-API-KEY", api_key}], + params: [exclude_fields: "image_embedding"] + ) + + {:ok, res} = Req.get(req) + + res.body + end + + defp create_document!(collection_name, document) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}/documents", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = Req.post(req, json: document) + + res.body + end + + defp update_document(collection_name, document) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}/documents/#{document[:id]}", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = Req.patch(req, json: document) + + res.body + end + + def create_search_key() do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/keys", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = + Req.post(req, + json: %{ + "description" => "Search-only photos key", + "actions" => ["documents:search"], + "collections" => ["photos"] + } + ) + + %{ + url: url, + api_key: res.body["value"] + } + end +end diff --git a/lib/small_sdk/telegram.ex b/lib/small_sdk/telegram.ex new file mode 100644 index 0000000..5ecb982 --- /dev/null +++ b/lib/small_sdk/telegram.ex @@ -0,0 +1,26 @@ +defmodule SmallSdk.Telegram do + require Logger + + use Tesla + + plug(Tesla.Middleware.BaseUrl, "https://api.telegram.org") + + def download_file_content(file_path) when is_binary(file_path) do + url = "/file/bot#{System.get_env("TELEGRAM_BOT_TOKEN")}/#{file_path}" + + case get(url) do + {:ok, response} -> + {:ok, response.body} + + {:error, error} -> + {:error, error} + end + end + + def download_file_content!(file_path) when is_binary(file_path) do + case download_file_content(file_path) do + {:ok, body} -> body + {:error, error} -> raise "Error: #{inspect(error)}" + end + end +end diff --git a/mix.exs b/mix.exs index bacf8be..721ab7d 100644 --- a/mix.exs +++ b/mix.exs @@ -23,7 +23,8 @@ defmodule SaveIt.MixProject do {:ex_gram, "~> 0.53"}, {:tesla, "~> 1.11"}, {:hackney, "~> 1.12"}, - {:jason, "~> 1.4.1"} + {:jason, "~> 1.4.1"}, + {:req, "~> 0.5.0"} ] end end diff --git a/mix.lock b/mix.lock index 6117df1..7632233 100644 --- a/mix.lock +++ b/mix.lock @@ -1,14 +1,21 @@ %{ "certifi": {:hex, :certifi, "2.12.0", "2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], [], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"}, "ex_gram": {:hex, :ex_gram, "0.53.0", "3e01be7a3e31c8ebb896b13ab38e38505481ddab385c0228e4c41da3105e4e15", [:mix], [{:gun, "~> 2.0", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.20", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:tesla, "~> 1.2", [hex: :tesla, repo: "hexpm", optional: true]}], "hexpm", "c0ba891c35cbb30a5e960f06ce1011de8e997f433df838d39b331ea3717d5cc4"}, + "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"}, "hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~> 2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"}, + "hpax": {:hex, :hpax, "1.0.0", "28dcf54509fe2152a3d040e4e3df5b265dcb6cb532029ecbacf4ce52caea3fd2", [:mix], [], "hexpm", "7f1314731d711e2ca5fdc7fd361296593fc2542570b3105595bb0bc6d0fad601"}, "idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "metrics": {:hex, :metrics, "1.0.1", "25f094dea2cda98213cecc3aeff09e940299d950904393b2a29d191c346a8486", [:rebar3], [], "hexpm", "69b09adddc4f74a40716ae54d140f93beb0fb8978d8636eaded0c31b6f099f16"}, "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"}, "mimerl": {:hex, :mimerl, "1.3.0", "d0cd9fc04b9061f82490f6581e0128379830e78535e017f7780f37fea7545726", [:rebar3], [], "hexpm", "a1e15a50d1887217de95f0b9b0793e32853f7c258a5cd227650889b38839fe9d"}, + "mint": {:hex, :mint, "1.6.2", "af6d97a4051eee4f05b5500671d47c3a67dac7386045d87a904126fd4bbcea2e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "5ee441dffc1892f1ae59127f74afe8fd82fda6587794278d924e4d90ea3d63f9"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"}, + "req": {:hex, :req, "0.5.6", "8fe1eead4a085510fe3d51ad854ca8f20a622aae46e97b302f499dfb84f726ac", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "cfaa8e720945d46654853de39d368f40362c2641c4b2153c886418914b372185"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, + "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, "tesla": {:hex, :tesla, "1.12.1", "fe2bf4250868ee72e5d8b8dfa408d13a00747c41b7237b6aa3b9a24057346681", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:finch, "~> 0.13", [hex: :finch, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, ">= 1.0.0", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.2", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:msgpax, "~> 2.3", [hex: :msgpax, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "2391efc6243d37ead43afd0327b520314c7b38232091d4a440c1212626fdd6e7"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, } From f0eaba9c3082daf818cb71a6625e7ec28ad3fd93 Mon Sep 17 00:00:00 2001 From: TJ Date: Tue, 22 Oct 2024 10:00:04 +0900 Subject: [PATCH 2/6] =?UTF-8?q?feat:=20=F0=9F=8E=B8=20search=20photos=20us?= =?UTF-8?q?ing=20semantic=20search?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/save_it/bot.ex | 17 ++++++++++++++--- lib/save_it/typesense_client.ex | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/lib/save_it/bot.ex b/lib/save_it/bot.ex index 2e7ea45..04923f8 100644 --- a/lib/save_it/bot.ex +++ b/lib/save_it/bot.ex @@ -113,6 +113,12 @@ defmodule SaveIt.Bot do end end + def handle({:command, :search, %{chat: chat, photo: nil, text: q}} = msg, _context) do + photos = TypesenseClient.search_photos!(q) + + answer_photos(chat.id, photos) + end + def handle({:command, :search, %{chat: chat, photo: nil}}, _context) do send_message(chat.id, "Please send me a photo to search.") # TODO: ex_gram 是否可以支持连续对话? @@ -126,7 +132,7 @@ defmodule SaveIt.Bot do similar_photos = search_similar_photos_based_on_caption(bot_id, photo, caption) - answer_similar_photos(chat.id, similar_photos) + answer_photos(chat.id, similar_photos) end def handle({:text, text, %{chat: chat, message_id: message_id}}, _context) do @@ -228,6 +234,11 @@ defmodule SaveIt.Bot do {:ok, nil} end + def handle({:edited_message, _msg}, _context) do + Logger.warning("this is an edited message, ignore it") + {:ok, nil} + end + # def handle({:update, update}, _context) do # Logger.debug(":update: #{inspect(update)}") # {:ok, nil} @@ -261,11 +272,11 @@ defmodule SaveIt.Bot do file_id end - defp answer_similar_photos(chat_id, nil) do + defp answer_photos(chat_id, nil) do send_message(chat_id, "No similar photos found.") end - defp answer_similar_photos(chat_id, similar_photos) do + defp answer_photos(chat_id, similar_photos) do media = Enum.map(similar_photos, fn photo -> %ExGram.Model.InputMediaPhoto{ diff --git a/lib/save_it/typesense_client.ex b/lib/save_it/typesense_client.ex index 81549cf..ddccab1 100644 --- a/lib/save_it/typesense_client.ex +++ b/lib/save_it/typesense_client.ex @@ -20,6 +20,26 @@ defmodule SaveIt.TypesenseClient do get_document("photos", photo_id) end + def search_photos!(q) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "multi_search", + headers: [{"X-TYPESENSE-API-KEY", api_key}, {"Content-Type", "application/json"}] + ) + + {:ok, res} = + Req.post(req, + json: %{ + "searches" => [%{"collection" => "photos", "q" => q, "query_by" => "image_embedding"}] + } + ) + + res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) + end + def search_photos!(photo_params, opts \\ []) do distance_threshold = Keyword.get(opts, :distance_threshold, 0.40) photo = create_photo!(photo_params) From 24874b1b1daad67996a49eacd8afbc0acb06894c Mon Sep 17 00:00:00 2001 From: TJ Date: Tue, 22 Oct 2024 14:33:34 +0900 Subject: [PATCH 3/6] setup: typesense in docker --- .formatter.exs | 2 +- config/runtime.exs | 6 +- docker-compose.yml | 15 +++++ lib/save_it/bot.ex | 76 +++++++++++++++++-------- lib/save_it/typesense_client.ex | 40 ++++++++++--- lib/small_sdk/telegram.ex | 2 +- lib/small_sdk/typesense_admin_client.ex | 75 ++++++++++++++++++++++++ priv/typesense/reset.exs | 3 + 8 files changed, 180 insertions(+), 39 deletions(-) create mode 100644 docker-compose.yml create mode 100644 lib/small_sdk/typesense_admin_client.ex create mode 100644 priv/typesense/reset.exs diff --git a/.formatter.exs b/.formatter.exs index c0dbb59..f616733 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -2,7 +2,7 @@ [ inputs: [ "{mix,.formatter}.exs", - "{config,lib,test}/**/*.{ex,exs}", + "{config,lib,test,priv}/**/*.{ex,exs}", "_local/**/*.{ex,exs}" ] ] diff --git a/config/runtime.exs b/config/runtime.exs index f2cc286..8057887 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -1,12 +1,12 @@ import Config -config :save_it, :telegram_bot_token, System.fetch_env!("TELEGRAM_BOT_TOKEN") -config :ex_gram, token: System.fetch_env!("TELEGRAM_BOT_TOKEN") +config :save_it, :telegram_bot_token, System.get_env("TELEGRAM_BOT_TOKEN") +config :ex_gram, token: System.get_env("TELEGRAM_BOT_TOKEN") config :save_it, :google_oauth_client_id, System.get_env("GOOGLE_OAUTH_CLIENT_ID") config :save_it, :google_oauth_client_secret, System.get_env("GOOGLE_OAUTH_CLIENT_SECRET") -config :save_it, :typesense_url, System.get_env("TYPESENSE_URL", "http://localhost:8108") +config :save_it, :typesense_url, System.get_env("TYPESENSE_URL", "http://localhost:8100") config :save_it, :typesense_api_key, System.get_env("TYPESENSE_API_KEY", "xyz") # optional config :save_it, :web_url, System.get_env("WEB_URL", "http://localhost:4000") diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..eff3ed0 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,15 @@ +services: + typesense: + image: typesense/typesense:27.1 + restart: on-failure + hostname: typesense + ports: + - "8100:8108" + volumes: + - ./data/typesense-data:/data + command: "--data-dir /data --api-key=xyz --enable-cors" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8100/health"] + interval: 30s + timeout: 10s + retries: 5 diff --git a/lib/save_it/bot.ex b/lib/save_it/bot.ex index 04923f8..c4e0ff2 100644 --- a/lib/save_it/bot.ex +++ b/lib/save_it/bot.ex @@ -113,26 +113,23 @@ defmodule SaveIt.Bot do end end - def handle({:command, :search, %{chat: chat, photo: nil, text: q}} = msg, _context) do - photos = TypesenseClient.search_photos!(q) + def handle({:command, :search, %{chat: chat, photo: nil, text: q}}, _context) do + photos = TypesenseClient.search_photos!(q: q) answer_photos(chat.id, photos) end def handle({:command, :search, %{chat: chat, photo: nil}}, _context) do send_message(chat.id, "Please send me a photo to search.") - # TODO: ex_gram 是否可以支持连续对话? end def handle({:message, %{chat: chat, caption: caption, photo: photos}}, ctx) do photo = List.last(photos) - bot_id = ctx.bot_info.id - - similar_photos = - search_similar_photos_based_on_caption(bot_id, photo, caption) - - answer_photos(chat.id, similar_photos) + search_similar_photos_based_on_caption(photo, caption, + chat_id: chat.id, + bot_id: ctx.bot_info.id + ) end def handle({:text, text, %{chat: chat, message_id: message_id}}, _context) do @@ -250,19 +247,29 @@ defmodule SaveIt.Bot do # {:ok, nil} # end - defp search_similar_photos(bot_id, photo, distance_threshold) do + defp search_similar_photos(photo, opts) do file = ExGram.get_file!(photo.file_id) photo_file_content = Telegram.download_file_content!(file.file_path) - TypesenseClient.search_photos!( - %{ - url: photo_url(bot_id, file.file_id), + bot_id = Keyword.get(opts, :bot_id) + chat_id = Keyword.get(opts, :chat_id) + distance_threshold = Keyword.get(opts, :distance_threshold, 0.4) + + typesense_photo = + TypesenseClient.create_photo!(%{ + image: Base.encode64(photo_file_content), caption: Map.get(photo, "caption", ""), - image: Base.encode64(photo_file_content) - }, - distance_threshold: distance_threshold - ) + url: photo_url(bot_id, file.file_id), + belongs_to_id: chat_id + }) + + if typesense_photo != nil do + TypesenseClient.search_photos!( + typesense_photo["id"], + distance_threshold: distance_threshold + ) + end end defp pick_file_id_from_photo_url(photo_url) do @@ -273,7 +280,11 @@ defmodule SaveIt.Bot do end defp answer_photos(chat_id, nil) do - send_message(chat_id, "No similar photos found.") + send_message(chat_id, "No photos found.") + end + + defp answer_photos(chat_id, []) do + send_message(chat_id, "No photos found.") end defp answer_photos(chat_id, similar_photos) do @@ -281,7 +292,9 @@ defmodule SaveIt.Bot do Enum.map(similar_photos, fn photo -> %ExGram.Model.InputMediaPhoto{ type: "photo", - media: pick_file_id_from_photo_url(photo["url"]) + media: pick_file_id_from_photo_url(photo["url"]), + caption: "Found photos", + show_caption_above_media: true } end) @@ -360,9 +373,10 @@ defmodule SaveIt.Bot do end TypesenseClient.create_photo!(%{ - url: photo_url(bot_id, file_id), + image: image_base64, caption: file_name, - image: image_base64 + url: photo_url(bot_id, file_id), + belongs_to_id: chat_id }) ".mp4" -> @@ -409,16 +423,28 @@ defmodule SaveIt.Bot do defp photo_url(bot_id, file_id) do proxy_url = Application.fetch_env!(:save_it, :web_url) <> "/telegram/files" - Logger.debug("bot_id: #{bot_id}, file_id: #{file_id}, proxy_url: #{proxy_url}") "#{proxy_url}/#{bot_id}/#{file_id}" end - defp search_similar_photos_based_on_caption(bot_id, photo, caption) do + defp search_similar_photos_based_on_caption(photo, caption, opts) do + bot_id = Keyword.get(opts, :bot_id) + chat_id = Keyword.get(opts, :chat_id) + if caption && String.contains?(caption, "/search") do - search_similar_photos(bot_id, photo, 0.5) + similar_photos = + search_similar_photos(photo, + distance_threshold: 0.4, + bot_id: bot_id, + chat_id: chat_id + ) + + answer_photos(chat_id, similar_photos) else - search_similar_photos(bot_id, photo, 0.1) + similar_photos = + search_similar_photos(photo, distance_threshold: 0.1, bot_id: bot_id, chat_id: chat_id) + + answer_photos(chat_id, similar_photos) end end end diff --git a/lib/save_it/typesense_client.ex b/lib/save_it/typesense_client.ex index ddccab1..fea3bd9 100644 --- a/lib/save_it/typesense_client.ex +++ b/lib/save_it/typesense_client.ex @@ -8,8 +8,20 @@ defmodule SaveIt.TypesenseClient do {url, api_key} end - def create_photo!(photo) do - create_document!("photos", photo) + def create_photo!( + %{ + belongs_to_id: belongs_to_id + } = photo_params + ) do + photo_create_input = + photo_params + |> Map.put(:belongs_to_id, Integer.to_string(belongs_to_id)) + |> Map.put(:inserted_at, DateTime.utc_now() |> DateTime.to_unix()) + + create_document!( + "photos", + photo_create_input + ) end def update_photo(photo) do @@ -20,7 +32,7 @@ defmodule SaveIt.TypesenseClient do get_document("photos", photo_id) end - def search_photos!(q) do + def search_photos!(q: q) do {url, api_key} = get_env() req = @@ -33,18 +45,28 @@ defmodule SaveIt.TypesenseClient do {:ok, res} = Req.post(req, json: %{ - "searches" => [%{"collection" => "photos", "q" => q, "query_by" => "image_embedding"}] + "searches" => [ + %{ + "q" => q, + "query_by" => "image_embedding", + "collection" => "photos", + "prefix" => false, + "vector_query" => "image_embedding:([], k: 5, distance_threshold: 0.75)", + "exclude_fields" => "image_embedding" + } + ] } ) + Logger.debug("debug: res #{inspect(res)}") + res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) end - def search_photos!(photo_params, opts \\ []) do - distance_threshold = Keyword.get(opts, :distance_threshold, 0.40) - photo = create_photo!(photo_params) + def search_photos!(id, opts \\ []) do + distance_threshold = Keyword.get(opts, :distance_threshold, 0.4) - photos = search_similar_photos!(photo["id"], distance_threshold: distance_threshold) + photos = search_similar_photos!(id, distance_threshold: distance_threshold) photos end @@ -52,7 +74,7 @@ defmodule SaveIt.TypesenseClient do def search_similar_photos!(photo_id, opts \\ []) when is_binary(photo_id) do {url, api_key} = get_env() - distance_threshold = Keyword.get(opts, :distance_threshold, 0.40) + distance_threshold = Keyword.get(opts, :distance_threshold, 0.4) req = Req.new( diff --git a/lib/small_sdk/telegram.ex b/lib/small_sdk/telegram.ex index 5ecb982..ad4586c 100644 --- a/lib/small_sdk/telegram.ex +++ b/lib/small_sdk/telegram.ex @@ -6,7 +6,7 @@ defmodule SmallSdk.Telegram do plug(Tesla.Middleware.BaseUrl, "https://api.telegram.org") def download_file_content(file_path) when is_binary(file_path) do - url = "/file/bot#{System.get_env("TELEGRAM_BOT_TOKEN")}/#{file_path}" + url = "/file/bot#{Application.fetch_env!(:save_it, :telegram_bot_token)}/#{file_path}" case get(url) do {:ok, response} -> diff --git a/lib/small_sdk/typesense_admin_client.ex b/lib/small_sdk/typesense_admin_client.ex new file mode 100644 index 0000000..778ad7a --- /dev/null +++ b/lib/small_sdk/typesense_admin_client.ex @@ -0,0 +1,75 @@ +defmodule SmallSdk.TypesenseAdminClient do + @photos_schema %{ + "name" => "photos", + "fields" => [ + # image: base64 encoded string + %{"name" => "image", "type" => "image", "store" => false}, + %{ + "name" => "image_embedding", + "type" => "float[]", + "embed" => %{ + "from" => ["image"], + "model_config" => %{ + "model_name" => "ts/clip-vit-b-p32" + } + } + }, + %{"name" => "caption", "type" => "string", "optional" => true, "facet" => false}, + # "telegram:///" + %{"name" => "url", "type" => "string"}, + # chat.id -> string + %{"name" => "belongs_to_id", "type" => "string"}, + # unix timestamp + %{"name" => "inserted_at", "type" => "int64"} + ], + "default_sorting_field" => "inserted_at" + } + + def reset() do + delete_collection!(@photos_schema["name"]) + create_collection!(@photos_schema) + end + + def create_collection!(schema) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = Req.post(req, json: schema) + + res.body + end + + def delete_collection!(collection_name) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = Req.delete(req) + + res.body + end + + defp get_env() do + url = Application.fetch_env!(:save_it, :typesense_url) + api_key = Application.fetch_env!(:save_it, :typesense_api_key) + + {url, api_key} + end +end diff --git a/priv/typesense/reset.exs b/priv/typesense/reset.exs new file mode 100644 index 0000000..e54c206 --- /dev/null +++ b/priv/typesense/reset.exs @@ -0,0 +1,3 @@ +# mix run priv/typesense/reset.exs + +SmallSdk.TypesenseAdminClient.reset() From f2e48781a935490468702982be3a94a9904459a2 Mon Sep 17 00:00:00 2001 From: TJ Date: Tue, 22 Oct 2024 15:00:26 +0900 Subject: [PATCH 4/6] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20sdk?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/save_it/bot.ex | 27 +++-- ...typesense_client.ex => typesense_photo.ex} | 104 ++---------------- lib/small_sdk/typesense.ex | 90 +++++++++++++++ ...nse_admin_client.ex => typesense_admin.ex} | 2 +- priv/typesense/reset.exs | 2 +- 5 files changed, 115 insertions(+), 110 deletions(-) rename lib/save_it/{typesense_client.ex => typesense_photo.ex} (50%) create mode 100644 lib/small_sdk/typesense.ex rename lib/small_sdk/{typesense_admin_client.ex => typesense_admin.ex} (97%) diff --git a/lib/save_it/bot.ex b/lib/save_it/bot.ex index c4e0ff2..b02538e 100644 --- a/lib/save_it/bot.ex +++ b/lib/save_it/bot.ex @@ -5,7 +5,7 @@ defmodule SaveIt.Bot do alias SaveIt.GoogleDrive alias SaveIt.GoogleOAuth2DeviceFlow - alias SaveIt.TypesenseClient + alias SaveIt.TypesensePhoto alias SmallSdk.Telegram @@ -114,7 +114,7 @@ defmodule SaveIt.Bot do end def handle({:command, :search, %{chat: chat, photo: nil, text: q}}, _context) do - photos = TypesenseClient.search_photos!(q: q) + photos = TypesensePhoto.search_photos!(q: q) answer_photos(chat.id, photos) end @@ -236,16 +236,15 @@ defmodule SaveIt.Bot do {:ok, nil} end - # def handle({:update, update}, _context) do - # Logger.debug(":update: #{inspect(update)}") - # {:ok, nil} - # end + def handle({:update, _update}, _context) do + Logger.warning("this is an update, ignore it") + {:ok, nil} + end - # Doc: https://hexdocs.pm/ex_gram/readme.html#how-to-handle-messages - # def handle({:message, message}, _context) do - # Logger.debug(":message: #{inspect(message)}") - # {:ok, nil} - # end + def handle({:message, _message}, _context) do + Logger.warning("this is a message, ignore it") + {:ok, nil} + end defp search_similar_photos(photo, opts) do file = ExGram.get_file!(photo.file_id) @@ -257,7 +256,7 @@ defmodule SaveIt.Bot do distance_threshold = Keyword.get(opts, :distance_threshold, 0.4) typesense_photo = - TypesenseClient.create_photo!(%{ + TypesensePhoto.create_photo!(%{ image: Base.encode64(photo_file_content), caption: Map.get(photo, "caption", ""), url: photo_url(bot_id, file.file_id), @@ -265,7 +264,7 @@ defmodule SaveIt.Bot do }) if typesense_photo != nil do - TypesenseClient.search_photos!( + TypesensePhoto.search_photos!( typesense_photo["id"], distance_threshold: distance_threshold ) @@ -372,7 +371,7 @@ defmodule SaveIt.Bot do {:file_content, file_content, _file_name} -> Base.encode64(file_content) end - TypesenseClient.create_photo!(%{ + TypesensePhoto.create_photo!(%{ image: image_base64, caption: file_name, url: photo_url(bot_id, file_id), diff --git a/lib/save_it/typesense_client.ex b/lib/save_it/typesense_photo.ex similarity index 50% rename from lib/save_it/typesense_client.ex rename to lib/save_it/typesense_photo.ex index fea3bd9..239f447 100644 --- a/lib/save_it/typesense_client.ex +++ b/lib/save_it/typesense_photo.ex @@ -1,12 +1,5 @@ -defmodule SaveIt.TypesenseClient do - require Logger - - defp get_env() do - url = Application.get_env(:save_it, :typesense_url) - api_key = Application.get_env(:save_it, :typesense_api_key) - - {url, api_key} - end +defmodule SaveIt.TypesensePhoto do + alias SmallSdk.Typesense def create_photo!( %{ @@ -18,18 +11,18 @@ defmodule SaveIt.TypesenseClient do |> Map.put(:belongs_to_id, Integer.to_string(belongs_to_id)) |> Map.put(:inserted_at, DateTime.utc_now() |> DateTime.to_unix()) - create_document!( + Typesense.create_document!( "photos", photo_create_input ) end def update_photo(photo) do - update_document("photos", photo) + Typesense.update_document("photos", photo) end def get_photo(photo_id) do - get_document("photos", photo_id) + Typesense.get_document("photos", photo_id) end def search_photos!(q: q) do @@ -58,17 +51,13 @@ defmodule SaveIt.TypesenseClient do } ) - Logger.debug("debug: res #{inspect(res)}") - res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) end def search_photos!(id, opts \\ []) do distance_threshold = Keyword.get(opts, :distance_threshold, 0.4) - photos = search_similar_photos!(id, distance_threshold: distance_threshold) - - photos + search_similar_photos!(id, distance_threshold: distance_threshold) end def search_similar_photos!(photo_id, opts \\ []) when is_binary(photo_id) do @@ -101,83 +90,10 @@ defmodule SaveIt.TypesenseClient do res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) end - defp get_document(collection_name, document_id) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}/documents/#{document_id}", - headers: [{"X-TYPESENSE-API-KEY", api_key}], - params: [exclude_fields: "image_embedding"] - ) - - {:ok, res} = Req.get(req) - - res.body - end - - defp create_document!(collection_name, document) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}/documents", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - - {:ok, res} = Req.post(req, json: document) - - res.body - end - - defp update_document(collection_name, document) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}/documents/#{document[:id]}", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - - {:ok, res} = Req.patch(req, json: document) - - res.body - end - - def create_search_key() do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/keys", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - - {:ok, res} = - Req.post(req, - json: %{ - "description" => "Search-only photos key", - "actions" => ["documents:search"], - "collections" => ["photos"] - } - ) + defp get_env() do + url = Application.fetch_env!(:save_it, :typesense_url) + api_key = Application.fetch_env!(:save_it, :typesense_api_key) - %{ - url: url, - api_key: res.body["value"] - } + {url, api_key} end end diff --git a/lib/small_sdk/typesense.ex b/lib/small_sdk/typesense.ex new file mode 100644 index 0000000..3ce5af2 --- /dev/null +++ b/lib/small_sdk/typesense.ex @@ -0,0 +1,90 @@ +defmodule SmallSdk.Typesense do + require Logger + + def create_document!(collection_name, document) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}/documents", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = Req.post(req, json: document) + + res.body + end + + def get_document(collection_name, document_id) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}/documents/#{document_id}", + headers: [{"X-TYPESENSE-API-KEY", api_key}], + params: [exclude_fields: "image_embedding"] + ) + + {:ok, res} = Req.get(req) + + res.body + end + + def update_document(collection_name, document) do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/collections/#{collection_name}/documents/#{document[:id]}", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = Req.patch(req, json: document) + + res.body + end + + def create_search_key() do + {url, api_key} = get_env() + + req = + Req.new( + base_url: url, + url: "/keys", + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + + {:ok, res} = + Req.post(req, + json: %{ + "description" => "Search-only photos key", + "actions" => ["documents:search"], + "collections" => ["photos"] + } + ) + + %{ + url: url, + api_key: res.body["value"] + } + end + + defp get_env() do + url = Application.get_env(:save_it, :typesense_url) + api_key = Application.get_env(:save_it, :typesense_api_key) + + {url, api_key} + end +end diff --git a/lib/small_sdk/typesense_admin_client.ex b/lib/small_sdk/typesense_admin.ex similarity index 97% rename from lib/small_sdk/typesense_admin_client.ex rename to lib/small_sdk/typesense_admin.ex index 778ad7a..919c5e6 100644 --- a/lib/small_sdk/typesense_admin_client.ex +++ b/lib/small_sdk/typesense_admin.ex @@ -1,4 +1,4 @@ -defmodule SmallSdk.TypesenseAdminClient do +defmodule SmallSdk.TypesenseAdmin do @photos_schema %{ "name" => "photos", "fields" => [ diff --git a/priv/typesense/reset.exs b/priv/typesense/reset.exs index e54c206..00773d2 100644 --- a/priv/typesense/reset.exs +++ b/priv/typesense/reset.exs @@ -1,3 +1,3 @@ # mix run priv/typesense/reset.exs -SmallSdk.TypesenseAdminClient.reset() +SmallSdk.TypesenseAdmin.reset() From e621e798f3b4270b352aef098a2682cd78614d2f Mon Sep 17 00:00:00 2001 From: TJ Date: Tue, 22 Oct 2024 15:28:25 +0900 Subject: [PATCH 5/6] =?UTF-8?q?refactor:=20=F0=9F=92=A1=20fix=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/worklogs/2024-10-21.md | 2 +- lib/save_it/bot.ex | 61 +++++++++------ lib/save_it/google_oauth2_device_flow.ex | 14 ++-- lib/save_it/typesense_photo.ex | 95 ++++++++++++------------ lib/small_sdk/typesense.ex | 70 ++++++----------- lib/small_sdk/typesense_admin.ex | 39 ++++------ 6 files changed, 131 insertions(+), 150 deletions(-) diff --git a/docs/worklogs/2024-10-21.md b/docs/worklogs/2024-10-21.md index 9f4683a..1791572 100644 --- a/docs/worklogs/2024-10-21.md +++ b/docs/worklogs/2024-10-21.md @@ -20,5 +20,5 @@ bot_token = get_bot_token(bot_id) file = get_file(file_id) file_path = file.file_path -download_url = "https://api.telegram.org/file/#{bot_ken}/#{file_path}" +download_url = "https://api.telegram.org/file/#{bot_token}/#{file_path}" ``` diff --git a/lib/save_it/bot.ex b/lib/save_it/bot.ex index b02538e..319a8af 100644 --- a/lib/save_it/bot.ex +++ b/lib/save_it/bot.ex @@ -272,10 +272,16 @@ defmodule SaveIt.Bot do end defp pick_file_id_from_photo_url(photo_url) do - %{"file_id" => file_id} = + captures = Regex.named_captures(~r"/files/(?\d+)/(?.+)", photo_url) - file_id + if captures == nil do + Logger.error("Invalid photo URL: #{photo_url}") + nil + else + %{"file_id" => file_id} = captures + file_id + end end defp answer_photos(chat_id, nil) do @@ -297,7 +303,14 @@ defmodule SaveIt.Bot do } end) - ExGram.send_media_group(chat_id, media) + case ExGram.send_media_group(chat_id, media) do + {:ok, _response} -> + :ok + + {:error, reason} -> + Logger.error("Failed to send media group: #{inspect(reason)}") + send_message(chat_id, "Failed to send photos.") + end end defp extract_urls_from_string(str) do @@ -394,11 +407,15 @@ defmodule SaveIt.Bot do end defp get_file_id(msg) do - photo = - msg.photo - |> List.last() + case msg.photo do + photos when is_list(photos) and length(photos) > 0 -> + photo = List.last(photos) + photo.file_id - photo.file_id + _ -> + Logger.error("No photo found in the message") + nil + end end defp login_google(chat) do @@ -423,27 +440,29 @@ defmodule SaveIt.Bot do defp photo_url(bot_id, file_id) do proxy_url = Application.fetch_env!(:save_it, :web_url) <> "/telegram/files" - "#{proxy_url}/#{bot_id}/#{file_id}" + encoded_bot_id = URI.encode(bot_id |> to_string()) + encoded_file_id = URI.encode(file_id) + "#{proxy_url}/#{encoded_bot_id}/#{encoded_file_id}" end defp search_similar_photos_based_on_caption(photo, caption, opts) do bot_id = Keyword.get(opts, :bot_id) chat_id = Keyword.get(opts, :chat_id) - if caption && String.contains?(caption, "/search") do - similar_photos = - search_similar_photos(photo, - distance_threshold: 0.4, - bot_id: bot_id, - chat_id: chat_id - ) + distance_threshold = + if caption && String.contains?(caption, "/search") do + 0.4 + else + 0.1 + end - answer_photos(chat_id, similar_photos) - else - similar_photos = - search_similar_photos(photo, distance_threshold: 0.1, bot_id: bot_id, chat_id: chat_id) + similar_photos = + search_similar_photos(photo, + distance_threshold: distance_threshold, + bot_id: bot_id, + chat_id: chat_id + ) - answer_photos(chat_id, similar_photos) - end + answer_photos(chat_id, similar_photos) end end diff --git a/lib/save_it/google_oauth2_device_flow.ex b/lib/save_it/google_oauth2_device_flow.ex index 4288084..3fed3fd 100644 --- a/lib/save_it/google_oauth2_device_flow.ex +++ b/lib/save_it/google_oauth2_device_flow.ex @@ -9,13 +9,6 @@ defmodule SaveIt.GoogleOAuth2DeviceFlow do {"Content-Type", "application/x-www-form-urlencoded"} ]) - defp get_env() do - client_id = Application.get_env(:save_it, :google_oauth_client_id) - client_secret = Application.get_env(:save_it, :google_oauth_client_secret) - - {client_id, client_secret} - end - def get_device_code do {client_id, _} = get_env() @@ -55,4 +48,11 @@ defmodule SaveIt.GoogleOAuth2DeviceFlow do post("/token", body) |> handle_response() end + + defp get_env() do + client_id = Application.fetch_env!(:save_it, :google_oauth_client_id) + client_secret = Application.fetch_env!(:save_it, :google_oauth_client_secret) + + {client_id, client_secret} + end end diff --git a/lib/save_it/typesense_photo.ex b/lib/save_it/typesense_photo.ex index 239f447..9bcf152 100644 --- a/lib/save_it/typesense_photo.ex +++ b/lib/save_it/typesense_photo.ex @@ -18,7 +18,7 @@ defmodule SaveIt.TypesensePhoto do end def update_photo(photo) do - Typesense.update_document("photos", photo) + Typesense.update_document("photos", photo.id, photo) end def get_photo(photo_id) do @@ -26,32 +26,23 @@ defmodule SaveIt.TypesensePhoto do end def search_photos!(q: q) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "multi_search", - headers: [{"X-TYPESENSE-API-KEY", api_key}, {"Content-Type", "application/json"}] - ) - - {:ok, res} = - Req.post(req, - json: %{ - "searches" => [ - %{ - "q" => q, - "query_by" => "image_embedding", - "collection" => "photos", - "prefix" => false, - "vector_query" => "image_embedding:([], k: 5, distance_threshold: 0.75)", - "exclude_fields" => "image_embedding" - } - ] + req_body = %{ + "searches" => [ + %{ + "q" => q, + "query_by" => "image_embedding", + "collection" => "photos", + "prefix" => false, + "vector_query" => "image_embedding:([], k: 5, distance_threshold: 0.75)", + "exclude_fields" => "image_embedding" } - ) + ] + } + + req = build_request("/multi_search") + {:ok, res} = Req.post(req, json: req_body) - res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) + res.body["results"] |> typesense_results_to_documents() end def search_photos!(id, opts \\ []) do @@ -61,33 +52,28 @@ defmodule SaveIt.TypesensePhoto do end def search_similar_photos!(photo_id, opts \\ []) when is_binary(photo_id) do - {url, api_key} = get_env() - distance_threshold = Keyword.get(opts, :distance_threshold, 0.4) - req = - Req.new( - base_url: url, - url: "/multi_search", - headers: [{"X-TYPESENSE-API-KEY", api_key}, {"Content-Type", "application/json"}] - ) - - {:ok, res} = - Req.post(req, - json: %{ - "searches" => [ - %{ - "collection" => "photos", - "q" => "*", - "vector_query" => - "image_embedding:([], id:#{photo_id}, distance_threshold: #{distance_threshold}, k: 4)", - "exclude_fields" => "image_embedding" - } - ] + req_body = %{ + "searches" => [ + %{ + "collection" => "photos", + "q" => "*", + "vector_query" => + "image_embedding:([], id:#{photo_id}, distance_threshold: #{distance_threshold}, k: 4)", + "exclude_fields" => "image_embedding" } - ) + ] + } + + req = build_request("/multi_search") + {:ok, res} = Req.post(req, json: req_body) - res.body["results"] |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) + res.body["results"] |> typesense_results_to_documents() + end + + defp typesense_results_to_documents(results) do + results |> hd() |> Map.get("hits") |> Enum.map(&Map.get(&1, "document")) end defp get_env() do @@ -96,4 +82,17 @@ defmodule SaveIt.TypesensePhoto do {url, api_key} end + + defp build_request(path) do + {url, api_key} = get_env() + + Req.new( + base_url: url, + url: path, + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + end end diff --git a/lib/small_sdk/typesense.ex b/lib/small_sdk/typesense.ex index 3ce5af2..c878012 100644 --- a/lib/small_sdk/typesense.ex +++ b/lib/small_sdk/typesense.ex @@ -2,69 +2,28 @@ defmodule SmallSdk.Typesense do require Logger def create_document!(collection_name, document) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}/documents", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - + req = build_request("/collections/#{collection_name}/documents") {:ok, res} = Req.post(req, json: document) res.body end def get_document(collection_name, document_id) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}/documents/#{document_id}", - headers: [{"X-TYPESENSE-API-KEY", api_key}], - params: [exclude_fields: "image_embedding"] - ) - + req = build_request("/collections/#{collection_name}/documents/#{document_id}") {:ok, res} = Req.get(req) res.body end - def update_document(collection_name, document) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}/documents/#{document[:id]}", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - - {:ok, res} = Req.patch(req, json: document) + def update_document(collection_name, document_id, update_input) do + req = build_request("/collections/#{collection_name}/documents/#{document_id}") + {:ok, res} = Req.patch(req, json: update_input) res.body end def create_search_key() do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/keys", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) + req = build_request("/keys") {:ok, res} = Req.post(req, @@ -82,9 +41,22 @@ defmodule SmallSdk.Typesense do end defp get_env() do - url = Application.get_env(:save_it, :typesense_url) - api_key = Application.get_env(:save_it, :typesense_api_key) + url = Application.fetch_env!(:save_it, :typesense_url) + api_key = Application.fetch_env!(:save_it, :typesense_api_key) {url, api_key} end + + defp build_request(path) do + {url, api_key} = get_env() + + Req.new( + base_url: url, + url: path, + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + end end diff --git a/lib/small_sdk/typesense_admin.ex b/lib/small_sdk/typesense_admin.ex index 919c5e6..1b5fd6d 100644 --- a/lib/small_sdk/typesense_admin.ex +++ b/lib/small_sdk/typesense_admin.ex @@ -31,36 +31,14 @@ defmodule SmallSdk.TypesenseAdmin do end def create_collection!(schema) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - + req = build_request("/collections") {:ok, res} = Req.post(req, json: schema) res.body end def delete_collection!(collection_name) do - {url, api_key} = get_env() - - req = - Req.new( - base_url: url, - url: "/collections/#{collection_name}", - headers: [ - {"Content-Type", "application/json"}, - {"X-TYPESENSE-API-KEY", api_key} - ] - ) - + req = build_request("/collections/#{collection_name}") {:ok, res} = Req.delete(req) res.body @@ -72,4 +50,17 @@ defmodule SmallSdk.TypesenseAdmin do {url, api_key} end + + defp build_request(path) do + {url, api_key} = get_env() + + Req.new( + base_url: url, + url: path, + headers: [ + {"Content-Type", "application/json"}, + {"X-TYPESENSE-API-KEY", api_key} + ] + ) + end end From fb13ac193b047488a9719d3102e37aa0a94d0e3e Mon Sep 17 00:00:00 2001 From: TJ Date: Tue, 22 Oct 2024 17:28:22 +0900 Subject: [PATCH 6/6] fix: miss get_env --- .github/workflows/test.yml | 40 ++++++++++++++++++++++++++++++++++++++ lib/save_it.ex | 5 +++++ lib/small_sdk/typesense.ex | 1 + 3 files changed, 46 insertions(+) create mode 100644 .github/workflows/test.yml create mode 100644 lib/save_it.ex diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..0a47213 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,40 @@ +name: Elixir CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Elixir + uses: erlef/setup-beam@v1 + with: + elixir-version: "1.15" + otp-version: "26.0" + + - name: Cache dependencies + uses: actions/cache@v3 + with: + path: deps + key: ${{ runner.os }}-mix-${{ hashFiles('mix.lock') }} + restore-keys: | + ${{ runner.os }}-mix- + + - name: Install dependencies + run: | + mix local.hex --force + mix local.rebar --force + mix deps.get + + - name: Run tests + run: mix test diff --git a/lib/save_it.ex b/lib/save_it.ex new file mode 100644 index 0000000..90db469 --- /dev/null +++ b/lib/save_it.ex @@ -0,0 +1,5 @@ +defmodule SaveIt do + def hello do + :world + end +end diff --git a/lib/small_sdk/typesense.ex b/lib/small_sdk/typesense.ex index c878012..6260713 100644 --- a/lib/small_sdk/typesense.ex +++ b/lib/small_sdk/typesense.ex @@ -23,6 +23,7 @@ defmodule SmallSdk.Typesense do end def create_search_key() do + {url, _} = get_env() req = build_request("/keys") {:ok, res} =