From c181e555db4a90f770418af67b1073ec958adb4d Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Fri, 29 May 2020 22:03:14 +0300 Subject: [PATCH 1/2] [#1794] Improvements to hashtags extraction from search query. --- .../controllers/search_controller.ex | 40 ++++++++++++++----- .../controllers/search_controller_test.exs | 36 ++++++++++++++++- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 77e2224e4..23fe378a6 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -113,22 +113,44 @@ defp resource_search(:v2, "hashtags", query, _options) do query |> prepare_tags() |> Enum.map(fn tag -> - tag = String.trim_leading(tag, "#") %{name: tag, url: tags_path <> tag} end) end defp resource_search(:v1, "hashtags", query, _options) do - query - |> prepare_tags() - |> Enum.map(fn tag -> String.trim_leading(tag, "#") end) + prepare_tags(query) end - defp prepare_tags(query) do - query - |> String.split() - |> Enum.uniq() - |> Enum.filter(fn tag -> String.starts_with?(tag, "#") end) + defp prepare_tags(query, add_joined_tag \\ true) do + tags = + query + |> String.split(~r/[^#\w]+/, trim: true) + |> Enum.uniq_by(&String.downcase/1) + + explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) + + tags = + if Enum.any?(explicit_tags) do + explicit_tags + else + tags + end + + tags = Enum.map(tags, fn tag -> String.trim_leading(tag, "#") end) + + if Enum.empty?(explicit_tags) && add_joined_tag do + tags + |> Kernel.++([joined_tag(tags)]) + |> Enum.uniq_by(&String.downcase/1) + else + tags + end + end + + defp joined_tag(tags) do + tags + |> Enum.map(fn tag -> String.capitalize(tag) end) + |> Enum.join() end defp with_fallback(f, fallback \\ []) do diff --git a/test/web/mastodon_api/controllers/search_controller_test.exs b/test/web/mastodon_api/controllers/search_controller_test.exs index 7d0cafccc..498290377 100644 --- a/test/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/web/mastodon_api/controllers/search_controller_test.exs @@ -75,6 +75,40 @@ test "search", %{conn: conn} do assert status["id"] == to_string(activity.id) end + test "constructs hashtags from search query", %{conn: conn} do + results = + conn + |> get("/api/v2/search?#{URI.encode_query(%{q: "some text with #explicit #hashtags"})}") + |> json_response_and_validate_schema(200) + + assert results["hashtags"] == [ + %{"name" => "explicit", "url" => "#{Web.base_url()}/tag/explicit"}, + %{"name" => "hashtags", "url" => "#{Web.base_url()}/tag/hashtags"} + ] + + results = + conn + |> get("/api/v2/search?#{URI.encode_query(%{q: "john doe JOHN DOE"})}") + |> json_response_and_validate_schema(200) + + assert results["hashtags"] == [ + %{"name" => "john", "url" => "#{Web.base_url()}/tag/john"}, + %{"name" => "doe", "url" => "#{Web.base_url()}/tag/doe"}, + %{"name" => "JohnDoe", "url" => "#{Web.base_url()}/tag/JohnDoe"} + ] + + results = + conn + |> get("/api/v2/search?#{URI.encode_query(%{q: "accident-prone"})}") + |> json_response_and_validate_schema(200) + + assert results["hashtags"] == [ + %{"name" => "accident", "url" => "#{Web.base_url()}/tag/accident"}, + %{"name" => "prone", "url" => "#{Web.base_url()}/tag/prone"}, + %{"name" => "AccidentProne", "url" => "#{Web.base_url()}/tag/AccidentProne"} + ] + end + test "excludes a blocked users from search results", %{conn: conn} do user = insert(:user) user_smith = insert(:user, %{nickname: "Agent", name: "I love 2hu"}) @@ -179,7 +213,7 @@ test "search", %{conn: conn} do [account | _] = results["accounts"] assert account["id"] == to_string(user_three.id) - assert results["hashtags"] == [] + assert results["hashtags"] == ["2hu"] [status] = results["statuses"] assert status["id"] == to_string(activity.id) From 24f40b8a26f95ee7f50b6023176d361660ceb35c Mon Sep 17 00:00:00 2001 From: Ivan Tashkinov Date: Sat, 30 May 2020 10:29:08 +0300 Subject: [PATCH 2/2] [#1794] Fixed search query splitting regex to deal with Unicode. Adjusted a test. --- lib/pleroma/web/mastodon_api/controllers/search_controller.ex | 2 +- test/web/mastodon_api/controllers/search_controller_test.exs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 23fe378a6..8840fc19c 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -124,7 +124,7 @@ defp resource_search(:v1, "hashtags", query, _options) do defp prepare_tags(query, add_joined_tag \\ true) do tags = query - |> String.split(~r/[^#\w]+/, trim: true) + |> String.split(~r/[^#\w]+/u, trim: true) |> Enum.uniq_by(&String.downcase/1) explicit_tags = Enum.filter(tags, fn tag -> String.starts_with?(tag, "#") end) diff --git a/test/web/mastodon_api/controllers/search_controller_test.exs b/test/web/mastodon_api/controllers/search_controller_test.exs index 498290377..84d46895e 100644 --- a/test/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/web/mastodon_api/controllers/search_controller_test.exs @@ -71,6 +71,10 @@ test "search", %{conn: conn} do get(conn, "/api/v2/search?q=天子") |> json_response_and_validate_schema(200) + assert results["hashtags"] == [ + %{"name" => "天子", "url" => "#{Web.base_url()}/tag/天子"} + ] + [status] = results["statuses"] assert status["id"] == to_string(activity.id) end