From 0b0b1dabdfa2051d0b1d1ad846892da4801ca588 Mon Sep 17 00:00:00 2001 From: tusooa Date: Fri, 21 Jul 2023 13:54:10 -0400 Subject: [PATCH] Fix parsing non-ascii tags --- changelog.d/nonascii-tags.fix | 1 + src/services/matcher/matcher.service.js | 7 +++++-- test/unit/specs/services/matcher/matcher.spec.js | 6 ++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 changelog.d/nonascii-tags.fix diff --git a/changelog.d/nonascii-tags.fix b/changelog.d/nonascii-tags.fix new file mode 100644 index 00000000..e4c6dc82 --- /dev/null +++ b/changelog.d/nonascii-tags.fix @@ -0,0 +1 @@ +Fix parsing non-ascii tags diff --git a/src/services/matcher/matcher.service.js b/src/services/matcher/matcher.service.js index b6c4e909..54f02d31 100644 --- a/src/services/matcher/matcher.service.js +++ b/src/services/matcher/matcher.service.js @@ -14,8 +14,11 @@ export const mentionMatchesUrl = (attention, url) => { * @param {string} url */ export const extractTagFromUrl = (url) => { - const regex = /tag[s]*\/(\w+)$/g - const result = regex.exec(url) + const decoded = decodeURI(url) + // https://git.pleroma.social/pleroma/elixir-libraries/linkify/-/blob/master/lib/linkify/parser.ex + // https://www.pcre.org/original/doc/html/pcrepattern.html + const regex = /tag[s]*\/([\p{L}\p{N}_]*[\p{Alphabetic}_·\u{200c}][\p{L}\p{N}_·\p{M}\u{200c}]*)$/ug + const result = regex.exec(decoded) if (!result) { return false } diff --git a/test/unit/specs/services/matcher/matcher.spec.js b/test/unit/specs/services/matcher/matcher.spec.js index 7a2494f0..c6e9719d 100644 --- a/test/unit/specs/services/matcher/matcher.spec.js +++ b/test/unit/specs/services/matcher/matcher.spec.js @@ -78,5 +78,11 @@ describe('MatcherService', () => { expect(MatcherService.extractTagFromUrl(url)).to.eql(false) }) + + it('should return tag name from non-ascii tags', () => { + const url = encodeURI('https://website.com/tag/喵喵喵') + + expect(MatcherService.extractTagFromUrl(url)).to.eql('喵喵喵') + }) }) })