From a381b9bcd432d688210a2dced022cf93644a0a97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Ma=C3=B1as?= <carlos@spacenomads.com>
Date: Fri, 9 Jun 2023 19:34:30 +0200
Subject: [PATCH] fix(apreciabot): Remove super weird invisible unicode spaces

When a user's address is copied from the Ivory client for Mac, the client wraps it in invisible Unicode formatting characters. For example, "@user@mastodon.social" becomes "\u202a@user@mastodon.social\u202c" (U+202A LEFT-TO-RIGHT EMBEDDING before the address, U+202C POP DIRECTIONAL FORMATTING after it). Visually there is no difference, but the extra characters make it impossible for the bot to locate the user.

Considering my limited knowledge of Python, I suggest using the `re` module to strip these invisible characters from the mention text before it is split on spaces.

This is 100% a suggestion and 0% an obligation.
---
 apreciabot.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/apreciabot.py b/apreciabot.py
index 19c8880..fb808d5 100644
--- a/apreciabot.py
+++ b/apreciabot.py
@@ -1,3 +1,4 @@
+import re
 from bs4 import BeautifulSoup
 from common import get_api
 from common import list_append
@@ -21,6 +22,8 @@ last_ids = list_read(bot_name)
 max_notifications=10
 new_last_ids=[]
 notifications = api.notifications(types=["mention"],limit=max_notifications)
+no_unicode_spaces_pattern = r"[\u200B-\u200D\u202A\u202C\uFEFF]"
+
 for n in notifications:
     new_last_ids.append(n['id'])
 
@@ -30,7 +33,8 @@ for i in range(0, max_notifications - 5):
     n = notifications[i]
     if str(n['id']) not in last_ids:
         # Mentions data are HTML paragraphs so we delete everything between <> to clean it up
-        content = BeautifulSoup(n['status']['content'], "html.parser").get_text().split(" ")
+        rawContent = BeautifulSoup(n['status']['content'], "html.parser").get_text()
+        content = re.sub(no_unicode_spaces_pattern, "", rawContent).split(" ")
         try:
             first_mention = content[0]
             target = "@" + content[1]