Move url-shortener handling into its own file

author Magnus Hagander <magnus@hagander.net>

Fri, 3 Nov 2023 11:47:27 +0000 (12:47 +0100)

committer Magnus Hagander <magnus@hagander.net>

Fri, 3 Nov 2023 11:47:27 +0000 (12:47 +0100)
author Magnus Hagander <magnus@hagander.net>
Fri, 3 Nov 2023 11:47:27 +0000 (12:47 +0100)
committer Magnus Hagander <magnus@hagander.net>
Fri, 3 Nov 2023 11:47:27 +0000 (12:47 +0100)
diff --git a/postgresqleu/confreg/backendforms.py b/postgresqleu/confreg/backendforms.py

index ad32daa937eb24675600c2d5df20308aaa96a2fd..179c441308326a679514e45773eda277f339d21e 100644 (file)
--- a/postgresqleu/confreg/backendforms.py
+++ b/postgresqleu/confreg/backendforms.py
@@ -26,7 +26,7 @@ from postgresqleu.util.widgets import TagOptionsTextWidget
  from postgresqleu.util.random import generate_random_token
  from postgresqleu.util.backendforms import BackendForm, BackendBeforeNewForm
  from postgresqleu.util.messaging import messaging_implementation_choices, get_messaging, get_messaging_class
-from postgresqleu.util.messaging.util import get_shortened_post_length
+from postgresqleu.util.messaging.short import get_shortened_post_length
  
  import postgresqleu.accounting.models
  
diff --git a/postgresqleu/confreg/campaigns.py b/postgresqleu/confreg/campaigns.py

index 64647cc761faa64f66883266ee0690bbc2340b61..456aee5602f0d125c0d249dfca87edf8e212864c 100644 (file)
--- a/postgresqleu/confreg/campaigns.py
+++ b/postgresqleu/confreg/campaigns.py
@@ -10,7 +10,7 @@ from postgresqleu.confreg.models import MessagingProvider
  from postgresqleu.confreg.twitter import post_conference_social, render_multiprovider_tweet
  from postgresqleu.confsponsor.models import Sponsor, SponsorshipLevel
  from postgresqleu.util.messaging import get_messaging, get_messaging_class
-from postgresqleu.util.messaging.util import get_shortened_post_length
+from postgresqleu.util.messaging.short import get_shortened_post_length
  
  import datetime
  import random
diff --git a/postgresqleu/util/messaging/sender.py b/postgresqleu/util/messaging/sender.py

index 7b015d78813e44761dfc305db5ef8322284f4c07..1a81bde84a06833abc87ba6583044b9814e958a6 100644 (file)
--- a/postgresqleu/util/messaging/sender.py
+++ b/postgresqleu/util/messaging/sender.py
@@ -12,7 +12,7 @@ import sys
  from postgresqleu.confreg.models import NotificationQueue
  from postgresqleu.confreg.models import ConferenceTweetQueue, ConferenceIncomingTweet
  from postgresqleu.confreg.models import ConferenceTweetQueueErrorLog
-from postgresqleu.util.messaging.util import truncate_shortened_post
+from postgresqleu.util.messaging.short import truncate_shortened_post
  
  
  def send_pending_messages(providers):
diff --git a/postgresqleu/util/messaging/short.py b/postgresqleu/util/messaging/short.py

new file mode 100644 (file)

index 0000000..7b54528
--- /dev/null
+++ b/postgresqleu/util/messaging/short.py
@@ -0,0 +1,55 @@
+import re
+
+# Functions for working with shortened posts
+
+# This does not appear to match everything in any shape or form, but we are only
+# using it against URLs that we have typed in ourselves, so it should be easy
+# enough.
+# Should be in sync with regexp in js/admin.js
+_re_urlmatcher = re.compile(r'\bhttps?://\S+', re.I)
+
+# This is currently the value for Twitter and the default for Mastodon, so just
+# use that globally for now.
+_url_shortened_len = 23
+_url_counts_as_characters = "https://short.url/{}".format((_url_shortened_len - len("https://short.url/")) * 'x')
+
+
+def get_shortened_post_length(txt):
+    return len(_re_urlmatcher.sub(_url_counts_as_characters, txt))
+
+
+# Truncate a text, taking into account URL shorteners. Will not truncate in the middle of a URL,
+# but right now will happily truncate in the middle of a word (room for improvement!)
+def truncate_shortened_post(txt, maxlen):
+    matches = list(_re_urlmatcher.finditer(txt))
+
+    if not matches:
+        # Not a single url, so just truncate
+        return txt[:maxlen]
+
+    firststart, firstend = matches[0].span()
+    if firststart + _url_shortened_len > maxlen:
+        # We hit the size limit before the url or in the middle of it, so skip the whole url
+        return txt[:firststart]
+
+    inlen = firstend
+    outlen = firststart + _url_shortened_len
+    for i, curr in enumerate(matches[1:]):
+        prevstart, prevend = matches[i].span()
+        currstart, currend = curr.span()
+
+        betweenlen = currstart - prevend
+        if outlen + betweenlen > maxlen:
+            # The limit was hit in the text between urls
+            left = maxlen - outlen
+            return txt[:inlen + (maxlen - outlen)]
+        if outlen + betweenlen + _url_shortened_len > maxlen:
+            # The limit was hit in the middle of this URL, so include all the text
+            # up to it, but skip the url.
+            return txt[:inlen + betweenlen]
+
+        # The whole URL fit
+        inlen += betweenlen + currend - currstart
+        outlen += betweenlen + _url_shortened_len
+
+    return txt[:inlen + (maxlen - outlen)]
diff --git a/postgresqleu/util/messaging/util.py b/postgresqleu/util/messaging/util.py

index ba736135b3a405eaa93a8b22ee7abeebdb23ab41..3f0d899c6ae7e5d8816bbe10bc885ca19eae1371 100644 (file)
--- a/postgresqleu/util/messaging/util.py
+++ b/postgresqleu/util/messaging/util.py
@@ -1,7 +1,6 @@
  from django.utils import timezone
  
  from datetime import timedelta
-import re
  
  from postgresqleu.confreg.models import NotificationQueue
  from postgresqleu.util.db import exec_no_result
@@ -83,56 +82,3 @@ def send_channel_message(messaging, channel, msg, expiry=timedelta(hours=1)):
  def notify_twitter_moderation(tweet, completed, approved):
      for messaging in tweet.conference.conferencemessaging_set.filter(socialmediamanagement=True, provider__active=True):
          get_messaging_class(messaging.provider.classname)(messaging.provider.id, messaging.provider.config).notify_twitter_moderation(messaging, tweet, completed, approved)
-
-
-# This does not appear to match everything in any shape or form, but we are only
-# using it against URLs that we have typed in ourselves, so it should be easy
-# enough.
-# Should be in sync with regexp in js/admin.js
-_re_urlmatcher = re.compile(r'\bhttps?://\S+', re.I)
-
-# This is currently the value for Twitter and the default for Mastodon, so just
-# use that globally for now.
-_url_shortened_len = 23
-_url_counts_as_characters = "https://short.url/{}".format((_url_shortened_len - len("https://short.url/")) * 'x')
-
-
-def get_shortened_post_length(txt):
-    return len(_re_urlmatcher.sub(_url_counts_as_characters, txt))
-
-
-# Truncate a text, taking into account URL shorterners. WIll not truncate in the middle of an URL,
-# but right now will happily truncate in the middle of a word (room for improvement!)
-def truncate_shortened_post(txt, maxlen):
-    matches = list(_re_urlmatcher.finditer(txt))
-
-    if not matches:
-        # Not a single url, so just truncate
-        return txt[:maxlen]
-
-    firststart, firstend = matches[0].span()
-    if firststart + _url_shortened_len > maxlen:
-        # We hit the size limit before the url or in the middle of it, so skip the whole url
-        return txt[:firststart]
-
-    inlen = firstend
-    outlen = firststart + _url_shortened_len
-    for i, curr in enumerate(matches[1:]):
-        prevstart, prevend = matches[i].span()
-        currstart, currend = curr.span()
-
-        betweenlen = currstart - prevend
-        if outlen + betweenlen > maxlen:
-            # The limit was hit in the text between urls
-            left = maxlen - outlen
-            return txt[:inlen + (maxlen - outlen)]
-        if outlen + betweenlen + _url_shortened_len > maxlen:
-            # The limit was hit in the middle of this URL, so include all the text
-            # up to it, but skip the url.
-            return txt[:inlen + betweenlen]
-
-        # The whole URL fit
-        inlen += betweenlen + currend - currstart
-        outlen += betweenlen + _url_shortened_len
-
-    return txt[:inlen + (maxlen - outlen)]
author	Magnus Hagander <magnus@hagander.net>
	Fri, 3 Nov 2023 11:47:27 +0000 (12:47 +0100)
committer	Magnus Hagander <magnus@hagander.net>
	Fri, 3 Nov 2023 11:47:27 +0000 (12:47 +0100)
postgresqleu/confreg/backendforms.py		patch \| blob \| blame \| history
postgresqleu/confreg/campaigns.py		patch \| blob \| blame \| history
postgresqleu/util/messaging/sender.py		patch \| blob \| blame \| history
postgresqleu/util/messaging/short.py	[new file with mode: 0644]	patch \| blob
postgresqleu/util/messaging/util.py		patch \| blob \| blame \| history