SMS limiter

2025-10-27 19:00:00 +01:00
parent 369af34ad4
commit 20d4907fc5
2 changed files with 213 additions and 0 deletions
@@ -15,12 +15,112 @@ public function __construct(
        protected SmsClient $client,
    ) {}

+    /**
+     * Replace whitespace that tends to flip a message into Unicode encoding
+     * or trip up SMS providers.
+     *
+     * Every non-breaking space (U+00A0) and every tab character becomes a
+     * plain space. All other characters, CR and LF line breaks included, are
+     * returned untouched.
+     */
+    protected function normalizeForSms(string $text): string
+    {
+        // Single translation pass; both keys map to an ordinary ASCII space.
+        return strtr($text, [
+            "\u{00A0}" => ' ',
+            "\t" => ' ',
+        ]);
+    }
+
+    /**
+     * Tell whether every character of $text belongs to the GSM 03.38 (GSM-7)
+     * default alphabet or its extension table.
+     *
+     * Being ASCII is neither required nor sufficient: accented letters such
+     * as "é" or "Ñ" are part of the default alphabet, while the backtick, the
+     * tab and most other control characters are not. An empty string counts
+     * as GSM-7. Anything that is not in the table, malformed byte sequences
+     * included, makes the method return false.
+     *
+     * @return bool true when the text is encodable as GSM-7, false when it needs UCS-2
+     */
+    protected function isGsm7(string $text): bool
+    {
+        // Lookup set (character => true), built on the first call and reused afterwards.
+        static $alphabet = null;
+        if ($alphabet === null) {
+            $alphabet = array_fill_keys(mb_str_split(
+                // Printable part of the default alphabet.
+                '@£$¥èéùìòÇØøÅåΔ_ΦΓΛΩΠΨΣΘΞÆæßÉ !"#¤%&\'()*+,-./0123456789:;<=>?¡'
+                . 'ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÑÜ§¿abcdefghijklmnopqrstuvwxyzäöñüà'
+                // LF and CR from the default alphabet, form feed from the extension table.
+                . "\n\r\f"
+                // Remaining extension table characters (transmitted as ESC + character).
+                . '^{}\\[~]|€',
+                1,
+                'UTF-8'
+            ), true);
+        }
+
+        // mb_str_split walks the string once; indexing with mb_substr inside a
+        // loop would rescan it from the start for every character.
+        foreach (mb_str_split($text, 1, 'UTF-8') as $ch) {
+            if (!isset($alphabet[$ch])) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Count how many GSM-7 units (septets) $text occupies.
+     *
+     * Characters from the GSM 03.38 extension table are sent as an escape
+     * code followed by the character and therefore cost two units: ^ { } \ [ ~ ] |,
+     * the euro sign and the form feed. Every other character costs one unit;
+     * the method does not check that it is actually a GSM-7 character.
+     *
+     * @return int total number of units, 0 for an empty string
+     */
+    protected function gsm7Length(string $text): int
+    {
+        // Extension table as a lookup set (character => true), built once.
+        static $extended = null;
+        if ($extended === null) {
+            $extended = array_fill_keys(['^', '{', '}', '\\', '[', '~', ']', '|', '€', "\f"], true);
+        }
+
+        $units = 0;
+        // One linear pass over the characters instead of repeated mb_substr() calls.
+        foreach (mb_str_split($text, 1, 'UTF-8') as $ch) {
+            $units += isset($extended[$ch]) ? 2 : 1;
+        }
+
+        return $units;
+    }
+
+    /**
+     * Cut a message down to the provider's hard size limit.
+     *
+     * The text is first passed through normalizeForSms(). If isGsm7() accepts
+     * the result, the budget is 640 GSM-7 units and each character is priced
+     * by gsm7Length(). Otherwise the budget is 320 UCS-2 units, counted as
+     * UTF-16 code units, so a character outside the Basic Multilingual Plane
+     * (most emoji) costs two units.
+     *
+     * Truncation only happens on character boundaries: a two-unit character
+     * that does not fully fit is dropped together with everything after it.
+     *
+     * @param string $text UTF-8 message text
+     * @return string the normalized text, shortened if it exceeded the limit
+     */
+    public function enforceLengthLimit(string $text): string
+    {
+        $text = $this->normalizeForSms($text);
+        $isGsm = $this->isGsm7($text);
+        $limit = $isGsm ? 640 : 320;
+
+        $unitCost = $isGsm
+            // Reuse the shared cost table rather than duplicating the extension list here.
+            ? fn (string $ch): int => $this->gsm7Length($ch)
+            // A 4-byte UTF-8 sequence is a code point above U+FFFF, i.e. a surrogate pair in UTF-16.
+            : static fn (string $ch): int => strlen($ch) >= 4 ? 2 : 1;
+
+        // Every character costs at least one unit, so nothing past the first
+        // $limit code points can ever fit; slicing first bounds the work for
+        // very long inputs.
+        $candidates = mb_str_split(mb_substr($text, 0, $limit, 'UTF-8'), 1, 'UTF-8');
+
+        $out = '';
+        $used = 0;
+        foreach ($candidates as $ch) {
+            $cost = $unitCost($ch);
+            if ($used + $cost > $limit) {
+                break;
+            }
+            $out .= $ch;
+            $used += $cost;
+        }
+
+        return $out;
+    }
+
    /**
     * Send a raw text message.
     */
    public function sendRaw(SmsProfile $profile, string $to, string $content, ?SmsSender $sender = null, ?string $countryCode = null, bool $deliveryReport = false, ?string $clientReference = null): SmsLog
    {
        return DB::transaction(function () use ($profile, $to, $content, $sender, $countryCode, $deliveryReport, $clientReference): SmsLog {
+            // Enforce provider hard length limits before logging/sending
+            $content = $this->enforceLengthLimit($content);
            $log = new SmsLog([
                'uuid' => (string) Str::uuid(),
                'profile_id' => $profile->id,