SMS limiter

This commit is contained in:
Simon Pocrnjič
2025-10-27 19:00:00 +01:00
parent 369af34ad4
commit 20d4907fc5
2 changed files with 213 additions and 0 deletions
+100
View File
@@ -15,12 +15,112 @@ public function __construct(
protected SmsClient $client,
) {}
/**
 * Swap whitespace variants that tend to upset SMS encoding for a plain space.
 *
 * Each non-breaking space (U+00A0) and each horizontal tab becomes one regular
 * space (U+0020). Everything else passes through untouched; in particular CR
 * and LF line breaks are preserved exactly as given.
 *
 * @param string $text UTF-8 message body.
 * @return string The message with NBSP and tab characters replaced by spaces.
 */
protected function normalizeForSms(string $text): string
{
    // Line endings are intentionally absent from the map: CRLF and LF are both kept as-is.
    $replacements = [
        "\u{00A0}" => ' ',
        "\t" => ' ',
    ];

    return strtr($text, $replacements);
}
/**
 * Conservatively decide whether a message can be sent using the GSM-7 alphabet.
 *
 * Accepted characters are LF, CR, printable ASCII (U+0020 to U+007E) except
 * the backtick, and the euro sign (reachable through the GSM extension table).
 * Any other character makes the method return false, which callers treat as
 * "needs UCS-2".
 *
 * The backtick and ASCII control characters such as tab, BEL or DEL are
 * rejected because they are single-byte in UTF-8 but are not part of the
 * GSM 03.38 default alphabet. Form feed is rejected as well to stay on the
 * conservative side.
 *
 * This remains a heuristic: accented letters that do exist in GSM-7 (for
 * example "é" or "ü") are still reported as non-GSM, which can only shorten
 * the allowed length, never exceed a limit.
 *
 * @param string $text UTF-8 message body.
 * @return bool True when every character is in the accepted set (an empty
 *              string is accepted); false otherwise, including when $text is
 *              not valid UTF-8.
 */
protected function isGsm7(string $text): bool
{
    // Search for the first character outside the accepted set. preg_match()
    // returns 0 when there is none, 1 when one is found, and false when the
    // subject is not valid UTF-8 -- only the first outcome means "GSM-7".
    // \x20-\x5F and \x61-\x7E together cover printable ASCII minus U+0060.
    return preg_match('/[^\n\r\x20-\x5F\x61-\x7E€]/u', $text) === 0;
}
/**
 * Measure a message in GSM-7 septets.
 *
 * Characters from the GSM extension table (^ { } \ [ ~ ] | and the euro sign)
 * are transmitted as an escape septet followed by the character, so each of
 * them counts as 2. Every other character counts as 1.
 *
 * @param string $text UTF-8 message body.
 * @return int Total number of GSM-7 units; 0 for an empty string.
 */
protected function gsm7Length(string $text): int
{
    // Keyed map so membership is a cheap isset() instead of a linear scan.
    $escaped = [
        '€' => true,
        '^' => true,
        '{' => true,
        '}' => true,
        '\\' => true,
        '[' => true,
        '~' => true,
        ']' => true,
        '|' => true,
    ];

    $units = 0;
    foreach (mb_str_split($text, 1, 'UTF-8') as $char) {
        $units += isset($escaped[$char]) ? 2 : 1;
    }

    return $units;
}
/**
 * Truncate a message to the provider hard limits: 640 GSM-7 units or
 * 320 UCS-2 (UTF-16) code units.
 *
 * The text is first passed through normalizeForSms(). The encoding is then
 * chosen with isGsm7() and the message is cut at the last whole character
 * that still fits:
 *  - GSM-7: each character is priced by gsm7Length(), so extension-table
 *    characters cost 2 units and are never kept "half".
 *  - UCS-2: characters above U+FFFF (for example most emoji) need a surrogate
 *    pair and therefore cost 2 code units; all other characters cost 1. A
 *    surrogate pair is never split.
 *
 * @param string $text UTF-8 message body.
 * @return string The normalized message, shortened if it exceeded the limit.
 */
public function enforceLengthLimit(string $text): string
{
    $text = $this->normalizeForSms($text);
    $isGsm = $this->isGsm7($text);
    $limit = $isGsm ? 640 : 320;

    $kept = '';
    $used = 0;
    foreach (mb_str_split($text, 1, 'UTF-8') as $char) {
        // A 4-byte UTF-8 sequence encodes a code point beyond the BMP, which
        // UTF-16 represents with two code units.
        $cost = $isGsm
            ? $this->gsm7Length($char)
            : (strlen($char) === 4 ? 2 : 1);

        if ($used + $cost > $limit) {
            // The next character would overflow the budget: stop before it.
            return $kept;
        }

        $kept .= $char;
        $used += $cost;
    }

    // Everything fit; hand back the normalized text unchanged.
    return $text;
}
/**
* Send a raw text message.
*/
public function sendRaw(SmsProfile $profile, string $to, string $content, ?SmsSender $sender = null, ?string $countryCode = null, bool $deliveryReport = false, ?string $clientReference = null): SmsLog
{
return DB::transaction(function () use ($profile, $to, $content, $sender, $countryCode, $deliveryReport, $clientReference): SmsLog {
// Enforce provider hard length limits before logging/sending
$content = $this->enforceLengthLimit($content);
$log = new SmsLog([
'uuid' => (string) Str::uuid(),
'profile_id' => $profile->id,