SMS limiter
This commit is contained in:
@@ -15,12 +15,112 @@ public function __construct(
|
||||
protected SmsClient $client,
|
||||
) {}
|
||||
|
||||
/**
 * Replace whitespace variants that could needlessly push a message into Unicode encoding.
 *
 * Non-breaking spaces (U+00A0) and horizontal tabs each become one regular space.
 * Line breaks (LF as well as CRLF) are passed through untouched.
 *
 * @param string $text Message text, UTF-8 encoded.
 * @return string The text with NBSP and tab characters replaced by plain spaces.
 */
protected function normalizeForSms(string $text): string
{
    $regularSpace = ' ';

    // U+00A0 is the two-byte sequence \xC2\xA0 in UTF-8.
    $withoutNbsp = str_replace("\u{00A0}", $regularSpace, $text);

    return str_replace("\t", $regularSpace, $withoutNbsp);
}
|
||||
|
||||
/**
 * Conservative GSM-7 detection.
 *
 * A text is reported as GSM-7 only when every character is one of:
 * - printable ASCII (0x20-0x7E) except the backtick (0x60), which has no GSM 03.38 mapping,
 * - LF or CR,
 * - the euro sign (available through the extension table).
 *
 * Everything else yields false so the caller applies the shorter UCS-2 limit. That includes
 * other control characters, any other non-ASCII code point, and malformed UTF-8.
 * Accented letters that do exist in the GSM 03.38 basic table are still reported as
 * non-GSM; that only makes the result more conservative.
 * Form feed is representable only through the extension table and is likewise treated
 * as non-GSM here to stay on the safe side.
 *
 * @param string $text Message text, UTF-8 encoded.
 * @return bool True when the text can be treated as GSM-7, false otherwise.
 */
protected function isGsm7(string $text): bool
{
    // preg_match() returns 0 when no disallowed character exists; it returns false on
    // malformed UTF-8 (because of the /u flag), which must not count as GSM-7 either.
    return preg_match('/[^\n\r\x20-\x5F\x61-\x7E\x{20AC}]/u', $text) === 0;
}
|
||||
|
||||
/**
 * Count the GSM-7 units (septets) a text occupies.
 *
 * Characters from the GSM 03.38 extension table are sent as ESC + character and therefore
 * cost 2 units; every other character costs 1. The extension table covers
 * ^ { } \ [ ~ ] | the euro sign and form feed.
 *
 * The method does not check that the text is GSM-7 encodable; callers are expected to
 * have verified that beforehand.
 *
 * @param string $text Message text, UTF-8 encoded.
 * @return int Number of GSM-7 units needed for the text.
 */
protected function gsm7Length(string $text): int
{
    // Keyed by character so the per-character lookup is a single hash probe.
    static $extensionCost = [
        '^' => 2,
        '{' => 2,
        '}' => 2,
        '\\' => 2,
        '[' => 2,
        '~' => 2,
        ']' => 2,
        '|' => 2,
        '€' => 2,
        "\f" => 2,
    ];

    $units = 0;
    // mb_str_split walks the string once; indexing with mb_substr would rescan it per character.
    foreach (mb_str_split($text, 1, 'UTF-8') as $ch) {
        $units += $extensionCost[$ch] ?? 1;
    }

    return $units;
}
|
||||
|
||||
/**
 * Truncate text to the provider hard limits: 640 GSM-7 units or 320 UCS-2 units.
 *
 * The text is normalized first, then classified as GSM-7 or UCS-2. Units are counted
 * per character:
 * - GSM-7: as reported by gsm7Length() (extension-table characters cost 2),
 * - UCS-2: in UTF-16 code units, so characters outside the Basic Multilingual Plane
 *   (for example most emoji) cost 2 because they need a surrogate pair.
 *
 * Truncation happens on whole characters only; a character that would cross the limit
 * is dropped together with everything after it.
 *
 * @param string $text Message text, UTF-8 encoded.
 * @return string The normalized text, shortened if necessary to fit the limit.
 */
public function enforceLengthLimit(string $text): string
{
    $text = $this->normalizeForSms($text);
    $isGsm = $this->isGsm7($text);
    $limit = $isGsm ? 640 : 320;

    // Fast path: no character costs more than 2 units in either encoding.
    if (mb_strlen($text, 'UTF-8') * 2 <= $limit) {
        return $text;
    }

    $kept = '';
    $used = 0;
    foreach (mb_str_split($text, 1, 'UTF-8') as $ch) {
        // A 4-byte UTF-8 sequence encodes a code point above U+FFFF, i.e. two UTF-16 units.
        $cost = $isGsm
            ? $this->gsm7Length($ch)
            : (strlen($ch) >= 4 ? 2 : 1);

        if ($used + $cost > $limit) {
            return $kept;
        }

        $kept .= $ch;
        $used += $cost;
    }

    // The loop finished without hitting the limit, so the whole text fits.
    return $text;
}
|
||||
|
||||
/**
|
||||
* Send a raw text message.
|
||||
*/
|
||||
public function sendRaw(SmsProfile $profile, string $to, string $content, ?SmsSender $sender = null, ?string $countryCode = null, bool $deliveryReport = false, ?string $clientReference = null): SmsLog
|
||||
{
|
||||
return DB::transaction(function () use ($profile, $to, $content, $sender, $countryCode, $deliveryReport, $clientReference): SmsLog {
|
||||
// Enforce provider hard length limits before logging/sending
|
||||
$content = $this->enforceLengthLimit($content);
|
||||
$log = new SmsLog([
|
||||
'uuid' => (string) Str::uuid(),
|
||||
'profile_id' => $profile->id,
|
||||
|
||||
Reference in New Issue
Block a user