Teren-app/app/Services/Documents/DocxTemplateRenderer.php

<?php

namespace App\Services\Documents;

use App\Models\Contract;
use App\Models\DocumentTemplate;
use App\Models\User;
use App\Services\Documents\Exceptions\UnresolvedTokensException;
use Carbon\Carbon;
use Illuminate\Support\Facades\Storage;
use Illuminate\Support\Str;
use ZipArchive;

/**
 * Renders a DOCX template by substituting {{token}} / {token} placeholders found in the
 * main document part and in header/footer/footnote/endnote/comment parts, then stores the
 * generated file on the "public" disk.
 */
class DocxTemplateRenderer
{
    /** Token shape accepted by the built-in brace scanners: dotted path, dash allowed in the last segment. */
    private const TOKEN_PATTERN = '/^[a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*\.[a-zA-Z0-9_.-]+$/';

    /** Matches one {{...}} or {...} chunk, including any XML markup sitting between the braces. */
    private const BRACE_CHUNK_PATTERN = '#\{\{.*?\}\}|\{[^{}]*\}#s';

    public function __construct(
        private TokenScanner $scanner = new TokenScanner,
        private TokenValueResolver $resolver = new TokenValueResolver,
    ) {}

    /**
     * Render the template for the given contract and persist the generated DOCX.
     *
     * Reads "unresolved_policy", "custom" and "token_overrides" from the current request.
     * The temporary working copy is always removed, also when rendering fails.
     *
     * @return array{fileName:string,relativePath:string,size:int,checksum:string,stats:array{tokensFound:int,resolvedCount:int,unresolved:array}}
     *
     * @throws UnresolvedTokensException when tokens stay unresolved and the effective policy is neither "blank" nor "keep"
     * @throws \RuntimeException when the template cannot be read, the archive cannot be processed, or the result cannot be stored
     */
    public function render(DocumentTemplate $template, Contract $contract, User $user): array
    {
        $disk = 'public';
        $templateBytes = Storage::disk($disk)->get($template->file_path);
        if (! is_string($templateBytes)) {
            throw new \RuntimeException('Predloge ni mogoče prebrati: '.$template->file_path);
        }

        // Work on a temp copy: ZipArchive edits the file in place
        $tmpIn = tempnam(sys_get_temp_dir(), 'tmpl');
        if ($tmpIn === false) {
            throw new \RuntimeException('Začasne datoteke ni mogoče ustvariti.');
        }

        $zip = new ZipArchive;
        $zipOpen = false;
        try {
            if (file_put_contents($tmpIn, $templateBytes) === false) {
                throw new \RuntimeException('Pisanje v začasno datoteko ni uspelo.');
            }
            $openResult = $zip->open($tmpIn);
            if ($openResult !== true) {
                throw new \RuntimeException('Ne morem odpreti DOCX arhiva: code '.$openResult);
            }
            $zipOpen = true;

            // Originals are kept as a safe fallback if a processed part ends up as invalid XML
            $originalParts = $this->collectParts($zip);
            $parts = array_map(
                fn (string $xml): string => $this->normalizeRunsForTokens($xml),
                $originalParts
            );

            $tokens = $this->scanTokens($parts);
            $this->logQuietly('info', 'DocxTemplateRenderer scan', [
                'template_id' => $template->id,
                'template_slug' => $template->slug,
                'template_version' => $template->version,
                'file_path' => $template->file_path,
                'tokens_found' => count($tokens),
            ]);

            // Policy: template flag -> global settings -> config; allow per-request override
            $docSettings = app(\App\Services\Documents\DocumentSettings::class)->get();
            $effectivePolicy = $template->fail_on_unresolved ? 'fail' : ($docSettings->unresolved_policy ?? config('documents.unresolved_policy', 'fail'));
            $reqPolicy = request()->input('unresolved_policy');
            if (in_array($reqPolicy, ['fail', 'keep', 'blank'], true)) {
                $effectivePolicy = $reqPolicy;
            }

            // Resolve values
            $customOverrides = request()->input('custom', []);
            $customDefaults = is_array($template->meta['custom_defaults'] ?? null) ? $template->meta['custom_defaults'] : null;
            $resolved = $this->resolver->resolve(
                $tokens,
                $template,
                $contract,
                $user,
                $effectivePolicy,
                is_array($customOverrides) ? $customOverrides : [],
                $customDefaults,
                'empty'
            );
            $values = $resolved['values'];
            $initialUnresolved = $resolved['unresolved'];
            $customTypes = $resolved['customTypes'] ?? [];

            // Explicit per-token overrides (e.g., address choices)
            $tokenOverrides = request()->input('token_overrides', []);
            if (is_array($tokenOverrides) && ! empty($tokenOverrides)) {
                foreach ($tokenOverrides as $tok => $val) {
                    if ($tok && (is_scalar($val) || $val === null)) {
                        $values[(string) $tok] = (string) ($val ?? '');
                    }
                }
            }

            $values = $this->formatValues($values, $customTypes, $template, $docSettings);

            // Add unresolved tokens found in document but not produced in values
            $unresolvedFromDoc = array_values(array_diff($tokens, array_keys($values)));
            if (! empty($unresolvedFromDoc)) {
                $initialUnresolved = array_values(array_unique(array_merge($initialUnresolved, $unresolvedFromDoc)));
            }

            // Replace resolved tokens in every part
            foreach ($parts as $name => $xml) {
                $parts[$name] = $this->replaceTokens($xml, $values);
            }

            // Handle unresolved according to policy
            if (! empty($initialUnresolved)) {
                if ($effectivePolicy === 'blank') {
                    // Same replacement routine as for values, so spaced and split-run tokens are blanked too
                    $blanks = array_fill_keys(array_values(array_unique($initialUnresolved)), '');
                    foreach ($parts as $name => $xml) {
                        $parts[$name] = $this->replaceTokens($xml, $blanks);
                    }
                } elseif ($effectivePolicy !== 'keep') {
                    throw new UnresolvedTokensException($initialUnresolved, 'Neuspešna zamenjava tokenov');
                }
            }

            $this->writeParts($zip, $parts, $originalParts, $template);

            // close() releases the archive handle whether or not it succeeds
            $zipOpen = false;
            if ($zip->close() !== true) {
                throw new \RuntimeException('Zapiranje DOCX arhiva ni uspelo.');
            }

            $output = file_get_contents($tmpIn);
            if ($output === false) {
                throw new \RuntimeException('Bralni izhod iz začasne DOCX datoteke je spodletel.');
            }
        } finally {
            if ($zipOpen) {
                // Aborted mid-way: drop pending changes instead of flushing them to the temp copy
                $zip->unchangeAll();
                $zip->close();
            }
            if (is_file($tmpIn)) {
                unlink($tmpIn);
            }
        }

        $checksum = hash('sha256', $output);
        $size = strlen($output);

        // Filename & date format
        $pattern = $template->output_filename_pattern
            ?: ($docSettings->file_name_pattern ?? config('documents.file_name_pattern'));
        $dateFormat = $template->date_format
            ?: ($docSettings->date_format ?? config('documents.date_format', 'Y-m-d'));
        $replacements = [
            '{slug}' => $template->slug,
            '{version}' => 'v'.$template->version,
            '{generation.date}' => now()->format($dateFormat),
            '{generation.timestamp}' => (string) now()->timestamp,
        ];
        foreach ($values as $token => $val) {
            $replacements['{'.$token.'}'] = Str::slug((string) $val) ?: 'value';
        }
        $fileName = strtr($pattern, $replacements);
        if (! str_ends_with(strtolower($fileName), '.docx')) {
            $fileName .= '.docx';
        }
        $relativeDir = 'contracts/'.$contract->uuid.'/generated/'.now()->toDateString();
        $relativePath = $relativeDir.'/'.$fileName;
        if (Storage::disk($disk)->put($relativePath, $output) === false) {
            throw new \RuntimeException('Shranjevanje generiranega dokumenta ni uspelo.');
        }

        return [
            'fileName' => $fileName,
            'relativePath' => $relativePath,
            'size' => $size,
            'checksum' => $checksum,
            'stats' => [
                'tokensFound' => count($tokens),
                'resolvedCount' => count(array_intersect(array_keys($values), $tokens)),
                'unresolved' => array_values(array_unique($initialUnresolved)),
            ],
        ];
    }

    /**
     * Read every XML part that may carry tokens: the main document plus
     * headers, footers, footnotes, endnotes and comments.
     *
     * @return array<string,string> part name => raw XML
     *
     * @throws \RuntimeException when word/document.xml is missing
     */
    private function collectParts(ZipArchive $zip): array
    {
        $docXml = $zip->getFromName('word/document.xml');
        if ($docXml === false) {
            throw new \RuntimeException('Manjkajoča document.xml');
        }

        $parts = ['word/document.xml' => $docXml];
        for ($i = 0; $i < $zip->numFiles; $i++) {
            $name = $zip->getNameIndex($i);
            if (! is_string($name)) {
                continue;
            }
            if (preg_match('#^word/(header\d*|footer\d*|footnotes|endnotes|comments)\.xml$#i', $name)) {
                $xml = $zip->getFromName($name);
                if ($xml !== false) {
                    $parts[$name] = $xml;
                }
            }
        }

        return $parts;
    }

    /**
     * Scan all parts with the injected scanner, the brace-aware split-run scanner and the
     * text-only scanner, and return the de-duplicated union.
     *
     * @param  array<string,string>  $parts
     * @return string[]
     */
    private function scanTokens(array $parts): array
    {
        $tokens = [];
        foreach ($parts as $xml) {
            $results = [
                $this->scanner->scan($xml),
                $this->scanBraceTokens($xml),
                $this->scanTextOnlyTokens($xml),
            ];
            foreach ($results as $found) {
                if ($found) {
                    $tokens = array_merge($tokens, $found);
                }
            }
        }

        return array_values(array_unique($tokens));
    }

    /**
     * Apply date and number/currency formatting to resolved values.
     *
     * A value is treated as a date when its custom type is "date", its key is "generation.date"
     * or its last segment ends in "date"; as a number when its custom type is "number" or its
     * last segment is amount/balance/total/price/cost.
     *
     * @param  array<array-key,mixed>  $values  token => resolved value
     * @param  array<array-key,mixed>  $customTypes  token => custom type name
     * @param  mixed  $docSettings  global document settings as returned by DocumentSettings::get()
     * @return array<array-key,mixed> token => formatted value
     */
    private function formatValues(array $values, array $customTypes, DocumentTemplate $template, mixed $docSettings): array
    {
        $fmt = $template->formatting_options ?? [];
        $decimals = (int) ($fmt['number_decimals'] ?? 2);
        $decSep = $fmt['decimal_separator'] ?? '.';
        $thouSep = $fmt['thousands_separator'] ?? ',';
        $currencySymbol = $fmt['currency_symbol'] ?? null;
        $currencyPos = $fmt['currency_position'] ?? 'before';
        $currencySpace = (bool) ($fmt['currency_space'] ?? false);
        $globalDateFormats = $docSettings->date_formats ?? [];
        $dateFmtOverrides = $fmt['date_formats'] ?? [];

        foreach ($values as $k => $v) {
            $type = $customTypes[$k] ?? null;
            $isDateKey = $type === 'date'
                || $k === 'generation.date'
                || preg_match('/(^|\.)[A-Za-z_]*date$/i', (string) $k);

            // Skip empty strings: Carbon::parse('') yields "now", which would print today's date for a blank field
            if (is_string($v) && trim($v) !== '' && $isDateKey) {
                $desiredFormat = $dateFmtOverrides[$k]
                    ?? ($globalDateFormats[$k] ?? null)
                    ?? ($fmt['default_date_format'] ?? null)
                    ?? ($template->date_format ?: null)
                    ?? ($docSettings->date_format ?? null)
                    ?? config('documents.date_format', 'Y-m-d');
                if ($desiredFormat) {
                    try {
                        $values[$k] = Carbon::parse($v)->format($desiredFormat);

                        continue;
                    } catch (\Throwable $e) {
                        // Not parseable as a date: keep the value as provided
                    }
                }
            }

            $isFinanceField = (bool) preg_match('/(^|\.)\b(amount|balance|total|price|cost)\b$/i', (string) $k);
            if (($type === 'number' || $isFinanceField) && is_numeric($v)) {
                $num = number_format((float) $v, $decimals, $decSep, $thouSep);
                if ($currencySymbol && $isFinanceField) {
                    $space = $currencySpace ? ' ' : '';
                    $num = $currencyPos === 'after' ? ($num.$space.$currencySymbol) : ($currencySymbol.$space.$num);
                }
                $values[$k] = $num;
            }
        }

        return $values;
    }

    /**
     * Replace {{token}} and {token} placeholders in one XML part.
     *
     * Three passes: exact string match, whitespace-tolerant match ("{{ token }}"), and a
     * single pass over brace chunks that strips XML tags and whitespace so tokens split across
     * runs are matched as well. Replacement values are XML-escaped.
     *
     * @param  array<array-key,mixed>  $replacements  token => replacement text (unescaped)
     */
    private function replaceTokens(string $xml, array $replacements): string
    {
        if ($replacements === []) {
            return $xml;
        }

        $lookup = [];
        foreach ($replacements as $token => $value) {
            $token = (string) $token;
            $replacement = $this->sanitizeXmlText((string) $value);

            // Fast path for contiguous tokens
            $xml = str_replace(['{{'.$token.'}}', '{'.$token.'}'], $replacement, $xml);

            // preg_quote() already escapes dots; a callback keeps "$1" / "\1" inside the value literal
            $quoted = preg_quote($token, '#');
            foreach (['#\{\{\s*'.$quoted.'\s*\}\}#', '#\{\s*'.$quoted.'\s*\}#'] as $pattern) {
                $xml = preg_replace_callback(
                    $pattern,
                    static fn (array $m): string => $replacement,
                    $xml
                ) ?? $xml;
            }

            // First token wins on a collision, matching iteration order
            $lookup['{{'.$token.'}}'] ??= $replacement;
            $lookup['{'.$token.'}'] ??= $replacement;
        }

        // Slow path: flatten each brace chunk and replace it when it equals a known token
        return preg_replace_callback(self::BRACE_CHUNK_PATTERN, static function (array $m) use ($lookup): string {
            $flat = preg_replace('/<[^>]+>/', '', $m[0]) ?? $m[0];
            $flat = preg_replace('/\s+/', '', $flat) ?? $flat;

            return $lookup[$flat] ?? $m[0];
        }, $xml) ?? $xml;
    }

    /**
     * Write processed parts back into the archive. A part that is no longer well-formed is
     * replaced by its original content; if the original is not well-formed either, abort.
     *
     * @param  array<string,string>  $parts
     * @param  array<string,string>  $originalParts
     *
     * @throws \RuntimeException when a part is invalid and has no valid original to fall back to
     */
    private function writeParts(ZipArchive $zip, array $parts, array $originalParts, DocumentTemplate $template): void
    {
        foreach ($parts as $name => $xml) {
            if ($this->isWellFormedXml($xml)) {
                $zip->addFromString($name, $xml);

                continue;
            }

            $context = [
                'part' => $name,
                'template_id' => $template->id,
                'template_version' => $template->version,
            ];
            $fallback = $originalParts[$name] ?? null;
            if (! is_string($fallback) || ! $this->isWellFormedXml($fallback)) {
                $this->logQuietly('error', 'DocxTemplateRenderer invalid XML with no safe fallback', $context);
                throw new \RuntimeException("Končni XML del '{$name}' ni veljaven in ni varnega nadomestnega originala.");
            }

            $this->logQuietly('warning', 'DocxTemplateRenderer fallback to original part', $context);
            $zip->addFromString($name, $fallback);
        }
    }

    /**
     * Log without ever letting a logging failure interrupt rendering.
     *
     * @param  array<string,mixed>  $context
     */
    private function logQuietly(string $level, string $message, array $context): void
    {
        try {
            logger()->{$level}($message, $context);
        } catch (\Throwable $e) {
            // Logging is best-effort; rendering must not fail because of it
        }
    }

    /**
     * Non-destructive normalization applied before scanning and replacement: removes
     * <w:proofErr/> markers, zero-width spaces and soft hyphens. Run boundaries are left
     * untouched; split tokens are handled by the brace-aware scan and replacement passes.
     */
    private function normalizeRunsForTokens(string $xml): string
    {
        $xml = preg_replace('#<w:proofErr[^>]*/>#i', '', $xml) ?? $xml;

        return str_replace(["\xE2\x80\x8B", "\xC2\xAD"], '', $xml); // zero-width space, soft hyphen
    }

    /**
     * Well-formedness check using DOMDocument. An empty string is reported as not well-formed
     * (DOMDocument::loadXML() rejects empty input with a ValueError on PHP 8).
     */
    private function isWellFormedXml(string $xml): bool
    {
        if ($xml === '') {
            return false;
        }

        $previous = libxml_use_internal_errors(true);
        try {
            $dom = new \DOMDocument('1.0', 'UTF-8');

            return $dom->loadXML($xml, LIBXML_NOERROR | LIBXML_NOWARNING) !== false;
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
    }

    /**
     * Prepare text for safe inclusion in Word XML content: drop characters that XML 1.0
     * forbids, then escape markup characters.
     */
    private function sanitizeXmlText(string $text): string
    {
        // XML 1.0 Char: #x9 | #xA | #xD | #x20-#xD7FF | #xE000-#xFFFD | #x10000-#x10FFFF
        $clean = preg_replace('/[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $text);
        if ($clean === null) {
            // Invalid UTF-8 makes the /u pattern fail; still strip ASCII control bytes, which
            // never occur inside multi-byte sequences
            $clean = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F]/', '', $text) ?? $text;
        }

        // ENT_SUBSTITUTE: replace invalid byte sequences instead of returning an empty string
        return htmlspecialchars($clean, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
    }

    /**
     * Aggressive text-only token scan: strips all tags and searches the remaining text for
     * {{...}} and {...} pairs. Useful when tokens are heavily split across runs.
     *
     * @return string[]
     */
    private function scanTextOnlyTokens(string $xml): array
    {
        $text = preg_replace('/<[^>]+>/', '', $xml) ?? $xml;
        $found = [];
        foreach (['/\{\{([^}]+)\}\}/s', '/\{([^{}]+)\}/s'] as $pattern) {
            if (! preg_match_all($pattern, $text, $m)) {
                continue;
            }
            foreach ($m[1] as $inner) {
                $tok = preg_replace('/\s+/', '', $inner) ?? $inner;
                if ($tok !== '' && preg_match(self::TOKEN_PATTERN, $tok)) {
                    $found[] = $tok;
                }
            }
        }

        return array_values(array_unique($found));
    }

    /**
     * Finds tokens inside brace pairs even when Word has split them across runs.
     * Strips XML tags from within braces and collapses whitespace to detect valid token patterns.
     *
     * @return string[]
     */
    private function scanBraceTokens(string $xml): array
    {
        if (! preg_match_all(self::BRACE_CHUNK_PATTERN, $xml, $matches)) {
            return [];
        }

        $tokens = [];
        foreach ($matches[0] as $chunk) {
            $isDouble = str_starts_with($chunk, '{{');
            $inner = substr($chunk, $isDouble ? 2 : 1, $isDouble ? -2 : -1);
            // Remove XML tags and whitespace inside braces
            $clean = preg_replace('/<[^>]+>/', '', $inner) ?? $inner;
            $clean = preg_replace('/\s+/', '', $clean) ?? $clean;
            if ($clean !== '' && preg_match(self::TOKEN_PATTERN, $clean)) {
                $tokens[] = $clean;
            }
        }

        return array_values(array_unique($tokens));
    }
}