Document gen fixed

2025-10-12 17:52:17 +02:00
parent e0303ece74
commit 23f2011e33
16 changed files with 1116 additions and 88 deletions
@@ -31,18 +31,80 @@ public function render(DocumentTemplate $template, Contract $contract, User $use
        file_put_contents($tmpIn, $templateStream);

        $zip = new ZipArchive;
-        $zip->open($tmpIn);
+        $openResult = $zip->open($tmpIn);
+        if ($openResult !== true) {
+            throw new \RuntimeException('Ne morem odpreti DOCX arhiva: code '.$openResult);
+        }
        $docXml = $zip->getFromName('word/document.xml');
        if ($docXml === false) {
            throw new \RuntimeException('Manjkajoča document.xml');
        }

-        $tokens = $this->scanner->scan($docXml);
-        // Determine effective unresolved policy early (template override -> global -> config)
-        $globalSettingsEarly = app(\App\Services\Documents\DocumentSettings::class)->get();
-        $effectivePolicy = $template->fail_on_unresolved ? 'fail' : ($globalSettingsEarly->unresolved_policy ?? config('documents.unresolved_policy', 'fail'));
-        // Resolve with support for custom.* tokens: per-generation overrides and defaults from template meta or global settings.
-        $customOverrides = request()->input('custom', []); // if called via HTTP context; otherwise pass explicitly from caller
+        // Collect all XML parts we should scan/replace: document + headers/footers + footnotes/endnotes/comments
+        $parts = [];
+        $parts['word/document.xml'] = $docXml;
+        for ($i = 0; $i < $zip->numFiles; $i++) {
+            $name = $zip->getNameIndex($i);
+            if (! is_string($name)) {
+                continue;
+            }
+            if (preg_match('#^word/(header\d*|footer\d*|footnotes|endnotes|comments)\.xml$#i', $name)) {
+                $xml = $zip->getFromName($name);
+                if ($xml !== false) {
+                    $parts[$name] = $xml;
+                }
+            }
+        }
+
+        // Keep originals for safe fallback on write if normalization yields invalid XML
+        $originalParts = $parts;
+        // Normalize each part for scanning and replacement
+        $scanParts = [];
+        foreach ($parts as $name => $xml) {
+            $normalized = $this->normalizeRunsForTokens($xml);
+            $scanParts[$name] = $normalized;   // used for scanning tokens
+            $parts[$name] = $normalized;       // used for replacement/write-back
+        }
+
+        // Scan tokens across all parts (merge default scanner + brace-aware split-run scanner + text-only scanner)
+        $tokens = [];
+        foreach ($scanParts as $xml) {
+            $found = $this->scanner->scan($xml);
+            if ($found) {
+                $tokens = array_merge($tokens, $found);
+            }
+            $foundSplit = $this->scanBraceTokens($xml);
+            if ($foundSplit) {
+                $tokens = array_merge($tokens, $foundSplit);
+            }
+            $foundText = $this->scanTextOnlyTokens($xml);
+            if ($foundText) {
+                $tokens = array_merge($tokens, $foundText);
+            }
+        }
+        $tokens = array_values(array_unique($tokens));
+        try {
+            logger()->info('DocxTemplateRenderer scan', [
+                'template_id' => $template->id,
+                'template_slug' => $template->slug,
+                'template_version' => $template->version,
+                'file_path' => $template->file_path,
+                'tokens_found' => count($tokens),
+            ]);
+        } catch (\Throwable $e) {
+            // swallow logging errors
+        }
+
+        // Policy: template flag -> global settings -> config; allow per-request override
+        $docSettings = app(\App\Services\Documents\DocumentSettings::class)->get();
+        $effectivePolicy = $template->fail_on_unresolved ? 'fail' : ($docSettings->unresolved_policy ?? config('documents.unresolved_policy', 'fail'));
+        $reqPolicy = request()->input('unresolved_policy');
+        if (in_array($reqPolicy, ['fail', 'keep', 'blank'], true)) {
+            $effectivePolicy = $reqPolicy;
+        }
+
+        // Resolve values
+        $customOverrides = request()->input('custom', []);
        $customDefaults = is_array($template->meta['custom_defaults'] ?? null) ? $template->meta['custom_defaults'] : null;
        $resolved = $this->resolver->resolve(
            $tokens,
@@ -57,7 +119,18 @@ public function render(DocumentTemplate $template, Contract $contract, User $use
        $values = $resolved['values'];
        $initialUnresolved = $resolved['unresolved'];
        $customTypes = $resolved['customTypes'] ?? [];
-        // Formatting options
+
+        // Explicit per-token overrides (e.g., address choices)
+        $tokenOverrides = request()->input('token_overrides', []);
+        if (is_array($tokenOverrides) && ! empty($tokenOverrides)) {
+            foreach ($tokenOverrides as $tok => $val) {
+                if ($tok && (is_scalar($val) || $val === null)) {
+                    $values[(string) $tok] = (string) ($val ?? '');
+                }
+            }
+        }
+
+        // Formatting
        $fmt = $template->formatting_options ?? [];
        $decimals = (int) ($fmt['number_decimals'] ?? 2);
        $decSep = $fmt['decimal_separator'] ?? '.';
@@ -65,83 +138,155 @@ public function render(DocumentTemplate $template, Contract $contract, User $use
        $currencySymbol = $fmt['currency_symbol'] ?? null;
        $currencyPos = $fmt['currency_position'] ?? 'before';
        $currencySpace = (bool) ($fmt['currency_space'] ?? false);
-        $globalSettings = app(\App\Services\Documents\DocumentSettings::class)->get();
-        $globalDateFormats = $globalSettings->date_formats ?? [];
+        $globalDateFormats = $docSettings->date_formats ?? [];
        foreach ($values as $k => $v) {
            $isTypedDate = ($customTypes[$k] ?? null) === 'date';
            $isTypedNumber = ($customTypes[$k] ?? null) === 'number';
-            // Date formatting (typed or heuristic based on key ending with _date or .date)
            if (is_string($v) && ($isTypedDate || $k === 'generation.date' || preg_match('/(^|\.)[A-Za-z_]*date$/i', $k))) {
                $dateFmtOverrides = $fmt['date_formats'] ?? [];
                $desiredFormat = $dateFmtOverrides[$k]
                    ?? ($globalDateFormats[$k] ?? null)
                    ?? ($fmt['default_date_format'] ?? null)
                    ?? ($template->date_format ?: null)
-                    ?? ($globalSettings->date_format ?? null)
+                    ?? ($docSettings->date_format ?? null)
                    ?? config('documents.date_format', 'Y-m-d');
                if ($desiredFormat) {
                    try {
                        $dt = Carbon::parse($v);
                        $values[$k] = $dt->format($desiredFormat);

-                        continue; // skip numeric detection below
+                        continue;
                    } catch (\Throwable $e) {
-                        // swallow
+                        // ignore
                    }
                }
            }
-            // Number formatting: only for explicitly typed numbers or common monetary fields
            $isFinanceField = (bool) preg_match('/(^|\.)\b(amount|balance|total|price|cost)\b$/i', $k);
            if (($isTypedNumber || $isFinanceField) && is_numeric($v)) {
                $num = number_format((float) $v, $decimals, $decSep, $thouSep);
                if ($currencySymbol && $isFinanceField) {
                    $space = $currencySpace ? ' ' : '';
-                    if ($currencyPos === 'after') {
-                        $num = $num.$space.$currencySymbol;
-                    } else {
-                        $num = $currencySymbol.$space.$num;
-                    }
+                    $num = $currencyPos === 'after' ? ($num.$space.$currencySymbol) : ($currencySymbol.$space.$num);
                }
                $values[$k] = $num;
            }
        }
-        // Replace tokens
-        foreach ($values as $token => $val) {
-            $docXml = str_replace('{{'.$token.'}}', htmlspecialchars($val), $docXml);
+
+        // Add unresolved tokens found in document but not produced in values
+        $resolvedTokens = array_keys($values);
+        $unresolvedFromDoc = array_values(array_diff($tokens, $resolvedTokens));
+        if (! empty($unresolvedFromDoc)) {
+            $initialUnresolved = array_values(array_unique(array_merge($initialUnresolved, $unresolvedFromDoc)));
        }
-        // After replacement: check unresolved patterns
+
+        // Replace tokens in each part: support {{token}} and {token}, allow surrounding whitespace
+        foreach ($parts as $name => $xml) {
+            // Fast path for contiguous tokens
+            foreach ($values as $token => $val) {
+                $replacement = $this->sanitizeXmlText((string) $val);
+                $xml = str_replace('{{'.$token.'}}', $replacement, $xml);
+                $xml = str_replace('{'.$token.'}', $replacement, $xml);
+                $escapedToken = preg_quote(str_replace('.', '\\.', $token), '#');
+                $boundaryPatterns = [
+                    '#\\{\\{\s*'.$escapedToken.'\s*\\}\\}#',
+                    '#\\{\s*'.$escapedToken.'\s*\\}#',
+                ];
+                foreach ($boundaryPatterns as $pat) {
+                    $xml = preg_replace($pat, $replacement, $xml) ?? $xml;
+                }
+            }
+
+            // Slow path: single pass across brace chunks; if flattened token matches any key, replace with its value
+            if (! empty($values)) {
+                $xml = preg_replace_callback('#\\{\\{.*?\\}\\}|\\{[^{}]*\\}#s', function (array $m) use ($values) {
+                    $chunk = $m[0];
+                    $flat = preg_replace('/<[^>]+>/', '', $chunk) ?? $chunk;
+                    $flat = preg_replace('/\\s+/', '', $flat) ?? $flat;
+                    foreach ($values as $t => $v) {
+                        if ($flat === '{{'.$t.'}}' || $flat === '{'.$t.'}') {
+                            return $this->sanitizeXmlText((string) $v);
+                        }
+                    }
+
+                    return $chunk;
+                }, $xml) ?? $xml;
+            }
+
+            $parts[$name] = $xml;
+        }
+
+        // Handle unresolved according to policy
        if (! empty($initialUnresolved)) {
            if ($effectivePolicy === 'blank') {
-                foreach ($initialUnresolved as $r) {
-                    $docXml = str_replace('{{'.$r.'}}', '', $docXml);
+                foreach (array_values(array_unique($initialUnresolved)) as $r) {
+                    foreach ($parts as $name => $xml) {
+                        $xml = str_replace('{{'.$r.'}}', '', $xml);
+                        $xml = str_replace('{'.$r.'}', '', $xml);
+                        $escaped = preg_quote(str_replace('.', '\\.', $r), '#');
+                        $xml = preg_replace('#\\{\\{\s*'.$escaped.'\s*\\}\\}#', '', $xml) ?? $xml;
+                        $xml = preg_replace('#\\{\s*'.$escaped.'\s*\\}#', '', $xml) ?? $xml;
+                        $parts[$name] = $xml;
+                    }
                }
            } elseif ($effectivePolicy === 'keep') {
-                // keep unresolved markers
-            } else { // fail
+                // leave as-is
+            } else {
                throw new UnresolvedTokensException($initialUnresolved, 'Neuspešna zamenjava tokenov');
            }
        }

-        $zip->addFromString('word/document.xml', $docXml);
-        $zip->close();
+        // Ensure each XML part is well-formed, then write back to zip (fallback to original if needed)
+        foreach ($parts as $name => $xml) {
+            if (! $this->isWellFormedXml($xml)) {
+                // Fallback to original part to avoid producing a broken DOCX (these parts typically had no tokens)
+                $fallback = $originalParts[$name] ?? null;
+                if (! is_string($fallback) || ! $this->isWellFormedXml($fallback)) {
+                    try {
+                        logger()->error('DocxTemplateRenderer invalid XML with no safe fallback', [
+                            'part' => $name,
+                            'template_id' => $template->id,
+                            'template_version' => $template->version,
+                        ]);
+                    } catch (\Throwable $e) {
+                    }
+                    throw new \RuntimeException("Končni XML del '{$name}' ni veljaven in ni varnega nadomestnega originala.");
+                }
+                try {
+                    logger()->warning('DocxTemplateRenderer fallback to original part', [
+                        'part' => $name,
+                        'template_id' => $template->id,
+                        'template_version' => $template->version,
+                    ]);
+                } catch (\Throwable $e) {
+                }
+                $zip->addFromString($name, $fallback);
+            } else {
+                $zip->addFromString($name, $xml);
+            }
+        }
+        $closeOk = $zip->close();
+        if ($closeOk !== true) {
+            throw new \RuntimeException('Zapiranje DOCX arhiva ni uspelo.');
+        }

        $output = file_get_contents($tmpIn);
+        if ($output === false) {
+            throw new \RuntimeException('Bralni izhod iz začasne DOCX datoteke je spodletel.');
+        }
        $checksum = hash('sha256', $output);
        $size = strlen($output);

-        // Filename pattern & date format precedence: template override -> global settings -> config fallback
-        $globalSettings = $globalSettings ?? app(\App\Services\Documents\DocumentSettings::class)->get();
+        // Filename & date format
        $pattern = $template->output_filename_pattern
-            ?: ($globalSettings->file_name_pattern ?? config('documents.file_name_pattern'));
+            ?: ($docSettings->file_name_pattern ?? config('documents.file_name_pattern'));
        $dateFormat = $template->date_format
-            ?: ($globalSettings->date_format ?? config('documents.date_format', 'Y-m-d'));
+            ?: ($docSettings->date_format ?? config('documents.date_format', 'Y-m-d'));
        $replacements = [
            '{slug}' => $template->slug,
            '{version}' => 'v'.$template->version,
            '{generation.date}' => now()->format($dateFormat),
            '{generation.timestamp}' => (string) now()->timestamp,
        ];
-        // Also allow any token ({{x.y}}) style replaced pattern variants: convert {contract.reference}
        foreach ($values as $token => $val) {
            $replacements['{'.$token.'}'] = Str::slug((string) $val) ?: 'value';
        }
@@ -158,6 +303,114 @@ public function render(DocumentTemplate $template, Contract $contract, User $use
            'relativePath' => $relativePath,
            'size' => $size,
            'checksum' => $checksum,
+            'stats' => [
+                'tokensFound' => count($tokens),
+                'resolvedCount' => count(array_intersect(array_keys($values), $tokens)),
+                'unresolved' => array_values(array_unique($initialUnresolved)),
+            ],
        ];
    }
+
+    /**
+     * Light-touch cleanup of one Word XML part ahead of token scanning and replacement.
+     *
+     * Only two things are removed: self-closing <w:proofErr .../> proofing markers, and the
+     * invisible characters zero-width space (U+200B) and soft hyphen (U+00AD). Runs and
+     * <w:t> elements are NOT merged or restructured here, so a token that Word split across
+     * runs stays split after this call.
+     *
+     * @param string $xml XML content of a single DOCX part.
+     * @return string The cleaned XML; if the regex step fails, proofing markers are left in place.
+     */
+    private function normalizeRunsForTokens(string $xml): string
+    {
+        $withoutProofing = preg_replace('#<w:proofErr[^>]*/>#i', '', $xml);
+        if ($withoutProofing === null) {
+            // preg_replace signals failure with null; carry on with the untouched input
+            $withoutProofing = $xml;
+        }
+
+        // UTF-8 byte sequences for zero-width space and soft hyphen
+        $invisibleChars = ["\xE2\x80\x8B", "\xC2\xAD"];
+
+        return str_replace($invisibleChars, '', $withoutProofing);
+    }
+
+    /**
+     * Originally meant to repair sequences like "<w:t>   <w:t>" or "</w:t></w:t>" left behind
+     * by normalization. It is currently a pass-through: the input is returned unchanged.
+     *
+     * NOTE(review): no caller is visible in this diff; confirm whether the method is still
+     * referenced before keeping or removing it.
+     *
+     * @param string $xml XML content of a single DOCX part.
+     * @return string The same string that was passed in.
+     */
+    private function fixNestedTextTags(string $xml): string
+    {
+        // No-op: normalization no longer restructures <w:t> tags, so there is nothing to repair
+        return $xml;
+    }
+
+    /**
+     * Well-formedness check for one XML part, using DOMDocument.
+     *
+     * An empty string is reported as not well-formed. On PHP 8, DOMDocument::loadXML('')
+     * throws a ValueError (which the former "@" operator did not suppress), so that case is
+     * short-circuited here instead of escaping as an exception.
+     *
+     * @param string $xml XML content to check.
+     * @return bool True when the string parses as XML, false otherwise.
+     */
+    private function isWellFormedXml(string $xml): bool
+    {
+        if ($xml === '') {
+            return false;
+        }
+
+        // Route libxml diagnostics to its internal buffer instead of PHP warnings,
+        // and restore the caller's setting afterwards.
+        $previousSetting = libxml_use_internal_errors(true);
+        try {
+            $dom = new \DOMDocument('1.0', 'UTF-8');
+
+            return $dom->loadXML($xml, LIBXML_NOERROR | LIBXML_NOWARNING) !== false;
+        } finally {
+            if ($previousSetting === false) {
+                // We enabled buffering ourselves, so anything buffered is ours to discard
+                libxml_clear_errors();
+            }
+            libxml_use_internal_errors($previousSetting);
+        }
+    }
+
+    /**
+     * Prepare text for safe inclusion in Word XML content.
+     *
+     * - Escapes & < > " ' as XML entities.
+     * - Replaces invalid UTF-8 byte sequences with U+FFFD instead of letting
+     *   htmlspecialchars() return an empty string for the whole value.
+     * - Drops code points outside the XML 1.0 Char range (control characters other than
+     *   tab/LF/CR, surrogates, U+FFFE/U+FFFF). Supplementary-plane characters
+     *   (U+10000-U+10FFFF, e.g. emoji) are valid XML and are kept.
+     *
+     * @param string $text Raw replacement value.
+     * @return string Escaped text that can be placed inside an XML text node.
+     */
+    private function sanitizeXmlText(string $text): string
+    {
+        // Escape first: ENT_SUBSTITUTE yields valid UTF-8, so the /u regex below cannot fail
+        // on malformed input (a failure would leave forbidden control characters in place).
+        // Escaping only touches & < > " ', so it commutes with the stripping step.
+        $escaped = htmlspecialchars($text, ENT_QUOTES | ENT_XML1 | ENT_SUBSTITUTE, 'UTF-8');
+
+        // Remove characters not allowed in XML 1.0
+        return preg_replace(
+            '/[^\x09\x0A\x0D\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u',
+            '',
+            $escaped
+        ) ?? $escaped;
+    }
+
+    /**
+     * Tag-blind token discovery: every XML tag is dropped first, then brace-delimited names
+     * are searched in the remaining text, so tokens that Word scattered over several runs
+     * still surface. Whitespace inside the braces is removed before validation.
+     *
+     * Double-brace matches ({{token}}) are collected before single-brace matches ({token});
+     * only names shaped like entity(.sub)*.attribute are accepted.
+     *
+     * @return string[] Distinct token names in first-seen order.
+     */
+    private function scanTextOnlyTokens(string $xml): array
+    {
+        $plainText = preg_replace('/<[^>]+>/', '', $xml) ?? $xml;
+        $bracePatterns = [
+            '/\{\{([^}]+)\}\}/s',
+            '/\{([^{}]+)\}/s',
+        ];
+
+        $names = [];
+        foreach ($bracePatterns as $bracePattern) {
+            if (! preg_match_all($bracePattern, $plainText, $hits)) {
+                continue;
+            }
+            foreach ($hits[1] as $rawInner) {
+                $candidate = preg_replace('/\s+/', '', $rawInner) ?? $rawInner;
+                if ($candidate === '') {
+                    continue;
+                }
+                if (preg_match('/^[a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*\.[a-zA-Z0-9_.-]+$/', $candidate)) {
+                    $names[] = $candidate;
+                }
+            }
+        }
+
+        return array_values(array_unique($names));
+    }
+
+    /**
+     * Locates brace-delimited chunks ({{...}} or {...}) directly in the raw XML, so a token
+     * whose braces ended up in different runs is still detected. For each chunk the braces
+     * are peeled off, embedded XML tags and whitespace are removed, and the remainder is kept
+     * only if it looks like entity(.sub)*.attribute (dash and dot allowed in the last segment).
+     *
+     * @return string[] Distinct token names in first-seen order.
+     */
+    private function scanBraceTokens(string $xml): array
+    {
+        $hitCount = preg_match_all('/\{\{.*?\}\}|\{[^{}]*\}/s', $xml, $hits);
+        if (! $hitCount) {
+            return [];
+        }
+
+        $names = [];
+        foreach ($hits[0] as $candidate) {
+            // Two brace characters on each side for {{...}}, one for {...}
+            $braceWidth = str_starts_with($candidate, '{{') ? 2 : 1;
+            $inside = substr($candidate, $braceWidth, -$braceWidth);
+
+            $stripped = preg_replace('/<[^>]+>/', '', $inside) ?? $inside;
+            $stripped = preg_replace('/\s+/', '', $stripped) ?? $stripped;
+            if ($stripped === '') {
+                continue;
+            }
+
+            if (preg_match('/^[a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*\.[a-zA-Z0-9_.-]+$/', $stripped)) {
+                $names[] = $stripped;
+            }
+        }
+
+        return array_values(array_unique($names));
+    }
 }
@@ -4,19 +4,25 @@

 class TokenScanner
 {
-    // Allow entity.attr with attr accepting letters, digits, underscore, dot and hyphen for flexibility (e.g., custom.order-id)
-    private const REGEX = '/{{\s*([a-zA-Z0-9_]+\.[a-zA-Z0-9_.-]+)\s*}}/';
+    // Allow nested tokens like client.person.full_name or custom.order-id
+    // Pattern: entity(.[subentity])* . attribute — REGEX_DOUBLE matches {{ token }}, REGEX_SINGLE matches { token }; whitespace just inside the braces is tolerated and the token name is capture group 1
+    private const REGEX_DOUBLE = '/{{\s*([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*\.[a-zA-Z0-9_.-]+)\s*}}/';
+
+    private const REGEX_SINGLE = '/\{\s*([a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*\.[a-zA-Z0-9_.-]+)\s*\}/';

    /**
+     * Collects the token names written as {{ token }} or { token } in the given content.
+     *
+     * Double-brace matches are gathered first, then single-brace matches; duplicates are
+     * removed and the result is reindexed from 0. Only contiguous tokens are found here:
+     * the patterns do not look through markup placed between the braces.
+     *
+     * @param string $content Text to scan.
     * @return array<int,string>
     */
    public function scan(string $content): array
    {
-        preg_match_all(self::REGEX, $content, $m);
-        if (empty($m[1])) {
-            return [];
+        $out = [];
+        if (preg_match_all(self::REGEX_DOUBLE, $content, $m1) && ! empty($m1[1])) {
+            $out = array_merge($out, $m1[1]);
+        }
+        if (preg_match_all(self::REGEX_SINGLE, $content, $m2) && ! empty($m2[1])) {
+            $out = array_merge($out, $m2[1]);
        }

-        return array_values(array_unique($m[1]));
+        return array_values(array_unique($out));
    }
 }
@@ -42,7 +42,7 @@ public function resolve(
        $customTypes = [];
        if (isset($template->meta['custom_default_types']) && is_array($template->meta['custom_default_types'])) {
            foreach ($template->meta['custom_default_types'] as $k => $t) {
-                $t = in_array($t, ['string', 'number', 'date'], true) ? $t : 'string';
+                $t = in_array($t, ['string', 'number', 'date', 'text'], true) ? $t : 'string';
                $customTypes[(string) $k] = $t;
            }
        }
@@ -57,6 +57,17 @@ public function resolve(
        } else {
            $templateEntities = array_keys($globalWhitelist);
        }
+        // Normalize template tokens list (used as an allow-list if columns / global whitelist are not exhaustive)
+        $templateTokens = [];
+        $rawTemplateTokens = $template->tokens ?? null;
+        if (is_array($rawTemplateTokens)) {
+            $templateTokens = array_values(array_filter(array_map('strval', $rawTemplateTokens)));
+        } elseif (is_string($rawTemplateTokens)) {
+            $decoded = json_decode($rawTemplateTokens, true);
+            if (is_array($decoded)) {
+                $templateTokens = array_values(array_filter(array_map('strval', $decoded)));
+            }
+        }
        foreach ($tokens as $token) {
            [$entity,$attr] = explode('.', $token, 2);
            if ($entity === 'generation') {
@@ -93,20 +104,43 @@ public function resolve(
                continue;
            }
            if (! in_array($entity, $templateEntities, true)) {
-                if ($policy === 'fail') {
-                    throw new \RuntimeException("Nedovoljen entiteta token: $entity");
-                }
-                $unresolved[] = $token;
+                // If the token is explicitly listed on the template's tokens, allow it
+                if (! $templateTokens || ! in_array($token, $templateTokens, true)) {
+                    if ($policy === 'fail') {
+                        throw new \RuntimeException("Nedovoljen entiteta token: $entity");
+                    }
+                    $unresolved[] = $token;

-                continue;
+                    continue;
+                }
            }
            // Allowed attributes: merge template-declared columns with global whitelist (config + DB settings)
-            // Rationale: old templates may not list newly allowed attributes (like nested paths),
-            // so we honor both sources instead of preferring one exclusively.
-            $allowedFromTemplate = $template->columns[$entity] ?? [];
+            // Support nested dotted attributes (e.g. person.person_address.city). We allow if either the full
+            // dotted path is listed or if the base prefix is listed (e.g. person.person_address) and the resolver
+            // can handle it.
+            // Safely read template-declared columns
+            $columns = is_array($template->columns ?? null) ? $template->columns : [];
+            $allowedFromTemplate = $columns[$entity] ?? [];
            $allowedFromGlobal = $globalWhitelist[$entity] ?? [];
            $allowed = array_values(array_unique(array_merge($allowedFromTemplate, $allowedFromGlobal)));
-            if (! in_array($attr, $allowed, true)) {
+            $isAllowed = in_array($attr, $allowed, true);
+            if (! $isAllowed && str_contains($attr, '.')) {
+                // Check progressive prefixes: a.b.c -> a.b
+                $parts = explode('.', $attr);
+                while (count($parts) > 1 && ! $isAllowed) {
+                    array_pop($parts);
+                    $prefix = implode('.', $parts);
+                    if (in_array($prefix, $allowed, true)) {
+                        $isAllowed = true;
+                        break;
+                    }
+                }
+            }
+            // If still not allowed, permit tokens explicitly scanned/stored on the template
+            if (! $isAllowed && $templateTokens) {
+                $isAllowed = in_array($token, $templateTokens, true);
+            }
+            if (! $isAllowed) {
                if ($policy === 'fail') {
                    throw new \RuntimeException("Nedovoljen stolpec token: $token");
                }