Document gen fixed
This commit is contained in:
@@ -0,0 +1,152 @@
|
||||
<?php
|
||||
|
||||
namespace App\Console\Commands;
|
||||
|
||||
use App\Models\DocumentTemplate;
|
||||
use App\Services\Documents\TokenScanner;
|
||||
use Illuminate\Console\Command;
|
||||
use Illuminate\Support\Facades\Storage;
|
||||
use ZipArchive;
|
||||
|
||||
/**
 * Artisan command `template:scan`: loads a stored DOCX template by slug
 * (optionally pinned to a version), reads its WordprocessingML parts and
 * prints the tokens the injected TokenScanner reports for them.
 *
 * The command only reads the template; nothing is written back to storage.
 */
class TemplateScanCommand extends Command
{
    protected $signature = 'template:scan {slug : Template slug} {--tpl-version= : Specific template version number} {--parts : Show per-part tokens}';

    protected $description = 'Scan a stored DOCX template by slug/version and dump detected tokens directly from storage.';

    /**
     * Execute the command.
     *
     * Steps: resolve the template row, copy its file from the "public" disk
     * to a temporary file (ZipArchive needs a real path), read the relevant
     * XML parts, normalise each part and pass it to the scanner, then print
     * the de-duplicated token list (plus a per-part breakdown with --parts).
     *
     * @param  TokenScanner  $scanner  Scanner whose scan() result is treated as a
     *                                 list of token strings (assumed to be an
     *                                 array — confirm against TokenScanner).
     * @return int self::SUCCESS when the template was scanned, self::FAILURE otherwise.
     */
    public function handle(TokenScanner $scanner): int
    {
        $slug = (string) $this->argument('slug');
        $version = (string) ($this->option('tpl-version') ?? '');

        // An explicit "0" must count as "given"; empty() would silently
        // ignore it and fall back to the latest version.
        $hasVersion = $version !== '';

        if ($hasVersion && ! ctype_digit($version)) {
            $this->error("Invalid --tpl-version '{$version}': expected a non-negative integer.");

            return self::FAILURE;
        }

        $query = DocumentTemplate::query()->where('slug', $slug);
        if ($hasVersion) {
            $query->where('version', (int) $version);
        } else {
            // No version requested: take the highest one.
            $query->orderByDesc('version');
        }

        /** @var DocumentTemplate|null $template */
        $template = $query->first();
        if (! $template) {
            $this->error("Template not found for slug '{$slug}'".($hasVersion ? " v{$version}" : ''));

            return self::FAILURE;
        }

        $disk = 'public';
        $path = $template->file_path;
        if (! $path || ! Storage::disk($disk)->exists($path)) {
            $this->error('Template file not found on disk: '.$path);

            return self::FAILURE;
        }

        $bytes = Storage::disk($disk)->get($path);
        if (! is_string($bytes)) {
            $this->error('Unable to read template file: '.$path);

            return self::FAILURE;
        }

        $tmp = tempnam(sys_get_temp_dir(), 'tmpl');
        if ($tmp === false) {
            $this->error('Unable to create a temporary file for the template.');

            return self::FAILURE;
        }

        $zip = new ZipArchive;
        $opened = false;

        try {
            if (file_put_contents($tmp, $bytes) === false) {
                $this->error('Unable to write the template to a temporary file.');

                return self::FAILURE;
            }

            if ($zip->open($tmp) !== true) {
                $this->error('Unable to open DOCX (zip).');

                return self::FAILURE;
            }
            $opened = true;

            // Everything needed is copied into memory here, so the archive
            // and the temp file can be released before scanning/printing.
            $parts = $this->collectParts($zip);
        } finally {
            // Runs on every exit path (early return or exception) so the
            // temporary copy never outlives the command.
            if ($opened) {
                $zip->close();
            }
            if (is_file($tmp)) {
                unlink($tmp);
            }
        }

        // Normalize and scan
        $all = [];
        $perPart = [];
        foreach ($parts as $name => $xml) {
            $found = $scanner->scan($this->normalizeRunsForTokens($xml));
            $perPart[$name] = $found;
            if ($found) {
                $all = array_merge($all, $found);
            }
        }
        $union = array_values(array_unique($all));

        $this->info("Template: {$template->name} (slug={$template->slug}, v{$template->version})");
        $this->line('File: '.$path);
        $this->line('Tokens found (union): '.count($union));
        foreach ($union as $token) {
            $this->line(' - '.$token);
        }

        if ($this->option('parts')) {
            $this->line('');
            $this->info('Per-part details:');
            foreach ($perPart as $partName => $tokens) {
                $this->line("[{$partName}] (".count($tokens).')');
                foreach ($tokens as $token) {
                    $this->line('   - '.$token);
                }
            }
        }

        return self::SUCCESS;
    }

    /**
     * Read the XML parts to scan from an opened DOCX archive.
     *
     * The main document comes first (when present), followed by every entry
     * named word/header*.xml, word/footer*.xml, word/footnotes.xml,
     * word/endnotes.xml or word/comments.xml, in archive order.
     *
     * @return array<string, string> Part name => raw XML.
     */
    private function collectParts(ZipArchive $zip): array
    {
        $parts = [];

        $main = $zip->getFromName('word/document.xml');
        if ($main !== false) {
            $parts['word/document.xml'] = $main;
        }

        for ($index = 0; $index < $zip->numFiles; $index++) {
            $entry = $zip->getNameIndex($index);
            if (! is_string($entry)) {
                continue;
            }
            if (! preg_match('#^word/(header\d*|footer\d*|footnotes|endnotes|comments)\.xml$#i', $entry)) {
                continue;
            }

            $xml = $zip->getFromName($entry);
            if ($xml !== false) {
                $parts[$entry] = $xml;
            }
        }

        return $parts;
    }

    /**
     * Rewrite part XML so that tokens split across several runs/text nodes
     * become contiguous text before scanning.
     *
     * The result is meant for token detection only; markup between adjacent
     * text nodes is removed, so it is not a valid document part any more.
     *
     * @param  string  $xml  Raw XML of one DOCX part.
     * @return string The XML with run boundaries collapsed and token bodies cleaned.
     */
    private function normalizeRunsForTokens(string $xml): string
    {
        // Remove zero-width space (U+200B) and soft hyphen (U+00AD) first.
        // If they are still present during the single-brace pass below, the
        // token-shape check fails and the token keeps its inline tags.
        $xml = str_replace(["\xE2\x80\x8B", "\xC2\xAD"], '', $xml);

        // Remove proofing error markers
        $xml = preg_replace('#<w:proofErr[^>]*/>#i', '', $xml) ?? $xml;

        // Collapse boundaries between runs and inside runs (include tabs/line breaks)
        $patterns = [
            '#</w:t>\s*</w:r>\s*(?:<(?:w:proofErr|w:tab|w:br)[^>]*/>\s*)*(?:<w:r[^>]*>\s*(?:<w:rPr>.*?</w:rPr>\s*)*)?<w:t[^>]*>#is',
            '#</w:t>\s*(?:<(?:w:proofErr|w:tab|w:br)[^>]*/>\s*)*<w:t[^>]*>#is',
        ];

        // Repeat until a full pass changes nothing: removing one boundary can
        // expose another. A null result (PCRE error) keeps the previous text.
        do {
            $before = $xml;
            foreach ($patterns as $pattern) {
                $xml = preg_replace($pattern, '', $xml) ?? $xml;
            }
        } while ($before !== $xml);

        // Clean inside {{ ... }}
        $xml = preg_replace_callback('/\{\{.*?\}\}/s', function (array $m) {
            return '{{'.$this->stripMarkupAndWhitespace(substr($m[0], 2, -2)).'}}';
        }, $xml) ?? $xml;

        // Clean inside { ... } if it looks like a token (dotted identifier);
        // anything else between braces is left exactly as it was.
        $xml = preg_replace_callback('/\{[^{}]*\}/s', function (array $m) {
            $clean = $this->stripMarkupAndWhitespace(substr($m[0], 1, -1));
            if (preg_match('/^[a-zA-Z0-9_]+(?:\.[a-zA-Z0-9_]+)*\.[a-zA-Z0-9_.-]+$/', $clean)) {
                return '{'.$clean.'}';
            }

            return $m[0];
        }, $xml) ?? $xml;

        return $xml;
    }

    /**
     * Remove every XML tag and all whitespace from a token body.
     */
    private function stripMarkupAndWhitespace(string $inner): string
    {
        $inner = preg_replace('/<[^>]+>/', '', $inner) ?? $inner;

        return preg_replace('/\s+/', '', $inner) ?? $inner;
    }
}
|
||||
Reference in New Issue
Block a user