argument('contract');
        $xmlPath = (string) $this->argument('xml');
        if (! is_file($xmlPath)) {
            $this->error("XML file not found: {$xmlPath}");

            return self::FAILURE;
        }
        $xml = file_get_contents($xmlPath);
        if ($xml === false) {
            $this->error('Unable to read XML file.');

            return self::FAILURE;
        }

        $contract = Contract::where('uuid', $uuid)->first();
        if (! $contract) {
            $this->error("Contract not found for UUID: {$uuid}");

            return self::FAILURE;
        }

        // Normalize common Word run boundaries so tokens appear contiguous
        $norm = $this->normalizeRunsForTokens($xml);
        $tokens = $scanner->scan($norm);
        $this->info('Detected tokens:');
        foreach ($tokens as $t) {
            $this->line(" - {$t}");
        }
        if (empty($tokens)) {
            $this->warn('No tokens detected.');
        }

        // Build a minimal in-memory template using global whitelist so we can resolve values
        $whitelist = $settings->get()->whitelist ?? [];
        if (! is_array($whitelist)) {
            $whitelist = [];
        }
        $entities = array_keys($whitelist);
        $template = new DocumentTemplate([
            'entities' => $entities,
            'columns' => $whitelist,
            'fail_on_unresolved' => false,
            'formatting_options' => [],
            'meta' => [],
        ]);

        // Resolve values using a relaxed policy to avoid exceptions on unknowns
        $user = auth()->user() ?? (\App\Models\User::query()->first() ?: new \App\Models\User(['name' => 'System']));
        $resolved = $resolver->resolve($tokens, $template, $contract, $user, policy: 'blank');
        $values = $resolved['values'] ?? [];
        $unresolved = $resolved['unresolved'] ?? [];

        $this->info('Resolved values:');
        foreach ($values as $k => $v) {
            $short = strlen((string) $v) > 120 ? substr((string) $v, 0, 117).'...' : (string) $v;
            $this->line(" - {$k} => {$short}");
        }
        if (! empty($unresolved)) {
            $this->warn('Unresolved tokens:');
            foreach ($unresolved as $u) {
                $this->line(" - {$u}");
            }
        }

        return self::SUCCESS;
    }

    /**
     * Collapse WordprocessingML run boundaries so that text Word has split
     * across several runs becomes contiguous again.
     *
     * Three normalizations are applied, in order:
     *  1. self-closing <w:proofErr .../> markers are removed;
     *  2. every "</w:t></w:r> ... <w:r ...> [<w:rPr>...</w:rPr>] <w:t ...>" boundary
     *     is deleted, repeating until the string stops changing;
     *  3. zero-width space (U+200B) and soft hyphen (U+00AD) bytes are removed.
     *
     * The result is meant for token scanning only: deleting a boundary also
     * drops the second run's formatting, so the output is not a faithful copy
     * of the document.
     *
     * If a regex call fails (preg_replace() returns null), that step is
     * skipped and the text it received is carried forward.
     *
     * @param  string  $xml  Raw WordprocessingML markup (UTF-8 bytes).
     * @return string  The markup with run boundaries and invisible characters removed.
     */
    private function normalizeRunsForTokens(string $xml): string
    {
        // Remove proofing error markers that may split content
        $xml = preg_replace('#<w:proofErr[^>]*/>#i', '', $xml) ?? $xml;

        // Iteratively collapse boundaries between text runs, even if w:rPr is present.
        // "<w:r" and "<w:t" must be followed by whitespace or ">" so the pattern cannot
        // mistake <w:rPr>, <w:tab/> or <w:tbl> for a run or text element.
        $patterns = [
            // </w:t></w:r> [optional proofErr] <w:r ...> [optional rPr] <w:t ...>
            '#</w:t>\s*</w:r>\s*(?:<w:proofErr[^>]*/>\s*)*(?:<w:r(?:\s[^>]*)?>\s*(?:<w:rPr>.*?</w:rPr>\s*)*)?<w:t(?:\s[^>]*)?>#is',
        ];

        do {
            $before = $xml;
            foreach ($patterns as $pattern) {
                // A null result signals a PCRE failure (e.g. backtrack limit); keep the input.
                $xml = preg_replace($pattern, '', $xml) ?? $xml;
            }
        } while ($before !== $xml);

        // Remove zero-width space (U+200B) and soft hyphen (U+00AD), given as UTF-8 bytes
        return str_replace(["\xE2\x80\x8B", "\xC2\xAD"], '', $xml);
    }
}