dirty_exit_recovery = array(
'DSTRING' => '/[^">]*+("|$|(?=[>]))/',
'SSTRING' => "/[^'>]*+('|$|(?=[>]))/",
'COMMENT1' => '/(?> [^\\-]+ | -(?!->))*(?:-->|$)/x',
'COMMENT2' => '/[^>]*+(?:>|$)/s',
'CDATA' => '/(?>[^\\]]+|\\](?!\\]>))*(?:\\]{2}>|$)/xs',
'ESC' => '/[^;]*+(?:;|$)/',
'TYPE' => '/[^\s]*/',
'VALUE' => '/[^\s]*/',
'HTMLTAG' => '/[^\s]*/',
);
$this->rule_tag_map = array(
'DSTRING' => 'STRING',
'SSTRING' => 'STRING',
'COMMENT1' => 'COMMENT',
'COMMENT2' => 'COMMENT',
'CDATA' => 'COMMENT',
);
parent::__construct($src);
}
/**
 * Delegates scanning to the child scanner registered under $lang.
 *
 * The child is moved to this scanner's current position and run. Whatever
 * it tagged is appended to this scanner's token list as a single entry,
 * and this scanner's position is then moved to where the child stopped.
 *
 * If the child stopped with its interrupt flag set, the language and the
 * position reached are stored in $this->child_state; otherwise
 * $this->child_state is reset to null.
 *
 * @param string $lang key into $this->child_scanners (e.g. 'js' or 'css')
 */
function scan_child($lang) {
  assert (isset($this->child_scanners[$lang]));

  $child = $this->child_scanners[$lang];

  // Start the child where we currently are and let it run; only its
  // tagged output is used, the return value of main() is not needed.
  $child->pos($this->pos());
  $child->main();

  // NOTE(review): the trailing `true` presumably marks the text as already
  // tagged/escaped -- confirm against the token consumer.
  $this->tokens[] = array(null, $child->tagged(), true);

  // Continue from wherever the child stopped.
  $this->pos($child->pos());

  $this->child_state = $child->interrupt
    ? array($lang, $this->pos())
    : null;
}
/**
 * Prepares the scanner before main() is run.
 *
 * Registers the patterns main() searches for while outside a tag, puts the
 * scanner in the 'global' state and, when $this->scripts is set, creates
 * and registers the JavaScript and CSS child scanners (under the keys 'js'
 * and 'css') over the same source string.
 */
function init() {
  // Outside a tag, main() skips ahead to the next match of one of these
  // patterns and then branches on the character found there ('&' or '<').
  $this->add_pattern('', '/&/');
  if ($this->embedded_server) {
    // main() sets its interrupt flag and stops when the 'TERM' pattern is
    // the next match.
    $this->add_pattern('TERM', $this->server_tags);
  }
  // Was '/', which is not a valid PCRE pattern (opening delimiter with no
  // closing one). main() expects to be stopped at '<', so match that.
  $this->add_pattern('', '/</');
  $this->state_ = 'global';

  if ($this->scripts) {
    // Each child scanner receives the same source string and the same
    // embedding settings as this scanner.
    $js = new LuminousJavaScriptScanner($this->string());
    $js->embedded_server = $this->embedded_server;
    $js->embedded_html = true;
    $js->server_tags = $this->server_tags;
    $js->init();

    $css = new LuminousCSSScanner($this->string());
    $css->embedded_server = $this->embedded_server;
    $css->embedded_html = true;
    $css->server_tags = $this->server_tags;
    $css->init();

    $this->add_child_scanner('js', $js);
    $this->add_child_scanner('css', $css);
  }
}
// Scanner-private state; both fields start out as the empty string.
// NOTE(review): presumably the name of the tag currently being scanned --
// its use is not visible in this part of the file; confirm against main().
private $tagname = '';
// NOTE(review): presumably records what kind of token main() expects next
// inside a tag -- its use is not visible here; confirm against main().
private $expecting = '';
function main() {
$this->start();
$this->interrupt = false;
while (!$this->eos()) {
$index = $this->pos();
if ($this->embedded_server && $this->check($this->server_tags)) {
$this->interrupt = true;
break;
}
if (!$this->clean_exit) {
try {
$tok = $this->resume();
if ($this->server_break($tok)) break;
$this->record($this->match(), $tok);
} catch (Exception $e) {
if (LUMINOUS_DEBUG) throw $e;
else $this->clean_exit = true;
}
continue;
}
if ($this->child_state !== null && $this->child_state[1] < $this->pos()) {
$this->scan_child($this->child_state[0]);
continue;
}
$in_tag = $this->state_ === 'tag';
if (!$in_tag) {
$next = $this->next_match(false);
if($next) {
$skip = $next[1] - $this->pos();
$this->record($this->get($skip), null);
if ($next[0] === 'TERM') {
$this->interrupt = true;
break;
}
}
} else {
$this->skip_whitespace();
if ($this->embedded_server && $this->check($this->server_tags)) {
$this->interrupt = true;
break;
}
}
$index = $this->pos();
$c = $this->peek();
$tok = null;
$get = false;
if (!$in_tag && $c === '&'
&& $this->scan('/&[^;\s]+;/')
) $tok = 'ESC';
elseif(!$in_tag && $c === '<') {
if ($this->peek(2) === 'scan('/(<)(!DOCTYPE)/i')) {
// special case: doctype
$matches = $this->match_groups();
$this->record($matches[1], null);
$this->record($matches[2], 'KEYWORD');
$this->state_ = 'tag';
continue;
}
// urgh
elseif($this->scan('/
[^\\]]+ | \\](?!\\]>) )*
(?: \\]\\]> | $ )
/ixs'
))
$tok = 'CDATA';
elseif($this->scan('/|$)/xs'))
$tok = 'COMMENT1';
elseif($this->scan('/]*+(?:>|$)/s')) $tok = 'COMMENT2';
else assert(0);
} else {
// check for