First commit

This commit is contained in:
Pierre Hubert
2016-11-19 12:08:12 +01:00
commit 990540b2b9
4706 changed files with 931207 additions and 0 deletions

1
3rdparty/luminous/languages/.htaccess vendored Executable file
View File

@ -0,0 +1 @@
Deny from All

64
3rdparty/luminous/languages/ada.php vendored Executable file
View File

@ -0,0 +1,64 @@
<?php
/*
* TODO: user defined types and stuff
*
*/
/**
 * Ada scanner. All of the work is done by registering regex token patterns
 * and identifier -> token-type mappings with the LuminousSimpleScanner base.
 */
class LuminousAdaScanner extends LuminousSimpleScanner {

  /**
   * Registers the Ada token patterns (attributes, identifiers, numbers,
   * comments, operators, character and string literals) and maps reserved
   * words, word operators, literal values and predefined type names onto
   * their token types.
   */
  public function init() {
    // http://en.wikibooks.org/wiki/Ada_Programming/Keywords
    // http://en.wikibooks.org/wiki/Ada_Programming/All_Keywords
    $kws = array('abort', 'abstract', 'accept', 'access', 'aliased',
      'all', 'array', 'at',
      'begin', 'body',
      'case', 'constant',
      'declare','delay', 'delta', 'digits', 'do',
      'else', 'elsif', 'end', 'entry', 'exception', 'exit',
      'for', 'function',
      'generic', 'goto',
      'if', 'interface', 'is',
      'limited', 'loop',
      'new',
      'of', 'others', 'out', 'overriding',
      'package', 'pragma', 'private', 'procedure', 'protected',
      'raise', 'range', 'record', 'renames', 'requeue', 'return', 'reverse',
      'select', 'separate', 'subtype', 'synchronized',
      'tagged', 'task', 'terminate', 'then', 'type',
      'until', 'use',
      'when', 'while', 'with',
    );
    // operators which are spelled as words
    $ops = array('abs', 'and', 'in', 'mod', 'not', 'or', 'rem', 'xor');
    $vals = array('false', 'null', 'true');
    // http://en.wikibooks.org/wiki/Ada_Programming/Type_System#Predefined_types
    // 'Wide_Character' was previously spelled 'Wide_character', which does
    // not equal the canonical spelling under a case-sensitive lookup.
    $types = array('Float', 'Duration', 'Character', 'String', 'Boolean',
      'Address', 'Storage_Offset', 'Storage_Count', 'Storage_Element',
      'Storage_Array',
      'Wide_Character', 'Wide_Wide_Character',
      'Wide_String', 'Wide_Wide_String',
      'Integer',
      'Long', 'Short', 'Byte');

    // a letter, then runs of letters (each optionally preceded by a single
    // underscore) or digits; case-insensitive
    $ident = '(?i:[a-z](?:_?[a-z]++|\d++)*+)';

    // http://en.wikibooks.org/wiki/Ada_Programming/Lexical_elements#Identifiers
    // an identifier directly after "<name>'" is tagged OO (e.g. X'Length)
    $this->add_pattern('OO', "/(?<=[a-z0-9_]')$ident/");
    $this->add_pattern('IDENT', "/$ident/");

    // http://en.wikibooks.org/wiki/Ada_Programming/Lexical_elements#Numbers
    // no bnf :( might be wrong
    // based literal, e.g. 16#FF#
    $this->add_pattern('NUMERIC', '/\d+#[a-f0-9]*#/i');
    // decimal literal with optional fraction and exponent
    $this->add_pattern('NUMERIC', "/[0-9]++[0-9_]*+(\.[0-9_]++)?([eE][\-+]?[0-9_]++)?/");

    $this->add_pattern('COMMENT', '/--.*/');
    $this->add_pattern('OPERATOR', '@=|/=|>=?|<=?|\+|-|\*\*?|/|&|:=@');

    // http://rosettacode.org/wiki/Special_characters#Ada
    $this->add_pattern('CHARACTER', "/'.'/");
    // a doubled quote ("") inside a string is an escaped quote
    $this->add_pattern('STRING', '/"(?:[^"]++|"")*"/');

    $this->add_identifier_mapping('KEYWORD', $kws);
    $this->add_identifier_mapping('OPERATOR', $ops);
    $this->add_identifier_mapping('VALUE', $vals);
    $this->add_identifier_mapping('TYPE', $types);
  }
}

69
3rdparty/luminous/languages/as.php vendored Executable file
View File

@ -0,0 +1,69 @@
<?php
// as far as I know, actionscript and javascript are both derivatives of
// ECMAScript, and therefore we can subclass JavaScript's scanner and just
// override the identifier names.
// but we also override init so as to prevent any embedding
/**
 * ActionScript scanner: reuses the ECMAScript scanner's rules, switches off
 * embedding and installs ActionScript's own identifier lists.
 */
class LuminousActionScriptScanner extends LuminousECMAScriptScanner {

  /**
   * Disables server/script embedding, runs the parent initialisation, adds a
   * preprocessor pattern and registers the ActionScript identifier mappings.
   */
  function init() {
    $this->embedded_server = false;
    $this->embedded_script = false;
    parent::init();

    // add preprocessor support: a '#' at the start of a line, optionally
    // indented. The caret must be an unescaped anchor (made per-line by the
    // /m flag); escaped as '\^' it would only match a literal '^' character.
    $this->add_pattern('PREPROCESSOR', '/^\s*#.*/m');

    // clear the identifier map for JS and insert our own.
    // $this->ident_map = array();
    // NOTE(review): whether an empty mapping actually clears the inherited
    // map depends on add_identifier_mapping(), which is not visible here.
    $this->add_identifier_mapping('', array());

    $this->add_identifier_mapping('FUNCTION', array('add', 'chr',
      'clearInterval', 'escape', 'eval',
      'evaluate', 'fscommand', 'getProperty', 'getTimer', 'getVersion',
      'globalStyleFormat', 'gotoAndPlay', 'gotoAndStop', 'ifFrameLoaded',
      'instanceOf', 'isFinite', 'isNaN', 'loadMovie', 'loadMovieNum',
      'loadVariables', 'mbchr', 'mblength', 'mbord', 'mbsubstring', 'nextFrame',
      'nextScene', 'onClipEvent',
      'ord', 'parseFloat', 'parseInt', 'play', 'prevFrame', 'prevScene', 'print',
      'printAsBitMap', 'printNum', 'random', 'scroll', 'setInterval',
      'setProperty', 'stop', 'stopDrag', 'substring', 'super', 'targetPath',
      'tellTarget', 'toString', 'toggleHighQuality', 'trace', 'unescape'));

    $this->add_identifier_mapping('TYPE', array('Accessibility',
      'Array', 'Arguments', 'Boolean',
      'Button', 'ByteArray', 'Camera', 'Color', 'Date', 'Event', 'FScrollPane',
      'FStyleFormat',
      'Function', 'int', 'Key', 'LoadVars', 'LocalConnection', 'Math',
      'Microphone', 'Mouse', 'Movieclip', 'Number', 'Object', 'Selection',
      'Sound', 'Sprite', 'String', 'System', 'TextField', 'TextFormat',
      'Timer', 'TimerEvent', 'uint', 'var', 'void', 'XML'));

    $this->add_identifier_mapping('KEYWORD', array('as', 'break',
      'case', 'catch', 'class', 'const', 'continue', 'default', 'delete',
      'do', 'else', 'extends', 'false', 'finally', 'for', 'function',
      'if', 'implements', 'import', 'in', 'instanceof', 'interface', 'internal',
      'is', 'native', 'new', 'null', 'package', 'private', 'protected', 'public',
      'return', 'super', 'switch', 'static', 'this', 'throw', 'to', 'true', 'try',
      'typeof', 'use', 'void', 'while', 'with'));
  }

  /**
   * Scores how much $src looks like ActionScript.
   *
   * @param string $src   the source code to inspect
   * @param mixed  $info  unused by this scanner
   * @return float 0.0, 0.15 or 0.30
   */
  public static function guess_language($src, $info) {
    // actionscript looks a lot like a cross between Java and, umm, well,
    // Java.
    // It has a semi-unique way of declaring types for arguments and
    // returns and so forth as argname:type, or function name(args):ret-type
    $p = 0.0;
    // function name(args):ret-type
    if (preg_match(
      '/\\bfunction\s+\w+\s*\\([^\\)]+\\):(String|int|Number|void)/',
      $src)) $p += 0.15;
    // var name:type
    if (preg_match('/\\bvar\s+\w+:(String|int|Number)/', $src)) $p += 0.15;
    return $p;
  }
}

273
3rdparty/luminous/languages/bash.php vendored Executable file
View File

@ -0,0 +1,273 @@
<?php
/*
* XXX: I don't really know bash all that well. I don't know how the
* interpolation rules work exactly, i.e. if
* x=" $( # )"
* is left unterminated by the comment, or if the comment terminates at the ).
* Kate thinks the latter and I'll go with it.
*
*
* 2011-10-10: Changed comment regex to require a preceding whitespace char
* (or start of string). This seems in line with Kate, and it prevents
* incorrectly hitting some things as comments which are actually
* [I have no idea]. e.g.: for (( i=0; i<${#1}; i=i+2 ));
*
* Also changed it to not apply any highlighting between (( ... ))
* blocks, which fixes a bug regarding shifts being detected as
* heredocs. This is buggy - it should detect SOME types inside
* these blocks, and it should be aware of nested brackets. FIXME.
*/
/**
 * Hand-written scanner for bash-like shell source. main() walks the input,
 * keeping a stack of open brackets, and spawns child scanners for $( ... )
 * substitutions found inside double quoted strings and unquoted heredocs.
 */
class LuminousBashScanner extends LuminousScanner {

  // Set to true on the child scanners created for $( ... ) inside a string
  // or heredoc; main() then stops as soon as its bracket stack is empty.
  public $interpolated = false;

  /**
   * Filter for STRING tokens: escapes the token, then wraps $name and
   * ${...} occurrences in the token text in <VARIABLE> tags.
   *
   * @param array $token  token whose element [1] is the token text
   * @return array the modified token
   */
  public static function string_filter($token) {
    $token = LuminousUtils::escape_token($token);
    $token[1] = preg_replace("/\\$(?:\w+|\\{[^}\n]+\\})/",
      '<VARIABLE>$0</VARIABLE>', $token[1]);
    return $token;
  }

  /**
   * Registers the keyword and command-name identifier mappings and adjusts
   * the filter set (removes 'oo-syntax' and 'comment-to-doc', adds the
   * string filter).
   */
  public function init() {
    $this->add_identifier_mapping('KEYWORD', array('case', 'do', 'done',
      'elif', 'else', 'esac', 'fi', 'for', 'function', 'if', 'in', 'select',
      'then', 'time', 'until', 'while',
      'foreach', 'end' // zsh I think
    ));
    // I could ls /usr/bin, but i think this will do for now
    // ('wget' was previously listed as 'Wget'; the command name is lowercase)
    $this->add_identifier_mapping('FUNCTION', array('adduser', 'addgroup',
      'alias', 'apropos', 'apt-get', 'aptitude', 'aspell', 'awk', 'basename', 'bash',
      'bc', 'bg', 'break', 'builtin', 'bzip2', 'cal', 'case', 'cat', 'cd', 'cfdisk',
      'chgrp', 'chmod', 'chown', 'chroot', 'chkconfig', 'cksum', 'clear', 'cmp',
      'comm', 'command', 'continue', 'cp', 'cron', 'crontab', 'csplit', 'cut', 'date',
      'dc', 'dd', 'ddrescue', 'declare', 'df', 'diff', 'diff3', 'dig', 'dir',
      'dircolors', 'dirname', 'dirs', 'dmesg', 'du', 'echo', 'egrep', 'eject',
      'enable', 'env', 'ethtool', 'eval', 'exec', 'exit', 'expect', 'expand',
      'export', 'expr', 'false', 'fdformat', 'fdisk', 'fg', 'fgrep', 'file', 'find',
      'fmt', 'fold', 'for', 'format', 'free', 'fsck', 'ftp', 'function', 'fuser',
      'gawk', 'getopts', 'git', 'grep', 'groups', 'gzip', 'hash', 'head', 'help',
      'history', 'hg', 'hostname', 'iconv', 'id', 'if', 'ifconfig', 'ifdown', 'ifup',
      'import', 'install', 'jobs', 'join', 'kill', 'killall', 'less', 'let', 'ln',
      'local', 'locate', 'logname', 'logout', 'look', 'lpc', 'lpr', 'lprint',
      'lprintd', 'lprintq', 'lprm', 'ls', 'lsof', 'make', 'man', 'mkdir', 'mkfifo',
      'mkisofs', 'mknod', 'more', 'mount', 'mtools', 'mtr', 'mv', 'mmv', 'nano',
      'netstat', 'nice', 'nl', 'nohup', 'notify-send', 'nslookup', 'open', 'op',
      'passwd', 'paste', 'pathchk', 'ping', 'pkill', 'popd', 'pr', 'printcap',
      'printenv', 'printf', 'ps', 'pushd', 'pwd', 'quota', 'quotacheck', 'quotactl',
      'ram', 'rcp', 'read', 'readarray', 'readonly', 'reboot', 'rename', 'renice',
      'remsync', 'return', 'rev', 'rm', 'rmdir', 'rsync', 'screen', 'scp', 'sdiff',
      'sed', 'select', 'seq', 'set', 'sftp', 'shift', 'shopt', 'shutdown', 'sleep',
      'slocate', 'sort', 'source', 'split', 'ssh', 'strace', 'su', 'sudo', 'sum',
      'suspend', 'svn', 'symlink', 'sync', 'tail', 'tar', 'tee', 'test', 'time',
      'times', 'touch', 'top', 'traceroute', 'trap', 'tr', 'true', 'tsort', 'tty',
      'type', 'ulimit', 'umask', 'umount', 'unalias', 'uname', 'unexpand', 'uniq',
      'units', 'unset', 'unshar', 'until', 'useradd', 'usermod', 'users', 'uuencode',
      'uudecode', 'v', 'vdir', 'vi', 'vim', 'vmstat', 'watch', 'wc', 'whereis',
      'which', 'while', 'who', 'whoami', 'wget', 'write', 'xargs', 'xdg-open',
      'yes',));
    $this->remove_stream_filter('oo-syntax');
    $this->remove_filter('comment-to-doc');
    $this->add_filter('str-filter', 'STRING', array($this, 'string_filter'));
  }

  /**
   * Scans the whole source string and records its tokens.
   *
   * $stack holds the currently open brackets as array(bracket, is_keyword)
   * pairs; when is_keyword is true the matching closing bracket is recorded
   * as KEYWORD, otherwise as plain text. When $this->interpolated is set the
   * loop ends as soon as a closing bracket leaves the stack empty.
   */
  function main() {
    $stack = array();
    while(!$this->eos()) {
      $c = $this->peek();
      // double brackets are apparently an arithmetic operation
      // http://stackoverflow.com/questions/2188199/bash-double-or-single-bracket-parentheses-curly-braces
      // anyway, if we apply normal highlighting in them, it seems to break some
      // things
      // TODO I think we should respect certain sub-types in the (( ... ))
      // block, like strings and other stuff. It may require re-factoring the
      // scanner to get this right. It may be best to switch this to a
      // LuminousStatefulScanner. This will do for now though.
      if ($this->scan('/(\\$?)(\({2})/')) {
        $dollar = $this->match_group(1);
        $this->record($this->match(), $dollar? 'KEYWORD' : null);
        if ($this->scan_until('/\){2}/') !== null) {
          $this->record($this->match(), null);
          $this->record($this->scan('/\){2}/'), $dollar? 'KEYWORD' : null);
        } else {
          // unterminated: everything left belongs to the block
          $this->record($this->rest(), null);
          $this->terminate();
        }
        // The scan pointer has moved past the whole block, so $c no longer
        // describes the next character. Start a fresh iteration instead of
        // falling into the chain below, where a stale '(' would consume the
        // character after '))' and push a phantom bracket onto $stack.
        continue;
      }
      if ($this->scan('/\\$([{(])/')) {
        // ${ or $( : the closing bracket will be a KEYWORD
        $this->record($this->match(), 'KEYWORD');
        $stack[] = array($this->match_group(1), true);
      }
      elseif($c === '[') {
        $this->record($this->get(), 'KEYWORD');
        $stack[] = array($c, true);
      }
      elseif ($c === '{' || $c === '(') {
        $this->record($this->get(), null);
        $stack[] = array($c, false);
      }
      elseif($c === '}' || $c === ')' || $c ===']') {
        $match = array('{'=>'}', '('=>')', '[' => ']');
        $type = null;
        if (isset($stack[0])) {
          $pop = array_pop($stack);
          if ($pop[1]) $type = 'KEYWORD';
          if ($match[$pop[0]] !== $c) {
            // err: mismatched bracket, put the opener back
            $stack[] = $pop;
            $type = null;
          }
        }
        $this->record($this->get(), $type);
        if (empty($stack) && $this->interpolated) {
          // a child scanner is finished once its brackets balance
          break;
        }
      }
      elseif($c === '`') {
        $this->record($this->get(), 'KEYWORD');
      }
      elseif ($this->scan('/
\$( [_a-zA-Z]\w* | [\d\#*@\-!_\\?\\$])
/xm')
      ) {
        $this->record($this->match(), 'VARIABLE');
      }
      // assignment at the start of a line: name=
      elseif($this->scan('/^(\s*)([_a-zA-Z]\w*(?=[=]))/m')) {
        $m = $this->match_groups();
        if ($m[1] !== '') $this->record($m[1], null);
        $this->record($m[2], 'VARIABLE');
      }
      // comments need a preceding whitespace char (or start of line); inside
      // a child scanner with one open bracket the comment stops before ')'
      elseif (($this->interpolated && count($stack) === 1 &&
        $this->scan('/(?<=\s|^)\#.*?(?=[)]|$)/m'))
        || $this->scan('/(?<=\s|^)\#.*/')) {
        $this->record($this->match(), 'COMMENT');
      }
      // single quoted string; with a leading $ it is recorded as VARIABLE
      elseif(($m = $this->scan("/\\$?'(?> [^'\\\\]+ | \\\\.)* '/sx"))) {
        $tok = ($m[0] === '$')? 'VARIABLE' : 'STRING';
        $this->record($m, $tok);
      }
      elseif($this->scan('/-*[a-zA-Z_][\-\w]*/')) {
        $this->record($this->match(), 'IDENT');
      }
      // quoted heredoc is the same as a single string, no interpolation,
      // A straight regex is causing backtracking problems on my box so
      // we're going to do it the hard way
      // note that the <<- means the delimiter can be indented.
      elseif($this->scan('/(<<-?)(\s*)(["\'])(\w+)((?:\\3)?)/msx')) {
        $m = $this->match_groups();
        $this->record($m[1] . $m[2], null);
        $this->record($m[3] . $m[4] . $m[5], 'DELIMITER');
        $delim_regex = "/^(" . (($m[1] === '<<-')? '\s*' : '')
          . ')(' . preg_quote($m[4], '/') . ')\\b/m';
        $heredoc = $this->scan_until($delim_regex);
        if ($heredoc === null) {
          // no terminator: the heredoc runs to the end of the input
          $heredoc = $this->rest();
          $this->terminate();
        }
        $this->record($heredoc, 'HEREDOC');
        if ($this->scan($delim_regex) !== null) {
          $g = $this->match_groups();
          if ($g[1] !== '') $this->record($g[1], null);
          $this->record($g[2], 'DELIMITER');
        }
      }
      // heredocs and double quoted strings are pretty much the same
      elseif($this->scan('/(<<-?\s*)(\w+)/') ||
        $this->scan('/\\$?"/'))
      {
        $pos = $this->match_pos();
        $m = $this->match_groups();
        $type = 'STRING';
        $delim = '';
        if ($m[0][0] === '<') {
          $type = 'HEREDOC';
          $this->record($m[1], null);
          $this->record($m[2], 'KEYWORD');
          $delim = $m[2];
          if ($m[0][2] === '-') $delim = "[ \t]*" . $delim;
          $pos = $this->pos();
        }
        elseif($m[0][0] === '$') $type = 'VARIABLE';
        $in_str = true;
        // first search: the terminator (heredoc delimiter or an unescaped
        // double quote); second search: an unescaped $( substitution
        $searches = array(($type === 'HEREDOC')? "/^$delim\\b/m" :
          '/(?<!\\\\)((?:\\\\\\\\)*)(")/',
          '/(?<!\\\\)((?:\\\\\\\\)*)(\\$\\()/');
        while(1) {
          list($index, $matches) = $this->get_next($searches);
          if ($index === -1) {
            // nothing further: the rest of the input is the string
            $this->record(substr($this->string(), $pos), $type);
            $this->terminate();
            break;
          }
          $hit = isset($matches[2])? $matches[2] : $matches[0];
          $index_ = $index + strlen($matches[0]);
          if($hit === '"') {
            $this->record(substr($this->string(), $pos, $index_ - $pos), $type);
            $this->pos($index_);
            break;
          }
          // URGH WORST CHECK EVER.
          // (only the heredoc delimiter pattern has no second group)
          elseif($type === 'HEREDOC' && !isset($matches[2])) {
            $this->record(substr($this->string(), $pos, $index-$pos), $type);
            $this->record($hit, 'KEYWORD');
            $this->pos($index_);
            break;
          }
          else {
            // $( found: record the string so far, then let a child scanner
            // handle the substitution and carry on after it
            $index_ = $index + strlen($matches[1]);
            $this->record(substr($this->string(), $pos, $index_-$pos), $type);
            $child = new LuminousBashScanner($this->string());
            $child->pos($index_);
            $child->interpolated = true;
            $child->init();
            $child->main();
            $this->record($child->tagged(), 'INTERPOLATED', true);
            $pos = $child->pos();
            $this->pos($pos);
          }
        }
      }
      // redirections, e.g. 2>&1
      elseif($this->scan('/\d*[<>]+&?\d*/')) {
        $this->record($this->match(), 'KEYWORD');
      }
      // a run of characters that cannot start any of the tokens above
      elseif($this->scan("/[^_\-a-zA-Z$'\"\#\{\}\(\)\[\]<>&\d`\n]+/") !== null) {
        $this->record($this->match(), null);
      }
      else
        $this->record($this->get(), null);
    }
  }

  /**
   * Scores how much $src looks like a shell script.
   *
   * @param string $src   the source code to inspect
   * @param array  $info  may carry a 'shebang' entry holding the shebang line
   * @return float 1.0 if the shebang names a shell, otherwise the sum of the
   *               heuristic weights below
   */
  public static function guess_language($src, $info) {
    $p = 0.0;
    if (isset($info['shebang'])
      && preg_match('%\\b (?:bash|csh|ksh|zsh|sh) \\b%x', $info['shebang'])
    )
      return 1.0;
    // strange conditional syntax -- if [ -z ... ]
    if (preg_match('/ (if|while) \s++ \\[\s++-\w/x', $src)) $p += 0.10;
    // quoted vars used in comparison: if [ "$somevar" ...
    if (preg_match('/"\\$\w++"/', $src)) $p += 0.05;
    // case ... esac has to be worth something.
    // Both words must be present: strpos() returns false on a miss, and
    // "false < int" is true for any non-zero position, which would otherwise
    // award the bonus to input containing 'esac' but no 'case'.
    $case_pos = strpos($src, 'case');
    $esac_pos = strpos($src, 'esac');
    if ($case_pos !== false && $esac_pos !== false && $case_pos < $esac_pos)
      $p += 0.1;
    return $p;
  }
}

95
3rdparty/luminous/languages/bnf.php vendored Executable file
View File

@ -0,0 +1,95 @@
<?php
/*
* BNF has a lot of different variants and matching them all is pretty much
* impossible.
*
* We're going to match the standard BNF and extended BNF and hopefully a
* few very similar dialects
*/
/**
 * Scanner for BNF grammars. init() picks one of two rule sets depending on
 * whether the source contains <angle-bracketed> rule names.
 */
class LuminousBNFScanner extends LuminousStatefulScanner {

  /**
   * Override for RULE matches in extended mode: records the leading
   * whitespace (if any), records the rule name as USER_FUNCTION, remembers
   * the name in $this->user_defs as a VALUE, and advances past both.
   *
   * @param array $matches  [1] leading whitespace, [2] rule name
   */
  function user_def_ext($matches) {
    if ($matches[1] !== '')
      $this->record($matches[1], null);
    $this->record_token($matches[2], 'USER_FUNCTION');
    $this->user_defs[$matches[2]] = 'VALUE';
    // only groups 1 and 2 are consumed; the trailing group is left in place
    $this->pos_shift(strlen($matches[1]) + strlen($matches[2]));
  }

  /**
   * Patterns for standard BNF, where rule names are delimited by <...>.
   */
  private function set_strict() {
    // no transition table necessary, I think
    $this->add_pattern('COMMENT', '/<![^>]*>/');
    // a <name> or {name} at the start of a line is a definition
    $this->add_pattern('KEYWORD', '/(?<=^<)[^>]+(?=>)/m');
    $this->add_pattern('KEYWORD', '/(?<=^\\{)[^\\}]+(?=\\})/m');
    $this->add_pattern('VALUE', '/(?<=\\{)[^\\}]+(?=\\})/');
    $this->add_pattern('VALUE', '/[\\-\w]+/');
  }

  /**
   * Patterns, transition table and tag map for extended BNF.
   */
  private function set_extended() {
    $this->add_pattern('COMMENT', '/\\(\\* .*? \\*\\)/sx');
    // the following are registered with separate start and end patterns
    $this->add_pattern('OPTION', '/\\[/', '/\\]/');
    $this->add_pattern('REPETITION', '/\\{/', '/\\}/');
    $this->add_pattern('GROUP', '/\\(/', '/\\)/');
    $this->add_pattern('SPECIAL', '/\\?/', '/\\?/');
    $ident = '(?:[\w\\-]+)';
    $this->add_pattern('RULE', "/(^[ \t]*)($ident)(\s*(?![[:alnum:]\s]))/mi");
    $this->overrides['RULE'] = array($this, 'user_def_ext');
    $this->add_pattern('IDENT', "/$ident/");
    // technically I don't know if we really need to worry about a transition
    // table, but here we are anyway
    $all = array('COMMENT', 'OPTION', 'REPETITION', 'GROUP', 'SPECIAL',
      'STRING', 'IDENT', 'OPERATOR');
    // Everything except SPECIAL. array_diff() keeps the original keys, just
    // as the previous array_filter() call did, and avoids create_function(),
    // which no longer exists as of PHP 8.0.
    $almost_all = array_diff($all, array('SPECIAL'));
    $this->transitions = array(
      'initial' => array_merge(array('RULE'), $all),
      'OPTION' => $all,
      'REPETITION' => $all,
      'GROUP' => $all,
      'SPECIAL' => $almost_all
    );
    $this->rule_tag_map = array(
      'OPTION' => null,
      'REPETITION' => null,
      'GROUP' => null,
      'SPECIAL' => null
    );
  }

  /**
   * Chooses strict or extended mode from the source text, then adds the
   * patterns common to both and removes two filters.
   */
  function init() {
    // the original BNF uses <angle brackets> to delimit its
    // production rule names
    if (preg_match('/<\w+>/', $this->string())) {
      $this->set_strict();
    }
    else {
      $this->set_extended();
    }
    $this->add_pattern('STRING', LuminousTokenPresets::$SINGLE_STR_SL);
    $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR_SL);
    $this->add_pattern('OPERATOR', '/[*\\-=+;:\\|,]+/');
    // assume a few chars at bol indicate a commented line
    // NOTE(review): inside the class, '%-;' is a range (0x25 to 0x3B, which
    // includes digits, quotes' "'" and '/'), not the three literals '%', '-'
    // and ';' -- confirm which was intended before changing it.
    $this->add_pattern('COMMENT', '/^[!%-;].*/m');
    $this->remove_filter('constant');
    $this->remove_filter('comment-to-doc');
  }

  /**
   * Language guess: always 0.
   *
   * @param string $src   unused
   * @param mixed  $info  unused
   * @return int 0
   */
  static function guess_language($src, $info) {
    // being honest, BNF is going to be so rare that if we ever return
    // anything other than 0, it's more likely that we're obscuring the
    // correct scanner than correctly identifying BNF.
    return 0;
  }
}

132
3rdparty/luminous/languages/cpp.php vendored Executable file
View File

@ -0,0 +1,132 @@
<?php
require_once(dirname(__FILE__) . '/include/c_func_list.php');
// TODO: trigraph... does anyone use these?
/**
 * Scanner for C and C++ source, built on LuminousSimpleScanner.
 */
class LuminousCppScanner extends LuminousSimpleScanner {

  /**
   * Installs the preprocessor filter and the identifier mappings.
   *
   * @param string|null $src  source text handed to the parent constructor
   */
  function __construct($src=null) {
    parent::__construct($src);
    $this->add_filter('preprocessor', 'PREPROCESSOR',
      array($this, 'preprocessor_filter'));
    // token type => name of the global holding the identifier list
    // (presumably defined by include/c_func_list.php -- verify there)
    $identifier_sources = array(
      'FUNCTION' => 'luminous_c_funcs',
      'KEYWORD' => 'luminous_c_keywords',
      'TYPE' => 'luminous_c_types',
    );
    foreach ($identifier_sources as $token_type => $global_name) {
      $this->add_identifier_mapping($token_type, $GLOBALS[$global_name]);
    }
  }

  /**
   * Registers the token patterns and routes PREPROCESSOR matches through
   * preprocessor_override().
   */
  function init() {
    // Lexical reference: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html
    //   D   [0-9]
    //   L   [a-zA-Z_]
    //   H   [a-fA-F0-9]
    //   E   [Ee][+-]?{D}+
    //   FS  (f|F|l|L)
    //   IS  (u|U|l|L)*
    //   {L}({L}|{D})*             ident
    //   0[xX]{H}+{IS}?            hex
    //   0{D}+{IS}?                octal
    //   {D}+{IS}?                 int
    //   L?'(\\.|[^\\'])+'         char
    //   {D}+{E}{FS}?              real/float
    //   {D}*"."{D}+({E})?{FS}?    real/float
    //   {D}+"."{D}*({E})?{FS}?    real/float
    //   L?\"(\\.|[^\\"])*\"       string, but we should exclude nl
    $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
    $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_SL);

    // a string ends at its closing quote or at the end of the line
    $string_pattern = "/L?\"(?: [^\\\\\"\n]+ | \\\\.)*(?:$|\")/xms";
    $this->add_pattern('STRING', $string_pattern);

    // character literal, as best remembered
    $char_pattern =
      "/L? ' (?: \\\\(?: x[A-F0-9]{1,2}| . ) | . ) (?: '|$)/ixm";
    $this->add_pattern('CHARACTER', $char_pattern);

    $this->add_pattern('OPERATOR', '@[!%^&*\-/+=~:?.|<>]+@');

    // hexadecimal
    $this->add_pattern('NUMERIC', '/0[xX][A-F0-9]+[uUlL]*/i');
    // reals containing a decimal point
    $this->add_pattern('NUMERIC', '/
(?:
(?: \d* \.\d+ | \d+\.\d*)
([eE][+-]?\d+)?
([fFlL]?)
)
/ix');
    // integers (octal included) and point-less reals
    $this->add_pattern('NUMERIC', '/
\d+([uUlL]+ | ([eE][+-]?\d+)?[fFlL]? | ) #empty string on the end
/x');
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);

    // only the leading '#' is matched here; the override consumes the rest
    $this->add_pattern('PREPROCESSOR', '/^[ \t]*\#/m');
    $this->add_pattern('IDENT', '/[a-zA-Z_]+\w*/');

    $this->overrides['PREPROCESSOR'] = array($this, 'preprocessor_override');
  }

  /**
   * Handles a preprocessor directive. An "#if 0" region is recorded as a
   * nestable COMMENT; any other directive is consumed in one go and recorded
   * as PREPROCESSOR, leaving nested highlighting to the filter.
   */
  function preprocessor_override() {
    $this->skip_whitespace();
    // Kate treats "#if 0" blocks as nesting, which sounds reasonable
    $if_zero = '/^\s*#\s*if\s+0\\b/m';
    if (!$this->check($if_zero)) {
      // The directive may hold comments and strings. Take the lazy route:
      // swallow the lot with one regex and let the filter sort out the rest.
      $this->scan("@ \#
(?: [^/\n\\\\]+
| /\* (?> [^\\*]+ | (?:\*(?!/))+ ) (?: $|\*/) # nested ML comment
| //.* # nested SL comment
| /
| \\\\(?s:.) # escape, and newline
)* @x");
      $this->record($this->match(), 'PREPROCESSOR');
      return;
    }
    $this->nestable_token('COMMENT', '/^\s*#\s*if(?:n?def)?\\b/m',
      '/^\s*#\s*endif\\b/m');
  }

  /**
   * preg_replace_callback handler used by preprocessor_filter(): tags a
   * quoted string or an escaped <...> include target as STRING, and anything
   * else as COMMENT.
   *
   * @param array $matches  regex match groups
   * @return string the tagged replacement text
   */
  static function preprocessor_filter_cb($matches) {
    if (!isset($matches[0], $matches[0][0])) {
      // shouldn't ever happen
      return '';
    }
    $whole = $matches[0];
    $first = $whole[0];
    if ($first === '"') {
      return LuminousUtils::tag_block('STRING', $whole);
    }
    if ($first === '&') {
      return '&lt;' . LuminousUtils::tag_block('STRING', $matches[1]) . '&gt;';
    }
    return LuminousUtils::tag_block('COMMENT', $whole);
  }

  /**
   * Filter for PREPROCESSOR tokens: escapes the token, then tags the strings
   * and comments nested inside its text.
   *
   * @param array $token  token whose element [1] is the token text
   * @return array the modified token
   */
  static function preprocessor_filter($token) {
    $token = LuminousUtils::escape_token($token);
    $nested = "@
(?:\" (?> [^\\\\\n\"]+ | \\\\. )* (?: \"|$) | (?: &lt; (.*?) &gt;))
| // .*
| /\* (?s:.*?) (\*/ | $)
@x";
    $callback = array('LuminousCppScanner', 'preprocessor_filter_cb');
    $token[1] = preg_replace_callback($nested, $callback, $token[1]);
    return $token;
  }

  /**
   * Scores how much $src looks like C.
   *
   * @param string $src   the source code to inspect
   * @param mixed  $info  unused by this scanner
   * @return float sum of the weights of the heuristics that matched
   */
  static function guess_language($src, $info) {
    // C looks like nearly every other language; its most distinctive traits
    // are the preprocessor and "char * ", so those carry the score.
    $heuristics = array(
      array('/^\s*+#\s*+(include\s++[<"]|ifdef|endif|define)\\b/m', 0.3),
      array('/\\bchar\s*\\*\s*\w+/', 0.05),
      array('/\\bmalloc\s*\\(/', 0.02),
    );
    $score = 0.0;
    foreach ($heuristics as $heuristic) {
      if (preg_match($heuristic[0], $src)) {
        $score += $heuristic[1];
      }
    }
    // TODO we could guess at some C++ stuff too
    return $score;
  }
}

52
3rdparty/luminous/languages/csharp.php vendored Executable file
View File

@ -0,0 +1,52 @@
<?php
/**
 * Scanner for C# source, built on LuminousSimpleScanner.
 */
class LuminousCSharpScanner extends LuminousSimpleScanner {

  /**
   * Registers the token patterns, the keyword list and the type list (the
   * built-in types merged with the list loaded from csharp_list.php).
   */
  public function init() {
    // a directive runs to the end of the line; a backslash escapes any char
    $this->add_pattern('PREPROCESSOR', "/\\#(?: [^\\\\\n]+ | \\\\. )*/sx");
    $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_SL);
    $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
    $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
    $this->add_pattern('CHARACTER', LuminousTokenPresets::$SINGLE_STR);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
    $this->add_pattern('IDENT', '/[a-z_]\w+/i');
    $this->add_pattern('OPERATOR', '/[¬!%^&*\-=+~|?\\/><;:.,]+/i');

    $keywords = array('abstract', 'as', 'base',
      'break', 'case', 'catch', 'checked', 'class', 'continue', 'default',
      'delegate', 'do', 'event', 'explicit', 'extern', 'else', 'finally',
      'false', 'fixed', 'for', 'foreach', 'goto', 'if', 'implicit', 'in',
      'interface', 'internal', 'is', 'lock', 'new', 'null', 'namespace',
      'operator', 'out', 'override', 'params', 'private',
      'protected', 'public', 'readonly', 'ref', 'return', 'struct', 'switch',
      'sealed', 'sizeof', 'stackalloc', 'static', 'this', 'throw', 'true',
      'try', 'typeof', 'unchecked', 'unsafe', 'using', 'var', 'virtual',
      'volatile', 'while', 'yield');
    $this->add_identifier_mapping('KEYWORD', $keywords);

    // the included file supplies $luminous_csharp_type_list in this scope
    require(dirname(__FILE__) . '/include/csharp_list.php');

    // built-in types
    $primitives = array(
      'bool', 'byte', 'char',
      'const', 'double', 'decimal', 'enum', 'float', 'int', 'long',
      'object',
      'sbyte', 'short', 'string', 'uint', 'ulong', 'ushort',
      'void'
    );
    $this->add_identifier_mapping('TYPE',
      array_merge($primitives, $luminous_csharp_type_list));
  }

  /**
   * Scores how much $src looks like C#.
   *
   * @param string $src   the source code to inspect
   * @param mixed  $info  unused by this scanner
   * @return float sum of the weights of the heuristics that matched
   */
  static function guess_language($src, $info) {
    // regex => weight, evaluated in this order
    $heuristics = array(
      array('/^\s*#region\\b/m', 0.10),
      array('/^\s*using\s+System;/m', 0.10),
      array('/^\s*using\s+System\\..*;/m', 0.10),
      array('/partial\s+class\s+\w+/', 0.05),
    );
    $score = 0.0;
    foreach ($heuristics as $heuristic) {
      if (preg_match($heuristic[0], $src)) {
        $score += $heuristic[1];
      }
    }
    return $score;
  }
}

190
3rdparty/luminous/languages/css.php vendored Executable file
View File

@ -0,0 +1,190 @@
<?php
/**
* CSS scanner.
* TODO: it would be nice if we could extend this somehow to handle
* CSS dialects which allow rule nesting.
*/
/**
 * CSS scanner. main() is a hand-written loop driven by a state stack
 * ($this->state_) whose entries are 'global', 'media' (after an at-rule
 * keyword, before its '{') and 'block' (inside a declaration block).
 */
class LuminousCSSScanner extends LuminousEmbeddedWebScript {

  // inside a declaration block: 'key' before the ':' and 'value' after it
  private $expecting;

  /**
   * Sets the rule -> tag map and the recovery patterns, and pushes the
   * initial 'global' state.
   *
   * @param string|null $src  source text handed to the parent constructor
   */
  function __construct($src=null) {
    parent::__construct($src);
    $this->rule_tag_map = array(
      'TAG' => 'KEYWORD',
      'KEY' => 'TYPE',
      'SELECTOR' => 'VARIABLE',
      'ATTR_SELECTOR' => 'OPERATOR',
      'SSTRING' => 'STRING',
      'DSTRING' => 'STRING',
      'ROUND_BRACKET_SELECTOR' => 'OPERATOR',
    );
    // per token type: a pattern matching the remainder of that token
    $this->dirty_exit_recovery = array(
      'COMMENT' => '%.*?(?:\*/|$)%s',
      'SSTRING' => "/(?:[^\\\\']+|\\\\.)*(?:'|$)/",
      'DSTRING' => '/(?:[^\\\\"]+|\\\\.)*(?:"|$)/',
      'ATTR_SELECTOR' => '/(?: [^\\]\\\\]+ | \\\\.)* (?:\]|$)/xs',
      'ROUND_BRACKET_SELECTOR' => '/(?: [^\\)\\\\]+ | \\\\.)* (?:\)|$)/xs',
    );
    $this->state_ [] = 'global';
  }

  /**
   * Resets the key/value expectation.
   */
  function init() {
    $this->expecting = null;
  }

  /**
   * Scans the source, recording one token per loop iteration. Leaves the
   * loop early when a server tag or a closing </style> tag is seen, or when
   * server_break() says so.
   */
  function main() {
    $this->start();
    while (!$this->eos()) {
      if (!$this->clean_exit) {
        // finish the token that was interrupted on the previous exit
        try {
          $tok = $this->resume();
          if ($this->server_break($tok)) break;
          $this->record($this->match(), $tok);
        } catch(Exception $e) {
          if (LUMINOUS_DEBUG) throw $e;
          else continue;
        }
      }
      $this->skip_whitespace();
      $pos = $this->pos();
      $tok = null;
      $m = null;
      $state = $this->state();
      $in_block = $state === 'block';
      $in_media = $state === 'media';
      // when true, the token is the single next character (fetched with
      // get()) rather than the text of the last scan()
      $get = false;
      $c = $this->peek();
      if ($this->embedded_server && $this->check($this->server_tags)) {
        $this->interrupt = true;
        $this->clean_exit = true;
        break;
      }
      elseif ($c === '/' && $this->scan(LuminousTokenPresets::$C_COMMENT_ML))
        $tok = 'COMMENT';
      // hex colour: #abc or #aabbcc
      elseif($in_block && $c === '#' &&
        $this->scan('/#[a-fA-F0-9]{3}(?:[a-fA-F0-9]{3})?/'))
        $tok = 'NUMERIC';
      elseif($in_block && (ctype_digit($c) || $c === '-')
        && $this->scan('/-?(?>\d+)(\.(?>\d+))?(?:em|px|ex|ch|mm|cm|in|pt|%)?/')
        !== null) {
        $tok = 'NUMERIC';
      }
      elseif(!$in_block && $this->scan('/(?<=[#\.:])[\w\-]+/') !== null)
        $tok = 'SELECTOR';
      // check for valid super-blocks, e.g. @media {...} and @keyframes {}.
      // The alternatives are grouped so that the '@' applies to both; left
      // ungrouped, 'media' was a separate branch without the '@' and could
      // never match at an '@' character.
      elseif(!$in_block && !$in_media && $c === '@'
        && $this->scan('/@(?:(?:-(?:moz|ms|webkit|o)-)?keyframes|media)\\b/')
      ) {
        $this->state_[] = 'media';
        $tok = 'TAG';
      }
      elseif(( ctype_alpha($c) || $c === '!' || $c === '@' || $c === '_' || $c === '-' )
        && $this->scan('/(!?)[\-\w@]+/')) {
        if ($in_media) $tok = 'VALUE';
        elseif (!$in_block || $this->match_group(1) !== '') $tok = 'TAG';
        elseif($this->expecting === 'key') $tok = 'KEY';
        elseif($this->expecting === 'value') {
          $m = $this->match();
          if ($m === 'url' || $m === 'rgb' || $m === 'rgba') $tok = 'FUNCTION';
          else $tok = 'VALUE';
        }
      }
      // TODO attr selectors should handle embedded strings, I think.
      elseif(!$in_block && $c === '['
        && $this->scan('/\[ (?> [^\\]\\\\]+ | \\\\.)* \]/sx'))
        $tok = 'ATTR_SELECTOR';
      elseif(!$in_block && $c === '('
        && $this->scan('/\( (?> [^\\)\\\\]+ | \\\\.)* \) /sx')) {
        $tok = 'ROUND_BRACKET_SELECTOR';
      }
      elseif($c === '}' || $c === '{') {
        $get = true;
        if ($c === '}') {
          if ($in_block || $in_media) {
            // end of a declaration block, or a stray '}' directly after an
            // at-rule keyword: drop just that one state
            array_pop($this->state_);
          }
          else {
            // An at-rule pushes 'media', and its '{' pushes a new 'global'
            // on top. A '}' seen in that nested 'global' ends the at-rule,
            // so pop the 'global' and then the 'media' beneath it.
            $depth = count($this->state_);
            if ($state === 'global' && $depth >= 2
              && $this->state_[$depth - 2] === 'media'
            ) {
              array_pop($this->state_);
              array_pop($this->state_);
            }
          }
        }
        elseif (!$in_block) {
          // '{'
          if ($in_media) {
            $this->state_[] = 'global';
          }
          else {
            $this->state_[] = 'block';
            $this->expecting = 'key';
          }
        }
      }
      elseif($c === '"' && $this->scan(LuminousTokenPresets::$DOUBLE_STR))
        $tok = 'DSTRING';
      elseif($c === "'" && $this->scan(LuminousTokenPresets::$SINGLE_STR))
        $tok = 'SSTRING';
      elseif($c === ':' && $in_block) {
        $this->expecting = 'value';
        $get = true;
        $tok = 'OPERATOR';
      }
      elseif($c=== ';' && $in_block) {
        $this->expecting = 'key';
        $get = true;
        $tok = 'OPERATOR';
      }
      elseif($this->embedded_html && $this->check('%<\s*/\s*style%i')) {
        $this->interrupt = false;
        $this->clean_exit = true;
        break;
      }
      elseif($this->scan('/[:\\.#>*]+/')) $tok = 'OPERATOR';
      else {
        // nothing recognised: consume one character as plain text
        $get = true;
      }
      if ($this->server_break($tok)) break;
      $m = $get? $this->get() : $this->match();
      $this->record($m, $tok);
      // every iteration must make progress
      assert($this->pos() > $pos || $this->eos());
    }
  }

  /**
   * Scores how much $src looks like CSS.
   *
   * @param string $src   the source code to inspect
   * @param mixed  $info  unused by this scanner
   * @return int|float 0 if nothing matched, otherwise the summed weights
   */
  public static function guess_language($src, $info) {
    $p = 0;
    if (preg_match(
      "/(font-family|font-style|font-weight)\s*+:\s*+[^;\n\r]*+;/", $src))
      $p += 0.15;
    if (strpos($src, '!important') !== false) $p += 0.05;
    // generic rule
    if (preg_match("/\\b(div|span|table|body)\\b [^\n\r\{]*+ [\r\n]*+ \{/x",
      $src))
      $p += 0.10;
    return $p;
  }
}

208
3rdparty/luminous/languages/diff.php vendored Executable file
View File

@ -0,0 +1,208 @@
<?php
/*
* Diff is a strange one because we could just highlight the lines and be done
* with it, but we are actually going to try to highlight the source code AND
* the diff format
*
* As such, we handle formatting and tagging inside the scanner.
*/
class LuminousDiffScanner extends LuminousScanner {
public $patterns = array();
public $pretty_mode = false; // pretty mode uses language sub-scanners
// to try to highlight the embedded code
/* TODO: plug this into the language code selector in the old EasyAPI
* when we port it across
* This function is just a placeholder and will be implemented properly
* later.
*/
/**
 * Finds the scanner class to use for the code embedded in the diff, based
 * on the extension of $filename.
 *
 * @param string $filename  file name taken from a diff header line
 * @return string|null  scanner class name, or null when pretty mode is off,
 *                      the global $luminous_ object is not set, the name has
 *                      no '.', or no scanner is found for the extension
 */
function get_child_scanner($filename) {
  // HACK - pretty mode should be reflected elsewhere than here.
  if (!$this->pretty_mode) {
    return null;
  }
  // $luminous_ is a singleton created by the main calling API; it may not
  // exist in this context, in which case there is nothing to look up.
  global $luminous_;
  if (!isset($luminous_)) {
    return null;
  }
  $dot = strrpos($filename, '.');
  if ($dot === false) {
    return null;
  }
  $extension = strtolower(substr($filename, $dot + 1));
  $scanner = $luminous_->scanners->GetScanner($extension);
  // only the class name is wanted, not the instance
  return ($scanner === null) ? null : get_class($scanner);
}
/**
 * Wraps the parent's string(): when a new source string is supplied, first
 * works out which diff flavour it is and stores the matching 'range' and
 * 'codeblock' regexes in $this->patterns.
 *
 * @param string|null $string  new source text, or null
 * @return mixed whatever parent::string() returns
 */
function string($string=null) {
  if ($string === null) {
    return parent::string($string);
  }
  if (preg_match('/^[><]/m', $string)) {
    // normal diff: lines are prefixed with < or >
    $range = '/\d+.*/';
    $codeblock = "/(^([<> ]).*(\n)?)+/m";
  }
  elseif (preg_match('/^\*{3}/m', $string)) {
    // context diff
    $range = "/([\-\*]{3})[ \t]+\d+,\d+[ \t]+\\1.*/";
    $codeblock = "/(^([!+ ]).*(\n)?)+/m";
  }
  else {
    // unified diff
    $range = "/@@.*/";
    $codeblock = "/(^([+\- ]).*(\n)?)+/m";
  }
  $this->patterns['range'] = $range;
  $this->patterns['codeblock'] = $codeblock;
  return parent::string($string);
}
/**
 * Scans the diff line by line. Header, range and marker lines are recorded
 * with their own token types; runs of code lines are stripped of their
 * prefixes, optionally highlighted by a child scanner chosen from the file
 * name in the header, and recorded per line as DIFF_NEW, DIFF_OLD or
 * DIFF_UNCHANGED with the prefix restored.
 */
function main() {
  // we're aiming to handle context, unified and normal diff all at once here
  // because it doesn't really seem that hard.
  $child = null;
  $last_index = -1;
  while (!$this->eos()) {
    $index = $this->pos();
    // every iteration must start further on than the last, at a line start
    assert($index > $last_index);
    $last_index = $index;
    assert($this->bol());
    $tok = null;
    if ($this->scan('/diff\s.*$/m') !== null) $tok = 'KEYWORD';
    // normal, context and unified ranges
    elseif($this->scan($this->patterns['range']) !== null)
      $tok = 'DIFF_RANGE';
    elseif($this->scan("/-{3}[ \t]*$/m")) $tok = null;
    elseif($this->scan('/(?:\**|=*|\w.*)$/m') !== null) $tok = 'KEYWORD';
    // this is a header line which may contain a file path. If it does,
    // update the child scanner according to its extension.
    elseif($this->scan("@[+\-\*]{3}(\s+([^\s]*)([ \t]|$))?.*@m") !== null) {
      $m = $this->match_groups();
      // unified uses +++, context uses *
      if ($m[0][0] === '+' || $m[0][0] === '*')
        $tok = 'DIFF_HEADER_NEW';
      else $tok = 'DIFF_HEADER_OLD';
      if (isset($m[2])) {
        // Strip the directory part, i.e. everything up to the last '/' or
        // '\'. (The previous pattern only matched the two-character
        // sequence '\/', so directories were never removed.)
        $filename = preg_replace('@.*[\\\\/]@', '', $m[2]);
        $child = self::get_child_scanner($filename);
      }
    }
    elseif($this->scan('/\\\\.*/') !== null) $tok = null;
    elseif($this->scan($this->patterns['codeblock']) !== null) {
      // this is actual source code.
      // we're going to format this here.
      // we're going to extract the block, and try to re-assemble it as
      // verbatim code, then highlight it via a child scanner, then split up
      // the lines, re-apply the necessary prefixes (e.g. + or -) to them,
      // and store them as being a DIFF_ token.
      // we have to do it like this, rather than line by line, otherwise
      // multiline tokens aren't going to work properly. There's still a risk
      // that the diff will be fragmented such the child scanner gets it
      // wrong but that can't be helped.
      // TODO restructure this so the complicated bits aren't done if there's
      // no child scanner to pass it down to
      $block = $this->match();
      if (!strlen($block)) {
        assert(0);
      }
      $lines = explode("\n", $block);
      // The per-line list must start out as an array: appending with [] to
      // an empty string is a fatal error from PHP 7.1 onwards.
      $verbatim_lines = array();
      $types = array();
      $prefixes = array();
      foreach($lines as $l) {
        if (!strlen($l) || $l[0] === ' ')
          $types[]= 'DIFF_UNCHANGED';
        elseif ($l[0] === '+' || $l[0] === '>')
          $types[] = 'DIFF_NEW';
        elseif ($l[0] === '!' || $l[0] === '<' || $l[0] === '-')
          $types[] = 'DIFF_OLD';
        else assert(0);
        $prefixes[] = (isset($l[0]))? $l[0] : '';
        $verbatim_lines[] = substr($l, 1);
      }
      // the code with the diff prefixes removed
      $verbatim = implode("\n", $verbatim_lines);
      $escaped = false;
      if ($child !== null) {
        $c = new $child;
        $c->init();
        $c->string($verbatim);
        $c->main();
        $tagged = $c->tagged();
        $escaped = true;
      } else {
        $tagged = $verbatim;
      }
      $exp = explode("\n", $tagged);
      assert(count($exp) === count($prefixes));
      foreach($exp as $i=>$v) {
        $t = $types[$i];
        // if the sub-scanner escaped the line, we also need to escape the
        // prefix for consistency
        $prefix = $prefixes[$i];
        if ($escaped) $prefix = LuminousUtils::escape_string($prefix);
        $text = $prefix . $v;
        $this->record(
          $text,
          $t,
          $escaped);
        if ($i < count($exp)-1) $this->record("\n", null);
      }
      if ($this->eol()) $this->record($this->get(), null);
      continue;
    }
    else $this->scan('/.*/');
    // previous else clause can capture empty strings
    if ($this->match() !== '')
      $this->record($this->match(), $tok);
    assert($this->eol());
    // consume newline
    if (!$this->eos()) $this->record($this->get(), null);
  }
}
// Scores how likely $src is to be a diff, as a value between 0.0 and 1.0.
// $info must carry 'num_lines', the number of lines in $src.
static function guess_language($src, $info) {
  $score = 0.0;
  // a '---' header line directly followed by a '+++' header line
  if (preg_match("/^-{3}.*+[\n\r]++\\+{3}/m", $src)) {
    $score = 0.25;
  }
  // unified range marker
  if (preg_match('/^@@.*@@/m', $src)) {
    $score += 0.25;
  }
  // 'index' / 'diff' at the start of a line
  if (preg_match('/^(index|diff)\\b/m', $src)) {
    $score += 0.10;
  }
  // The remaining 0.4 depends on how many lines begin with a change marker
  // (+/-, </> or ! followed by whitespace). A share of 10% or more of all
  // lines earns the full 0.4; smaller shares are scaled linearly.
  $marked = preg_match_all('/^[<>+\\-!]\s/m', $src, $m);
  $total = $info['num_lines'];
  if ($total > 0) {
    $share = min(0.1, $marked/$total);
    $score += 0.4 * ($share * 10);
  }
  return $score;
}
}
// Variant of LuminousDiffScanner whose only difference is that $pretty_mode
// defaults to true.
// NOTE(review): $pretty_mode is not read in the part of the parent class
// visible here; presumably the parent consults it elsewhere - confirm.
class LuminousPrettyDiffScanner extends LuminousDiffScanner {
public $pretty_mode = true;
}

257
3rdparty/luminous/languages/ecmascript.php vendored Executable file
View File

@ -0,0 +1,257 @@
<?php
/**
* This is a rename of the JavaScript scanner.
* TODO Some of these things are JS specific and should be moved into
* the new JS scanner.
*/
class LuminousECMAScriptScanner extends LuminousEmbeddedWebScript {
public $script_tags = '</script>';
// regular expressions in JavaScript are delimited by '/', BUT, the slash
// character may appear unescaped within character classes
// we can handle this fairly easily with a single regex because the classes
// do not nest
// TODO:
// I do not know if this is specific to Javascript or ECMAScript derivatives
// as a whole, I also don't know if multi-line regexen are legal (i.e. when
// the definition spans multiple lines)
protected $regex_regex = "%
/
(?:
[^\\[\\\\/]+ # not slash, backslash, or [
| \\\\. # escape char
|
(?: # char class [..]
\\[
(?:
[^\\]\\\\]+ # not slash or ]
| \\\\. # escape
)*
(?: \\] | \$)
) # close char class
)*
(?: /[iogmx]* | \$) #delimiter or eof
%sx";
// logs a persistent token stream so that we can lookbehind to figure out
// operators vs regexes.
private $tokens_ = array();
private $child_state = null;
// Installs the rule-name to tag mapping and the dirty-exit recovery patterns,
// calls the parent constructor with $src, then registers the KEYWORD,
// FUNCTION and TYPE identifier tables.
function __construct($src=null) {
// rule name => tag name. OPENER and CLOSER map to null (presumably meaning
// "record without a tag" - confirm against the base scanner).
$this->rule_tag_map = array(
'COMMENT_SL' => 'COMMENT',
'SSTRING' => 'STRING',
'DSTRING' => 'STRING',
'OPENER' => null,
'CLOSER' => null,
);
// token name => pattern that consumes the remainder of that token.
// NOTE(review): presumably consumed by resume(), which main() calls when
// the previous run did not exit cleanly - confirm in the parent class.
$this->dirty_exit_recovery = array(
'COMMENT_SL' => '/.*/',
'COMMENT' => '%.*?(\*/|$)%s',
'SSTRING' => "/(?:[^\\\\']+|\\\\.)*('|$)/",
'DSTRING' => '/(?:[^\\\\"]+|\\\\.)*("|$)/',
// FIXME: Anyone using a server-side interruption to build a regex is
// frankly insane, but we are wrong in the case that they were in a
// character class when the server language interrupted, and we may
// exit the regex prematurely with this
'REGEX' => '%(?:[^\\\\/]+|\\\\.)*(?:/[iogmx]*|$)%',
);
parent::__construct($src);
// identifiers highlighted as keywords
$this->add_identifier_mapping('KEYWORD', array('break', 'case', 'catch',
'comment', 'continue', 'do', 'default', 'delete', 'else', 'export',
'for', 'function', 'if', 'import', 'in', 'instanceof', 'label', 'new',
'null', 'return', 'switch', 'throw', 'try', 'typeof', 'var', 'void',
'while', 'with',
'true', 'false', 'this'
));
// well-known global functions (plus the jQuery entry points)
$this->add_identifier_mapping('FUNCTION', array('$', 'alert', 'confirm',
'clearTimeout', 'clearInterval',
'encodeURI', 'encodeURIComponent', 'eval', 'isFinite', 'isNaN',
'parseInt', 'parseFloat', 'prompt',
'setTimeout', 'setInterval',
'decodeURI', 'decodeURIComponent', 'jQuery'));
// built-in objects and global values highlighted as types
$this->add_identifier_mapping('TYPE', array('Array', 'Boolean', 'Date',
'Error', 'EvalError', 'Infinity', 'Image', 'Math', 'NaN', 'Number',
'Object', 'Option', 'RangeError', 'ReferenceError', 'RegExp', 'String',
'SyntaxError', 'TypeError', 'URIError',
'document',
'undefined', 'window'));
}
// Reports whether the next thing scanned sits in operand position. Walks the
// recorded tokens backwards, ignoring untagged text and comments; the answer
// is true when the nearest remaining token is an OPERATOR or an OPENER, or
// when there is no such token at all.
function is_operand() {
  $i = count($this->tokens);
  while ($i-- > 0) {
    $name = $this->tokens[$i][0];
    $ignorable = ($name === null
      || $name === 'COMMENT'
      || $name === 'COMMENT_SL');
    if (!$ignorable) {
      return in_array($name, array('OPERATOR', 'OPENER'), true);
    }
  }
  return true;
}
// Registers the token patterns and creates the child scanner used for XML
// literals. The operator pattern depends on whether this scanner is embedded
// in a server-side language and/or in HTML.
function init() {
if ($this->embedded_server)
$this->add_pattern('STOP_SERVER', $this->server_tags);
if ($this->embedded_html)
$this->add_pattern('STOP_SCRIPT', '%</script>%');
// character class of operator characters; left open so that '<' can be
// added either inside the class or as a guarded alternative
$op_pattern = '[=!+*%\-&^|~:?\;,.>';
if (!($this->embedded_server || $this->embedded_html))
$op_pattern .= '<]+';
else {
// build an alternation with a < followed by a lookahead
$op_pattern .= ']|<(?![';
// XXX this covers <? and <% but not very well
if ($this->embedded_server) $op_pattern .= '?%';
if ($this->embedded_html) $op_pattern .= '/';
$op_pattern .= '])'; // closes lookahead
$op_pattern = "(?:$op_pattern)+";
}
// '@' is the regex delimiter
$op_pattern = "@$op_pattern@";
$this->add_pattern('IDENT', '/[a-zA-Z_$][_$\w]*/');
// NOTE: slash is a special case, and </ may be a script close
$this->add_pattern('OPERATOR', $op_pattern);
// we care about openers for figuring out where regular expressions are
$this->add_pattern('OPENER', '/[\[\{\(]+/');
$this->add_pattern('CLOSER', '/[\]\}\)]+/');
$this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
$this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
$this->add_pattern('SSTRING', LuminousTokenPresets::$SINGLE_STR_SL);
$this->add_pattern('DSTRING', LuminousTokenPresets::$DOUBLE_STR_SL);
$this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
$this->add_pattern('COMMENT_SL', LuminousTokenPresets::$C_COMMENT_SL);
// special case
$this->add_pattern('SLASH', '%/%');
// NOTE(review): $stop_patterns is assigned but never read in this method
$stop_patterns = array();
// child scanner for XML literals: same source string and position, script
// scanning switched off, server-tag settings copied from this scanner
$xml_scanner = new LuminousHTMLScanner($this->string());
$xml_scanner->xml_literal = true;
$xml_scanner->scripts = false;
$xml_scanner->embedded_server = $this->embedded_server;
if ($this->embedded_server)
$xml_scanner->server_tags = $this->server_tags;
$xml_scanner->init();
$xml_scanner->pos($this->pos());
$this->add_child_scanner('xml', $xml_scanner);
}
// c+p from HTML scanner
// Runs the child scanner registered under $lang from the current position,
// records its tagged output as a pre-escaped XML token and moves this scanner
// to where the child stopped. If the child reports an interrupt, its name and
// the stop position are remembered in $child_state; otherwise that state is
// cleared.
function scan_child($lang) {
  assert (isset($this->child_scanners[$lang]));
  $child = $this->child_scanners[$lang];
  $child->pos($this->pos());
  $child->main();
  $this->record($child->tagged(), 'XML', true);
  $this->pos($child->pos());
  $this->child_state = $child->interrupt
    ? array($lang, $this->pos())
    : null;
}
// Main scanning loop. Records tokens until the end of the string, or leaves
// the loop early when a server tag (STOP_SERVER, which also sets $interrupt)
// or a closing script tag (STOP_SCRIPT) is reached, or when server_break() /
// script_break() report a break.
function main() {
$this->start();
$this->interrupt = false;
while (!$this->eos()) {
$index = $this->pos();
$tok = null;
$m = null;
$escaped = false;
if (!$this->clean_exit) {
// the previous run did not exit cleanly: ask resume() which token to
// continue with. Outside debug mode a failure is swallowed and scanning
// carries on as if the exit had been clean.
try {
$tok = $this->resume();
} catch(Exception $e) {
if (LUMINOUS_DEBUG) throw $e;
else {
$this->clean_exit = true;
continue;
}
}
}
// a child scanner was interrupted earlier: hand control back to it
elseif ($this->child_state !== null && $this->child_state[1] < $this->pos()) {
$this->scan_child($this->child_state[0]);
continue;
}
// $rule[0] is the rule name, $rule[1] the offset at which it matched; any
// text skipped on the way there is recorded untagged
elseif (($rule = $this->next_match()) !== null) {
$tok = $rule[0];
if ($rule[1] > $index) {
$this->record(substr($this->string(), $index, $rule[1] - $index), null);
}
} else {
// no rule matches any more: record the remainder untagged and finish
$this->record(substr($this->string(), $index), null);
$this->clean_exit = true;
$this->interrupt = false;
$this->terminate();
break;
}
if ($tok === 'SLASH') {
// in operand position a '/' opens a regex literal, which is re-scanned
// from the slash with $regex_regex; otherwise it is an operator
if ($this->is_operand()) {
$tok = 'REGEX';
$this->unscan();
assert($this->peek() === '/');
$m = $this->scan($this->regex_regex);
if ($m === null) {
assert(0);
$m = $this->rest();
$this->terminate();
}
} else {
$tok = 'OPERATOR';
}
}
// a lone '<' in operand position is handed to the 'xml' child scanner
elseif ($tok === 'OPERATOR' && $this->match() === '<') {
if ($this->is_operand()) {
$this->unscan();
$this->scan_child('xml');
continue;
}
}
elseif ($tok === 'STOP_SERVER') {
$this->interrupt = true;
$this->unscan();
break;
}
elseif ($tok === 'STOP_SCRIPT') {
$this->unscan();
break;
}
// unless the regex branch already supplied the text, use the last match
if ($m === null)
$m = $this->match();
if ($this->server_break($tok))
break;
if ($tok === 'COMMENT_SL' && $this->script_break($tok)
)
break;
// every iteration that records a token must have advanced
assert($this->pos() > $index);
$tag = $tok;
$this->record($m, $tag, $escaped);
}
}
}

163
3rdparty/luminous/languages/erlang.php vendored Executable file
View File

@ -0,0 +1,163 @@
<?php
/*
* Erlang.
*
* Various comments refer to section numbers in the official spec, which can
* be found at http://www.erlang.org/download/erl_spec47.ps.gz
*/
class LuminousErlangScanner extends LuminousSimpleScanner {
// applies interpolation highlighting, can't find a proper
// reference for this though
// Token filter for strings: wraps every '~' control sequence (a tilde
// followed by a run of digits or by any single character) in INTERPOLATION
// tags. $token[1] is the token text. Tokens without a tilde are returned
// untouched; all others are passed through LuminousUtils::escape_token()
// before the tags are inserted.
static function str_filter($token) {
  // strpos() returns 0 for a match at offset 0, and 0 == false, so "not
  // found" has to be tested with a strict comparison.
  if (strpos($token[1], '~') === false) return $token;
  $token = LuminousUtils::escape_token($token);
  $token[1] = preg_replace('/~(?:\d+|.)/',
    '<INTERPOLATION>$0</INTERPOLATION>', $token[1]);
  return $token;
}
// helper function: generates a regex which matches only numeric strings
// in the given base
// Builds a case-insensitive regex matching one or more digits that are valid
// in radix $base (2 to 16), e.g. [0-1] for base 2 and [0-9a-f] for base 16.
static function build_based_int_regex($base) {
  assert(2 <= $base && $base <= 16);
  // upper end of the digit range: a decimal digit up to base 10, a hex
  // letter beyond that
  $upper = ($base <= 10)
    ? (string)($base - 1)
    : '9a-' . strtolower(dechex($base - 1));
  return '/(?i:[0-' . $upper . '])+/';
}
// 3.11 integers are pretty strange, you are allowed to specify any base b
// with 2 <= b <= 16.
// Override for BASED_INT, i.e. a radix prefix such as "16#". $matches[0] is
// the whole prefix and $matches[1] the radix. The prefix and any digits valid
// in that radix are recorded together as one NUMERIC token; decimal digits
// left over after that are recorded untagged.
function based_int($matches) {
  $radix = $matches[1];
  $literal = $matches[0];
  $this->pos_shift(strlen($literal));
  if ($radix >= 2 && $radix <= 16) {
    $digits = $this->scan($this->build_based_int_regex((int)$radix));
    if ($digits !== null) {
      $literal .= $digits;
    }
  }
  $this->record($literal, 'NUMERIC');
  // Greedily swallow trailing digits that are not valid in this radix, e.g.
  // the '22' in 2#001122. They must not be picked up as a separate numeric
  // literal, so they are recorded here without a tag.
  if ($this->scan('/\d+/') !== null) {
    $this->record($this->match(), null);
  }
}
// Stream filter for the Module:Function syntax. For every token whose text
// is exactly ':', an IDENT directly before it is retagged OBJ and an IDENT
// directly after it is retagged OO. $tokens is a list of tokens with the
// token name at index 0 and the text at index 1; the retagged list is
// returned.
static function oo_stream_filter($tokens) {
  $count = count($tokens);
  // Visit every token. The bounds used to stop short of the end of the
  // stream, so a trailing "Module:Function" pair was left untagged.
  for ($i = 0; $i < $count; $i++) {
    if ($tokens[$i][1] !== ':') continue;
    if ($i > 0 && $tokens[$i-1][0] === 'IDENT') {
      $tokens[$i-1][0] = 'OBJ';
    }
    if ($i + 1 < $count) {
      if ($tokens[$i+1][0] === 'IDENT') $tokens[$i+1][0] = 'OO';
      // the token after the colon has been dealt with, skip it
      $i++;
    }
  }
  return $tokens;
}
// Registers Erlang's filters, token patterns and identifier tables.
// Section numbers in the comments refer to the Erlang specification.
function init() {
  // swap the inherited 'oo-syntax' stream filter for the Erlang-specific
  // one, and drop the 'comment-to-doc' filter
  $this->remove_stream_filter('oo-syntax');
  $this->remove_filter('comment-to-doc');
  $this->add_stream_filter('oo-syntax', array($this, 'oo_stream_filter'));
  $this->add_filter('interpolation', 'STRING', array($this, 'str_filter'));
  // 3.6 - technically should include the newline, but doesn't really matter
  $this->add_pattern('COMMENT', '/%.*/');
  // stuff like -module, -author
  $this->add_pattern('KEYWORD', '/^-(?:[a-z_]\w*)\\b/m');
  // 3.11 integer with radix
  $this->add_pattern('BASED_INT', '/[+\\-]?(\d+)#/');
  $this->overrides['BASED_INT'] = array($this, 'based_int');
  // float
  $this->add_pattern('NUMERIC', '/[+\\-]?\d+\.\d+([eE][+\\-]?\d+)?/');
  // int
  $this->add_pattern('NUMERIC', '/[+\\-]?\d+/');
  // 3.7 defines some 'separators', included are . : | || ; , ? -> and #
  // we'll capture these separately to operators
  // and map it to a keyword, for lack of anything better
  $this->add_pattern('SEPARATOR', '/\\|\\||->|[\\.:\\|;,?#]/');
  $this->rule_tag_map['SEPARATOR'] = 'KEYWORD';
  // 3.9
  $this->add_pattern('OPERATOR', '%==|/=|=:=|=<|>=|\\+\\+|--|<-|[+\\-*=!<>/]%');
  // 3.9 named ops
  $this->add_identifier_mapping('OPERATOR', array('div', 'rem', 'or', 'xor',
    'bor', 'bxor', 'bsl', 'bsr', 'and', 'band', 'not', 'bnot'));
  // char literals occur after a '$'
  $this->add_pattern('CHARACTER', '/\\$(?:(?:\\\\(?:\\^\w+|\d+|.))|.)/');
  $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
  // this looks like a string, but is in fact an 'atom'
  // we'll call it a value,
  $this->add_pattern('VALUE', LuminousTokenPresets::$SINGLE_STR);
  $this->add_pattern('IDENT', '/[a-z][@\w]*/');
  $this->add_pattern('VARIABLE', '/[A-Z][@\w]*/');
  // 3.8
  $this->add_identifier_mapping('KEYWORD', array('after', 'begin', 'case',
    'catch', 'cond', 'end', 'fun', 'if', 'let', 'of', 'query', 'receive',
    'when',
    // reserved, but undefined:
    'all_true', 'some_true'
  ));
  $this->add_identifier_mapping('VALUE', array('true', 'false'));
  // from the BIF section.
  // 'setelement', 'tl' and 'check_process_code' were misspelt as
  // 'seteleemnt', 't1' and 'check_process-code', so they could never match
  // an identifier.
  $this->add_identifier_mapping('FUNCTION', array(
    'atom', 'binary', 'constant', 'float', 'integer', 'function', 'list',
    'number', 'pid', 'port', 'reference', 'tuple', 'atom_to_list', 'list_to_atom',
    'abs', 'float', 'float_to_list', 'integer_to_list', 'list_to_float',
    'list_to_integer', 'round', 'trunc', 'binary_to_list', 'binary_to_term',
    'concat_binary', 'list_to_binary', 'size', 'split_binary', 'term_to_binary',
    'element', 'list_to_tuple', 'setelement', 'size', 'tuple_to_list', 'hd',
    'length', 'tl', 'check_process_code', 'delete_module', 'load_module',
    'preloaded', 'purge_module', 'module_loaded', 'apply', 'exit', 'group_leader',
    'link', 'list_to_pid', 'pid_to_list', 'process_flag', 'process_info',
    'processes', 'self', 'spawn', 'spawn_link', 'unlink', 'erase', 'get',
    'get_keys', 'put', 'disconnect_node', 'get_cookie', 'halt', 'is_alive',
    'monitor_node', 'node', 'nodes', 'processes', 'set_cookie', 'set_node',
    'statistics', 'register', 'registered', 'unregister', 'whereis', 'open_port',
    'port_close', 'port_info', 'ports', 'date', 'hash', 'make_ref', 'now', 'throw',
    'time', 'acos', 'asin', 'atan', 'atan2', 'cos', 'cosh', 'exp', 'log', 'log10',
    'pi', 'pow', 'sin', 'sinh', 'tan', 'tanh'));
}
// Scores how likely $src is to be Erlang. $info is not used.
static function guess_language($src, $info) {
  $p = 0.0;
  // module attributes
  $attributes = array('-module', '-author', '-export', '-include');
  foreach ($attributes as $needle) {
    if (strpos($src, $needle) !== false) {
      $p += 0.02;
    }
  }
  // the ' ++ ' operator
  if (strpos($src, ' ++ ') !== false) {
    $p += 0.01;
  }
  // name#name
  if (preg_match('/[a-zA-Z_]\w*#[a-zA-Z_]+/', $src)) {
    $p += 0.05;
  }
  // doc comment
  if (preg_match('/^%%/m', $src)) {
    $p += 0.05;
  }
  return $p;
}
}

73
3rdparty/luminous/languages/go.php vendored Executable file
View File

@ -0,0 +1,73 @@
<?php
/*
* Go.
*
* http://golang.org/doc/go_spec.html
*
* TODO: the different string formats have different escape codes, need
* to override the generic filter to handle this
* also, if there's a standard library API list, that would be useful.
*
*/
class LuminousGoScanner extends LuminousSimpleScanner {
// Override for the 'type' rule ("type Name" or "func Name"). Records the
// keyword, the whitespace and the declared name as separate tokens, advances
// past the match and remembers the name in $user_defs as a TYPE (after
// 'type') or a FUNCTION (after 'func').
function type_override($matches) {
  list($whole, $keyword, $gap, $name) = $matches;
  $this->record($keyword, 'IDENT');
  $this->record($gap, null);
  $this->record($name, 'USER_FUNCTION');
  $this->pos_shift(strlen($whole));
  if ($keyword === 'type') {
    $this->user_defs[$name] = 'TYPE';
  } else {
    $this->user_defs[$name] = 'FUNCTION';
  }
}
// Registers Go's token patterns and identifier tables.
function init() {
$this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
$this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_SL);
$ident = '[\p{L}_][\p{L}\p{N}_]*';
// this should be unicode for letter (\p{L}) and number (\p{N})
// "type Name" / "func Name" declarations go through type_override() so the
// declared name can be remembered
$this->add_pattern('type', "/\\b(type|func)(\s+)($ident)/u");
$this->overrides['type'] = array($this, 'type_override');
$this->add_pattern('IDENT', "/$ident/u");
$this->add_pattern('OPERATOR', '/[+\\-\\*\\/%&\\|^<>&=!:\\.,;]+/');
// exponent part of a floating point literal
$exp = '[eE][+-]?\d+';
// note the trailing i - which denotes imaginary literals
$this->add_pattern('NUMERIC',
"/(?:\d+\.\d*(?:$exp)?|\d+$exp|\.\d+(?:$exp)?)i?/");
// integers: leading-zero (octal) form, hex, or decimal
$this->add_pattern('NUMERIC', '/(?:0(?:\d+|x[a-fA-F0-9]+)|\d+)i?/');
$this->add_pattern('CHARACTER',
"/'(?:\\\\(?:\d+|[uUxX][a-fA-F0-9]+|.)|.)'/u");
$this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
// backtick-delimited strings; the 's' flag lets them span lines
$this->add_pattern('STRING', '/`(?:[^`\\\\]+|\\\\.)*(?:`|$)/s');
$this->add_identifier_mapping('KEYWORD', array('break', 'case', 'chan',
'const', 'continue', 'default', 'defer', 'else', 'fallthrough', 'for',
'func', 'go', 'goto', 'if', 'import', 'interface', 'map', 'package',
'range', 'return', 'select', 'struct', 'switch', 'type', 'var'));
$this->add_identifier_mapping('TYPE', array('any', 'bool', 'byte',
'complex', 'complex64', 'complex128', 'int', 'int8', 'int16', 'int32',
'int64', 'float', 'float32', 'float64', 'string', 'struct',
'uint', 'uint8', 'uint16', 'uint32', 'uint64', 'uintptr'));
$this->add_identifier_mapping('VALUE', array('false', 'iota', 'true'));
// from the old luminous language file, don't know how sensible these are
$this->add_identifier_mapping('FUNCTION', array('append', 'cap', 'copy',
'cmplx', 'imag', 'len', 'make', 'new', 'panic', 'print', 'println',
'real', 'recover', 'sizeof'));
}
// Scores how likely $src is to be Go. $info is not used.
public static function guess_language($src, $info) {
  $p = 0.0;
  if (strpos($src, 'func ') !== false) $p += 0.02;
  // method declaration with a pointer receiver: func (r *Type
  if (preg_match('/func\s*\\(\s*\w+\s*\\*\s*\w+/', $src)) $p += 0.05;
  // The package clause is usually preceded by comments (licence header,
  // package documentation), so it has to be matched at the start of any
  // line - hence the 'm' flag - not only at the very start of the source.
  if (preg_match('/^package\s+\w+/m', $src)) $p += 0.01;
  if (preg_match('/type\s+\w+\s+struct\s*\\{/', $src)) $p += 0.03;
  return $p;
}
}

166
3rdparty/luminous/languages/groovy.php vendored Executable file
View File

@ -0,0 +1,166 @@
<?php
/*
* Groovy is pretty much a cross between Python and Java.
* It inherits all of Java's stuff
* http://groovy.codehaus.org/jsr/spec/Chapter03Lexical.html
*/
require_once(dirname(__FILE__) . '/include/java_func_list.php');
class LuminousGroovyScanner extends LuminousSimpleScanner {
public $interpolation = false;
protected $brace_stack = 0;
// Override for a lone '/': decides whether it opens a regex literal or is an
// operator by looking at the nearest preceding token that is neither a
// comment nor whitespace-only untagged text. It is a regex after an OPERATOR
// token, or after untagged text ending in '(', '[' or '{'. With no such
// preceding token the slash is recorded as an operator.
function regex_override($match) {
  assert($this->peek() === '/');
  assert($match === array(0=>'/'));
  $regex = false;
  for ($i = count($this->tokens) - 1; $i >= 0; $i--) {
    list($type, $text) = $this->tokens[$i];
    if ($type === 'COMMENT') continue;
    if ($type === null) {
      // untagged text: look at its last non-whitespace character
      $trimmed = rtrim($text);
      if ($trimmed === '') continue;
      $regex = in_array(substr($trimmed, -1), array('(', '[', '{'), true);
    } else {
      $regex = ($type === 'OPERATOR');
    }
    break;
  }
  if ($regex) {
    $m = $this->scan('@ / (?: [^\\\\/]+ | \\\\. )* (?: /|$) @sx');
    assert($m !== null);
    $this->record($m, 'REGEX');
    return;
  }
  $this->record($this->get(), 'OPERATOR');
}
// string interpolation is complex and it nests, so we do that in here
// Override for double-quoted and triple-double-quoted strings. Splits the
// string into STRING tokens and ${ ... } interpolations; the code inside
// each interpolation is highlighted by a nested LuminousGroovyScanner.
// $m[0] is the text matched by the INTERP_STRING rule; only its opening
// quotes are inspected here.
function interp_string($m) {
// this should only be called for doubly quoted strings
// and triple-double quotes
//
// interpolation is between ${ ... }
$patterns = array('interp' => '/(?<!\\$)\\$\\{/');
// $start is where the STRING segment currently being collected begins
$start = $this->pos();
if (preg_match('/^"""/', $m[0])) {
$patterns['term'] = '/"""/';
$this->pos_shift(3);
}
else {
assert(preg_match('/^"/', $m[0]));
$patterns['term'] = '/"/';
$this->pos_shift(1);
}
while (1) {
$p = $this->pos();
// $name says which of 'interp' / 'term' was found (null for neither),
// $index where, and $matches holds the match itself
list($name, $index, $matches) = $this->get_next_named($patterns);
if ($name === null) {
// no matches, terminate
$this->record(substr($this->string(), $start), 'STRING');
$this->terminate();
break;
}
elseif($name === 'term') {
// end of the string
$range = $index + strlen($matches[0]);
$this->record(substr($this->string(),
$start, $range-$start), 'STRING');
$this->pos($range);
break;
} else {
// interpolation, handle this with a subscanner
$this->record(substr($this->string(), $start, $index-$start), 'STRING');
$this->record($matches[0], 'DELIMITER');
$subscanner = new LuminousGroovyScanner($this->string());
$subscanner->interpolation = true;
$subscanner->init();
$subscanner->pos($index + strlen($matches[0]));
$subscanner->main();
$tagged = $subscanner->tagged();
$this->record($tagged, 'INTERPOLATION', true);
// carry on from where the subscanner stopped; a '}' found there closes
// the interpolation
$this->pos($subscanner->pos());
if ($this->scan('/\\}/')) $this->record($this->match(), 'DELIMITER');
$start = $this->pos();
}
// each pass through the loop must make progress
assert($p < $this->pos());
}
}
// brace override halts scanning if the stack is empty and we hit a '}',
// this is for interpolated code, the top-level scanner doesn't bind to this
// Override for '{' and '}' (only bound when scanning interpolated code).
// Tracks brace nesting in $brace_stack. A '}' seen while the stack is empty
// is not recorded or consumed and makes the method return true; any other
// brace is recorded untagged and consumed.
function brace($m) {
  $ch = $m[0];
  if ($ch === '}') {
    if ($this->brace_stack <= 0) {
      return true;
    }
    $this->brace_stack--;
  } elseif ($ch === '{') {
    $this->brace_stack++;
  } else {
    assert(0);
  }
  $this->record($ch, null);
  $this->pos_shift(strlen($ch));
}
// Registers Groovy's identifier tables and token patterns. The Java keyword
// and type lists come from the globals defined in include/java_func_list.php.
function init() {
$this->add_identifier_mapping('KEYWORD',
$GLOBALS['luminous_java_keywords']);
$this->add_identifier_mapping('TYPE', $GLOBALS['luminous_java_types']);
// Groovy additions to the Java keyword list
$this->add_identifier_mapping('KEYWORD', array('any', 'as', 'def', 'in',
'with', 'do', 'strictfp',
'println'));
// C+P from python
// so it turns out this template isn't quite as readable as I hoped, but
// it's a triple string, e.g:
// "{3} (?: [^"\\]+ | ""[^"\\]+ | "[^"\\]+ | \\.)* (?: "{3}|$)
$triple_str_template = '%1$s{3} (?> [^%1$s\\\\]+ | %1$s%1$s[^%1$s\\\\]+ | %1$s[^%1$s\\\\]+ | \\\\. )* (?: %1$s{3}|$)';
// NOTE(review): $str_template is assigned but never used in this method
$str_template = '%1$s (?> [^%1$s\\\\]+ | \\\\. )* (?: %1$s|$)';
$triple_dstr = sprintf($triple_str_template, '"');
$triple_sstr = sprintf($triple_str_template, "'");
// shebang line
$this->add_pattern('COMMENT', '/^#!.*/');
$this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
$this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_SL);
// double-quoted strings (triple and single) go through interp_string();
// single-quoted ones are plain STRING tokens
$this->add_pattern('INTERP_STRING', "/$triple_dstr/sx");
$this->add_pattern('STRING', "/$triple_sstr/xs");
$this->add_pattern('INTERP_STRING', LuminousTokenPresets::$DOUBLE_STR);
$this->overrides['INTERP_STRING'] = array($this, 'interp_string');
// differs from java:
$this->add_pattern('STRING', LuminousTokenPresets::$SINGLE_STR);
$this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
$this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
$this->add_pattern('IDENT', '/[a-zA-Z_]\w*/');
$this->add_pattern('OPERATOR', '/[~!%^&*\-=+:?|<>]+/');
// a lone slash is either an operator or the start of a regex literal;
// regex_override() decides which
$this->add_pattern('SLASH', '%/%');
$this->overrides['SLASH'] = array($this, 'regex_override');
// braces are only tracked when scanning the inside of a ${ ... } block
if ($this->interpolation) {
$this->add_pattern('BRACE', '/[{}]/');
$this->overrides['BRACE'] = array($this, 'brace');
}
}
// Scores how likely $src is to be Groovy. $info is not used.
static function guess_language($src, $info) {
  // pattern => weight added to the score when the pattern matches
  $hints = array(
    // def name =
    '/\\bdef\s+\w+\s*=/' => 0.04,
    // println followed by a string or an identifier
    '/println\s+[\'"\w]/' => 0.03,
    // Flawed check for interpolation, might match after a string
    // terminator
    "/\"[^\"\n\r]*\\$\\{/" => 0.05,
    // regex literal ~/regex/
    '%~/%' => 0.05,
    '/^import\s+groovy/m' => 0.2,
  );
  $p = 0.0;
  foreach ($hints as $pattern => $weight) {
    if (preg_match($pattern, $src)) {
      $p += $weight;
    }
  }
  return $p;
}
}

99
3rdparty/luminous/languages/haskell.php vendored Executable file
View File

@ -0,0 +1,99 @@
<?php
// Haskell scanner.
// We do not yet support TemplateHaskell because it looks INSANE.
/*
* TODO: Some contextual awareness would be great, Kate seems to highlight
* things differently depending on whether they're in [..] or (...) blocks,
* but I don't understand Haskell enough to embark on that right now.
*
* It would also be nice to distinguish between some different classes of
* operator.
*/
require_once(dirname(__FILE__) . '/include/haskell.php');
class LuminousHaskellScanner extends LuminousSimpleScanner {
// handles comment nesting of multiline comments.
// Override for the NESTED_COMMENT rule: delegates to nestable_token() with
// '{-' as the opening and '-}' as the closing delimiter, recording the
// result as COMMENT.
function comment_override() {
$this->nestable_token('COMMENT', '/\\{-/', '/-\\}/');
}
// Registers Haskell's identifier tables and token patterns. The identifier
// lists are globals defined in include/haskell.php.
function init() {
  // import from ./include/
  global $luminous_haskell_functions;
  global $luminous_haskell_types;
  global $luminous_haskell_values;
  global $luminous_haskell_keywords;
  $this->add_identifier_mapping('KEYWORD', $luminous_haskell_keywords);
  $this->add_identifier_mapping('TYPE', $luminous_haskell_types);
  $this->add_identifier_mapping('FUNCTION', $luminous_haskell_functions);
  $this->add_identifier_mapping('VALUE', $luminous_haskell_values);
  // shebang
  $this->add_pattern('COMMENT', '/^#!.*/');
  // Refer to the sections in
  // http://www.haskell.org/onlinereport/lexemes.html
  // for the rules implemented here.
  // 2.4
  $this->add_pattern('TYPE', '/[A-Z][\'\w]*/');
  $this->add_pattern('IDENT', '/[_a-z][\'\w]*/');
  // http://www.haskell.org/onlinereport/prelude-index.html
  $this->add_pattern('FUNCTION', '/
(?: !!|\\$!?|&&|\\|{1,2}|\\*{1,2}|\\+{1,2}|-(?!-)|\\.|\\/=?|<=?|==|=<<|>>?=?|\\^\\^? )
/x');
  $op_chars = '\\+%^\\/\\*\\?#<>:;=@\\[\\]\\|\\\\~\\-!$@%&\\|=';
  // ` is used to make a function call into an infix operator
  // CRAZY OR WHAT.
  $this->add_pattern('OPERATOR', '/`[^`]*`/');
  // some kind of function, lambda, maybe.
  $this->add_pattern('FUNCTION', "/\\\\(?![$op_chars])\S+/");
  // Comments are hard!
  // http://www.mail-archive.com/haskell@haskell.org/msg09019.html
  // According to this, we can PROBABLY, get away with checking either side
  // for non-operator chars followed by at least 2 dashes, but I could well
  // be wrong. It'll do for now.
  $this->add_pattern('COMMENT', "/(?<![$op_chars])---*(?![$op_chars]).*/");
  // nested comments are easy!
  $this->add_pattern('NESTED_COMMENT', '/\\{-/');
  $this->overrides['NESTED_COMMENT'] = array($this, 'comment_override');
  $this->rule_tag_map['NESTED_COMMENT'] = 'COMMENT';
  $this->add_pattern('OPERATOR', "/[$op_chars]+/");
  // FIXME: the char type is way more discriminating than this
  $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR_SL);
  $this->add_pattern('CHARACTER', LuminousTokenPresets::$SINGLE_STR_SL);
  // 2.5
  // A float is "decimal.decimal" with an optional exponent, or "decimal"
  // with a mandatory exponent. The previous pattern required the exponent
  // whenever a fractional part was present, so a literal such as 1.5 was
  // split into three tokens.
  $this->add_pattern('NUMERIC', '/
0[oO]\d+ #octal
|
0[xX][a-fA-F\d]+ #hex
|
# decimal, optionally followed by a fraction and or an exponent
\d+ (?: \.\d+ (?: [eE][+-]? \d+ )? | [eE][+-]? \d+ )?
/x');
}
// Scores how likely $src is to be Haskell; each hint that matches adds 0.05.
// $info is not used.
public static function guess_language($src, $info) {
  $hints = array(
    // comments
    '/\\{-.*\\-}/',
    // 'import qualified' seems pretty unique
    '/^import\s+qualified/m',
    // "data SomeType something ="
    '/data\s+\w+\s+\w+\s*=/',
  );
  $p = 0.0;
  foreach ($hints as $pattern) {
    if (preg_match($pattern, $src)) {
      $p += 0.05;
    }
  }
  return $p;
}
}

252
3rdparty/luminous/languages/html.php vendored Executable file
View File

@ -0,0 +1,252 @@
<?php
class LuminousHTMLScanner extends LuminousEmbeddedWebScript {
private $child_state = null;
public $scripts = true;
// XML literals are part of several languages. Setting this makes the scanner
// halt as soon as it pops its root tag from the stack, so no trailing
// code is consumed.
public $xml_literal = false;
private $tag_stack = array();
// Installs the dirty-exit recovery patterns and the rule-name to tag
// mapping, then calls the parent constructor with $src.
function __construct($src=null) {
// token name => pattern that consumes the remainder of that token.
// NOTE(review): presumably consumed by resume(), which main() calls when
// the previous run did not exit cleanly - confirm in the parent class.
$this->dirty_exit_recovery = array(
'DSTRING' => '/[^">]*+("|$|(?=[>]))/',
'SSTRING' => "/[^'>]*+('|$|(?=[>]))/",
'COMMENT1' => '/(?> [^\\-]+ | -(?!->))*(?:-->|$)/x',
'COMMENT2' => '/[^>]*+(?:>|$)/s',
'CDATA' => '/(?>[^\\]]+|\\](?!\\]>))*(?:\\]{2}>|$)/xs',
'ESC' => '/[^;]*+(?:;|$)/',
'TYPE' => '/[^\s]*/',
'VALUE' => '/[^\s]*/',
'HTMLTAG' => '/[^\s]*/',
);
// rule name => tag name: both string flavours become STRING; both comment
// flavours and CDATA sections become COMMENT
$this->rule_tag_map = array(
'DSTRING' => 'STRING',
'SSTRING' => 'STRING',
'COMMENT1' => 'COMMENT',
'COMMENT2' => 'COMMENT',
'CDATA' => 'COMMENT',
);
parent::__construct($src);
}
// Runs the child scanner registered under $lang from the current position,
// appends its tagged output to the token list as an untagged, pre-escaped
// entry and moves this scanner to where the child stopped. If the child
// reports an interrupt, its name and the stop position are remembered in
// $child_state; otherwise that state is cleared.
function scan_child($lang) {
  assert (isset($this->child_scanners[$lang]));
  $child = $this->child_scanners[$lang];
  $child->pos($this->pos());
  $child->main();
  $this->tokens[] = array(null, $child->tagged(), true);
  $this->pos($child->pos());
  if (!$child->interrupt) {
    $this->child_state = null;
    return;
  }
  $this->child_state = array($lang, $this->pos());
}
// Registers the patterns main() uses to find the next point of interest
// outside a tag ('&', '<' and, when embedded, the server tags) and, when
// $scripts is set, creates the JavaScript and CSS child scanners.
function init() {
$this->add_pattern('', '/&/');
if ($this->embedded_server) {
$this->add_pattern('TERM', $this->server_tags);
}
$this->add_pattern('', '/</');
$this->state_ = 'global';
if ($this->scripts) {
// both child scanners share this scanner's source string and server-tag
// settings and are told that they are embedded in HTML
$js = new LuminousJavaScriptScanner($this->string());
$js->embedded_server = $this->embedded_server;
$js->embedded_html = true;
$js->server_tags = $this->server_tags;
$js->init();
$css = new LuminousCSSScanner($this->string());
$css->embedded_server = $this->embedded_server;
$css->embedded_html = true;
$css->server_tags = $this->server_tags;
$css->init();
$this->add_child_scanner('js', $js);
$this->add_child_scanner('css', $css);
}
}
private $tagname = '';
private $expecting = '';
// Main scanning loop. $state_ is 'global' outside a tag and 'tag' inside
// one; $expecting records whether the next word inside a tag is the tag name
// or an attribute value. The loop stops early, with $interrupt set, when a
// server tag is reached, and returns once the tag stack is empty again if
// $xml_literal is set.
function main() {
$this->start();
$this->interrupt = false;
while (!$this->eos()) {
$index = $this->pos();
if ($this->embedded_server && $this->check($this->server_tags)) {
$this->interrupt = true;
break;
}
if (!$this->clean_exit) {
// the previous run did not exit cleanly: let resume() finish the pending
// token. Outside debug mode a failure is swallowed and scanning carries on
// as if the exit had been clean.
try {
$tok = $this->resume();
if ($this->server_break($tok)) break;
$this->record($this->match(), $tok);
} catch (Exception $e) {
if (LUMINOUS_DEBUG) throw $e;
else $this->clean_exit = true;
}
continue;
}
// a child scanner was interrupted earlier: hand control back to it
if ($this->child_state !== null && $this->child_state[1] < $this->pos()) {
$this->scan_child($this->child_state[0]);
continue;
}
$in_tag = $this->state_ === 'tag';
if (!$in_tag) {
// outside a tag: record everything up to the next registered pattern
// ('&', '<' or a server tag) as untagged text
$next = $this->next_match(false);
if($next) {
$skip = $next[1] - $this->pos();
$this->record($this->get($skip), null);
if ($next[0] === 'TERM') {
$this->interrupt = true;
break;
}
}
} else {
$this->skip_whitespace();
if ($this->embedded_server && $this->check($this->server_tags)) {
$this->interrupt = true;
break;
}
}
$index = $this->pos();
$c = $this->peek();
$tok = null;
// $get: record the single next character via get() instead of the last
// match
$get = false;
if (!$in_tag && $c === '&'
&& $this->scan('/&[^;\s]+;/')
) $tok = 'ESC';
elseif(!$in_tag && $c === '<') {
if ($this->peek(2) === '<!') {
if($this->scan('/(<)(!DOCTYPE)/i')) {
// special case: doctype
$matches = $this->match_groups();
$this->record($matches[1], null);
$this->record($matches[2], 'KEYWORD');
$this->state_ = 'tag';
continue;
}
// urgh
elseif($this->scan('/
<!\\[CDATA\\[
(?> [^\\]]+ | \\](?!\\]>) )*
(?: \\]\\]> | $ )
/ixs'
))
$tok = 'CDATA';
elseif($this->scan('/<!--(?> [^\\-]+ | (?:-(?!->))+)* (?:-->|$)/xs'))
$tok = 'COMMENT1';
elseif($this->scan('/<![^>]*+(?:>|$)/s')) $tok = 'COMMENT2';
else assert(0);
} else {
// check for <script>
$this->state_ = 'tag';
$this->expecting = 'tagname';
$get = true;
}
}
elseif($c === '>') {
// end of a tag. After an opening script or style tag the matching child
// scanner takes over
$get = true;
$this->state_ = 'global';
if ($this->scripts
&& ($this->tagname === 'script' || $this->tagname === 'style'))
{
$this->record($this->get(), null);
$this->scan_child( ($this->tagname === 'script')? 'js' : 'css');
continue;
}
$this->tagname = '';
}
// self-closing tag: leave the tag and drop it from the stack again
elseif($in_tag && $this->scan('@/\s*>@')) {
$this->state_ = 'global';
array_pop($this->tag_stack);
}
elseif($in_tag &&
$c === "'" && $this->scan("/' (?> [^'\\\\>]+ | \\\\.)* (?:'|$|(?=>))/xs")) {
$tok = 'SSTRING';
$this->expecting = '';
}
elseif($in_tag &&
$c === '"' && $this->scan('/" (?> [^"\\\\>]+ | \\\\.)* (?:"|$|(?=>))/xs')) {
$tok = 'DSTRING';
$this->expecting = '';
}
// a bare word inside a tag: tag name, attribute value or attribute name,
// depending on $expecting
elseif($in_tag && $this->scan('@(?:(?<=<)/)?[^\s=<>/]+@') !== null) {
if ($this->expecting === 'tagname') {
$tok = 'HTMLTAG';
$this->expecting = '';
$this->tagname = strtolower($this->match());
// a closing tag pops the stack, an opening tag is pushed
if ($this->tagname[0] === '/') array_pop($this->tag_stack);
else $this->tag_stack[] = $this->tagname;
}
elseif($this->expecting === 'value') {
$tok = 'VALUE'; // val as in < a href=*/index.html*
$this->expecting = '';
}
else {
$tok = 'TYPE'; // attr, as in <a *HREF*= ....
}
}
elseif($in_tag && $c === '=') {
$this->expecting = 'value';
$get = true;
}
else $get = true;
if (!$get && $this->server_break($tok)) {break; }
$this->record($get? $this->get(): $this->match(), $tok);
assert ($index < $this->pos() || $this->eos());
// when scanning an XML literal, stop once outside a tag with an empty
// tag stack
if ($this->xml_literal && $this->state_ !== 'tag' && empty($this->tag_stack)) {
return;
}
}
}
// Scores how likely $src is to be HTML/XML. $info is not used.
public static function guess_language($src, $info) {
  // We have to be a bit careful of XML literals nested in other languages
  // here. We also have to be careful to take precedence over embedded CSS
  // and JS, but leave some room for being embedded in PHP or Rails, so we
  // never go over 0.75.
  if (strpos(ltrim($src), '<!DOCTYPE ') === 0) {
    return 0.75;
  }
  $p = 0;
  // each entry is a pattern and the weight added when it matches
  $hints = array(
    array('/<(a|table|span|div)\s+class=/', 0.05),
    array('%</(a|table|span|div)>%', 0.05),
    array('/<(style|script)\\b/', 0.15),
    array('/<!\\[CDATA\\[/', 0.15),
  );
  foreach ($hints as $hint) {
    if (preg_match($hint[0], $src)) {
      $p += $hint[1];
    }
  }
  // look for 1 tag at least every 4 lines
  $lines = preg_match_all('/$/m',
    preg_replace('/^\s+/m', '', $src), $m);
  $tags = preg_match_all('%<[!?/]?[a-zA-Z_:\\-]%', $src, $m);
  if ($tags > $lines/4) {
    $p += 0.15;
  }
  return $p;
}
}

10
3rdparty/luminous/languages/identity.php vendored Executable file
View File

@ -0,0 +1,10 @@
<?php
/**
 * The identity scanner: it performs no tokenization of its own.
 * Implemented for consistency with the other scanners.
 */
class LuminousIdentityScanner extends LuminousScanner {

  /**
   * Records the scanner's whole input string as one token whose
   * token name is null.
   */
  public function main() {
    $everything = $this->string();
    $this->record($everything, null);
  }
}

View File

@ -0,0 +1,301 @@
<?php
/**
* C and C++ keywords/functions
* Some Qt things here as well.
*/
// The lists are declared global so that they stay reachable by name even
// when this file is included from inside a function or method.
global $luminous_c_funcs, $luminous_c_types, $luminous_c_keywords;

// Keywords for C and C++, plus a few Qt names and common constants.
$luminous_c_keywords = array(
  'asm', 'auto',
  'break',
  'case', 'catch', 'class', 'continue', 'const', 'const_cast', 'connect',
  'default', 'delete', 'do', 'dynamic_cast',
  'else', 'explicit', 'extern',
  'for', 'for_each', 'friend',
  'goto',
  'if', 'inline',
  'mutable',
  'namespace', 'new',
  'operator',
  'private', 'protected', 'public',
  'register', 'reinterpret_cast', 'return',
  'static', 'static_cast', 'switch', 'sizeof', 'signed',
  'template', 'this', 'throw', 'try', 'typedef', 'typeid', 'typename',
  'using', 'unsigned',
  'while',
  'NULL', 'SIGNAL', 'SLOT', 'TRUE', 'FALSE', 'true', 'false'
);

// Type names: built-in and fixed-width types first, then C++ std containers.
$luminous_c_types = array(
  'bool', 'char', 'clock_t', 'double', 'div_t', 'enum', 'float', 'fpos_t',
  'int',
  'int8', 'int16', 'int32', 'int64',
  'int8_t', 'int16_t', 'int32_t', 'int64_t',
  'long', 'ldiv_t', 'short', 'struct', 'size_t', 'ptrdiff_t', 'time_t',
  'union', 'uint',
  'uint8', 'uint16', 'uint32', 'uint64',
  'uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
  'void', 'va_list', 'wchar_t',
  // C++ std stuff
  'pair', 'list', 'deque', 'queue', 'priority_queue', 'set', 'stack',
  'string', 'map', 'multiset', 'multimap',
  'hash_set', 'hash_multiset', 'hash_map', 'hash_multimap',
  'bitset', 'vector', 'valarray', 'iterator'
);
// http://en.wikipedia.org/wiki/List_of_C_functions
// inb4 deletionists
//
// Names of C library functions. Each name appears once: the original list
// repeated fprintf, fscanf, atof and ldiv. 'calloc' was missing from the
// allocation family and has been added; the pre-existing 'alloc' entry is
// kept so nothing that was recognised before is lost.
$luminous_c_funcs = array(
  'assert',
  'cabs', 'cacos', 'cacosh', 'carg', 'casin', 'casinh', 'catan', 'catanh',
  'ccos', 'ccosh', 'cexp', 'cimag', 'cis', 'clog', 'conj', 'cpow', 'cproj',
  'creal', 'csin', 'csinh', 'csqrt', 'ctan', 'ctanh',
  'digittoint', 'isalnum', 'isalpha', 'isascii', 'isblank', 'iscntrl',
  'isdigit', 'isgraph', 'islower', 'isprint', 'ispunct', 'isspace',
  'isupper', 'isxdigit', 'toascii', 'tolower', 'toupper',
  'imaxabs', 'imaxdiv', 'strtoimax', 'strtoumax', 'wcstoimax', 'wcstoumax',
  'localeconv', 'setlocale',
  'acos', 'asin', 'atan', 'atan2', 'atof', 'ceil', 'cos', 'cosh', 'exp',
  'fabs', 'floor', 'frexp', 'ldexp', 'log', 'log10', 'modf', 'pow', 'sin',
  'sinh', 'sqrt', 'tan', 'tanh',
  'longjmp', 'setjmp',
  'raise',
  'va_arg', 'va_copy', 'va_end', 'va_start',
  'offsetof',
  'clearerr', 'fclose', 'feof', 'ferror', 'fflush', 'fgetc', 'fgetpos',
  'fgets', 'fopen', 'freopen', 'fdopen', 'fprintf', 'fputc', 'fputs',
  'fread', 'fscanf', 'fseek', 'fsetpos', 'ftell', 'fwrite', 'getc',
  'getchar', 'gets', 'perror', 'printf', 'sprintf', 'snprintf', 'putc',
  'putchar', 'fputchar', 'puts', 'remove', 'rename', 'rewind', 'scanf',
  'sscanf', 'vfscanf', 'vscanf', 'vsscanf', 'setbuf', 'setvbuf', 'tmpfile',
  'tmpnam', 'ungetc', 'vprintf', 'vfprintf', 'vsprintf',
  'abort', 'abs', 'labs', 'atexit', 'atoi', 'atol', 'bsearch', 'div',
  'ldiv', 'exit', 'free', 'itoa', 'getenv', 'ltoa',
  'malloc', 'calloc', 'alloc', 'realloc',
  'qsort', 'rand', 'srand', 'strtod', 'strtol', 'strtoul', 'system',
  'memchr', 'memcmp', 'memcpy', 'memmove', 'memset',
  'strcat', 'strncat', 'strchr', 'strcmp', 'strncmp', 'strcoll', 'strcpy',
  'strncpy', 'strcspn', 'strerror', 'strlen', 'strpbrk', 'strrchr',
  'strspn', 'strstr', 'strtok', 'strxfrm',
  'asctime', 'clock', 'ctime', 'difftime', 'gmtime', 'localtime', 'mktime',
  'strftime', 'time',
  'btowc', 'fgetwc', 'fgetws', 'fputwc', 'fputws', 'fwide', 'fwprintf',
  'fwscanf', 'getwc', 'getwchar', 'mbrlen', 'mbrtowc', 'mbsinit',
  'mbsrtowcs', 'putwc', 'putwchar', 'swprintf', 'swscanf', 'ungetwc',
  'vfwprintf', 'vswprintf', 'vwprintf', 'wcrtomb',
  'wcscat', 'wcschr', 'wcscmp', 'wcscoll', 'wcscpy', 'wcscspn', 'wcsftime',
  'wcslen', 'wcsncat', 'wcsncmp', 'wcsncpy', 'wcspbrk', 'wcsrchr',
  'wcsrtombs', 'wcsspn', 'wcsstr', 'wcstod', 'wcstok', 'wcstol', 'wcstoul',
  'wcsxfrm', 'wctob',
  'wmemchr', 'wmemcmp', 'wmemcpy', 'wmemmove', 'wmemset',
  'wprintf', 'wscanf',
  'iswalnum', 'iswalpha', 'iswcntrl', 'iswctype', 'iswdigit', 'iswgraph',
  'iswlower', 'iswprint', 'iswpunct', 'iswspace', 'iswupper', 'iswxdigit',
  'towctrans', 'towlower', 'towupper', 'wctrans', 'wctype',
  'farmalloc', 'getch', 'getche', 'gotoxy',
  'getaddrinfo', 'getnameinfo',
);

View File

@ -0,0 +1,261 @@
<?php
// .NET type names, grouped by the namespace noted above each group.
// Some names occur more than once (e.g. the same name in two groups);
// the repeats are kept as they are.
$luminous_csharp_type_list = array(
  // system
  'ArgIterator', 'ArraySegment', 'Boolean', 'Byte', 'Char',
  'ConsoleKeyInfo', 'DateTime', 'DateTimeOffset', 'Decimal', 'Double',
  'Guid', 'Int16', 'Int32', 'Int64', 'IntPtr', 'ModuleHandle', 'Nullable',
  'RuntimeArgumentHandle', 'RuntimeFieldHandle', 'RuntimeMethodHandle',
  'RuntimeTypeHandle', 'SByte', 'Single', 'TimeSpan', 'TimeZoneInfo',
  'TypedReference', 'UInt16', 'UInt32', 'UInt64', 'UIntPtr', 'Void',

  // also system
  'AccessViolationException', 'ActivationContext', 'Activator',
  'AggregateException', 'AppDomain', 'AppDomainManager', 'AppDomainSetup',
  'AppDomainUnloadedException', 'ApplicationException', 'ApplicationId',
  'ApplicationIdentity', 'ArgumentException', 'ArgumentNullException',
  'ArgumentOutOfRangeException', 'ArithmeticException', 'Array',
  'ArrayTypeMismatchException', 'AssemblyLoadEventArgs', 'Attribute',
  'AttributeUsageAttribute',
  'BadImageFormatException', 'BitConverter', 'Buffer',
  'CannotUnloadAppDomainException', 'CharEnumerator',
  'CLSCompliantAttribute', 'Console', 'ConsoleCancelEventArgs',
  'ContextBoundObject', 'ContextMarshalException', 'ContextStaticAttribute',
  'Convert',
  'DataMisalignedException', 'DBNull', 'Delegate', 'DivideByZeroException',
  'DllNotFoundException', 'DuplicateWaitObjectException',
  'EntryPointNotFoundException', 'Enum', 'Environment', 'EventArgs',
  'Exception', 'ExecutionEngineException',
  'FieldAccessException', 'FileStyleUriParser', 'FlagsAttribute',
  'FormatException', 'FtpStyleUriParser',
  'GC', 'GenericUriParser', 'GopherStyleUriParser',
  'HttpStyleUriParser',
  'IndexOutOfRangeException', 'InsufficientExecutionStackException',
  'InsufficientMemoryException', 'InvalidCastException',
  'InvalidOperationException', 'InvalidProgramException',
  'InvalidTimeZoneException',
  'Lazy', 'LdapStyleUriParser', 'LoaderOptimizationAttribute',
  'LocalDataStoreSlot',
  'MarshalByRefObject', 'Math', 'MemberAccessException',
  'MethodAccessException', 'MissingFieldException',
  'MissingMemberException', 'MissingMethodException', 'MTAThreadAttribute',
  'MulticastDelegate', 'MulticastNotSupportedException',
  'NetPipeStyleUriParser', 'NetTcpStyleUriParser', 'NewsStyleUriParser',
  'NonSerializedAttribute', 'NotFiniteNumberException',
  'NotImplementedException', 'NotSupportedException', 'Nullable',
  'NullReferenceException',
  'Object', 'ObjectDisposedException', 'ObsoleteAttribute',
  'OperatingSystem', 'OperationCanceledException', 'OutOfMemoryException',
  'OverflowException',
  'ParamArrayAttribute', 'PlatformNotSupportedException',
  'Random', 'RankException', 'ResolveEventArgs',
  'SerializableAttribute', 'StackOverflowException', 'STAThreadAttribute',
  'String', 'StringComparer', 'SystemException',
  'ThreadStaticAttribute', 'TimeoutException', 'TimeZone', 'TimeZoneInfo',
  'TimeZoneInfo', 'TimeZoneNotFoundException', 'Tuple', 'Type',
  'TypeAccessException', 'TypeInitializationException', 'TypeLoadException',
  'TypeUnloadedException',
  'UnauthorizedAccessException', 'UnhandledExceptionEventArgs', 'Uri',
  'UriBuilder', 'UriFormatException', 'UriParser', 'UriTemplate',
  'UriTemplateEquivalenceComparer', 'UriTemplateMatch',
  'UriTemplateMatchException', 'UriTemplateTable', 'UriTypeConverter',
  'ValueType', 'Version',
  'WeakReference',

  // system.collections
  'ArrayList', 'BitArray', 'CaseInsensitiveComparer',
  'CaseInsensitiveHashCodeProvider', 'CollectionBase', 'Comparer',
  'DictionaryBase', 'DictionaryEntry', 'Hashtable', 'ICollection',
  'IComparer', 'IDictionary', 'IDictionaryEnumerator', 'IEnumerable',
  'IEnumerator', 'IEqualityComparer', 'IHashCodeProvider', 'IList',
  'IStructuralComparable', 'IStructuralEquatable', 'Queue',
  'ReadOnlyCollectionBase', 'SortedList', 'Stack', 'StructuralComparisons',

  // System.Collections.Generic
  'Comparer', 'Dictionary', 'EqualityComparer', 'HashSet', 'ICollection',
  'IComparer', 'IDictionary', 'IEnumerable', 'IEnumerator',
  'IEqualityComparer', 'IList', 'IReadOnlyCollection',
  'IReadOnlyDictionary', 'IReadOnlyList', 'ISet', 'KeyedByTypeCollection',
  'KeyNotFoundException', 'KeyValuePair', 'LinkedList', 'LinkedListNode',
  'List', 'Queue', 'SortedDictionary', 'SortedList', 'SortedSet', 'Stack',
  'SynchronizedCollection', 'SynchronizedKeyedCollection',
  'SynchronizedReadOnlyCollection',

  // system.io
  'BinaryReader', 'BinaryWriter', 'BufferedStream', 'Directory',
  'DirectoryInfo', 'DirectoryNotFoundException', 'DriveInfo',
  'DriveNotFoundException', 'EndOfStreamException', 'ErrorEventArgs',
  'File', 'FileFormatException', 'FileInfo', 'FileLoadException',
  'FileNotFoundException', 'FileStream', 'FileSystemEventArgs',
  'FileSystemInfo', 'FileSystemWatcher', 'InternalBufferOverflowException',
  'InvalidDataException', 'IODescriptionAttribute', 'IOException',
  'MemoryStream', 'Path', 'PathTooLongException', 'PipeException',
  'RenamedEventArgs', 'Stream', 'StreamReader', 'StreamWriter',
  'StringReader', 'StringWriter', 'TextReader', 'TextWriter',
  'UnmanagedMemoryAccessor', 'UnmanagedMemoryStream',
);

View File

@ -0,0 +1,6 @@
<?php
require_once (dirname(__FILE__) . '/../ecmascript.php');
require_once (dirname(__FILE__) . '/../javascript.php');
require_once (dirname(__FILE__) . '/../html.php');

View File

@ -0,0 +1,265 @@
<?php
// http://www.haskell.org/onlinereport/prelude-index.html
// The lists are declared global so that they stay reachable by name even
// when this file is included from inside a function or method.
global $luminous_haskell_functions, $luminous_haskell_types,
  $luminous_haskell_values, $luminous_haskell_keywords;

// Keyword list.
$luminous_haskell_keywords = array(
  'as', 'case', 'of', 'class', 'data', 'family', 'instance', 'default',
  'deriving', 'do', 'forall', 'foreign', 'hiding', 'if', 'then', 'else',
  'import', 'infix', 'infixl', 'infixr', 'let', 'in', 'mdo', 'module',
  'newtype', 'proc', 'qualified', 'rec', 'type', 'where'
);

// Type names first ('Bool' .. 'String'), then class names
// ('Bounded' .. 'Show').
$luminous_haskell_types = array(
  'Bool', 'Char', 'Double', 'Either', 'FilePath', 'Float', 'Int',
  'Integer', 'IO', 'IOError', 'Maybe', 'Ordering', 'ReadS', 'ShowS',
  'String',
  'Bounded', 'Enum', 'Eq', 'Floating', 'Fractional', 'Functor', 'Integral',
  'Monad', 'Num', 'Ord', 'Read', 'Real', 'RealFloat', 'RealFrac', 'Show'
);

// Data constructors treated as values.
$luminous_haskell_values = array(
  'EQ', 'False', 'GT', 'Just', 'Left', 'LT', 'Nothing', 'Right', 'True',
);
// Function (and constant) names, in alphabetical order with one group
// per initial letter.
$luminous_haskell_functions = array(
  'abs', 'acos', 'acosh', 'all', 'and', 'any', 'appendFile', 'applyM',
  'asTypeOf', 'asin', 'asinh', 'atan', 'atan2', 'atanh',
  'break',
  'catch', 'ceiling', 'compare', 'concat', 'concatMap', 'const', 'cos',
  'cosh', 'curry', 'cycle',
  'decodeFloat', 'div', 'divMod', 'drop', 'dropWhile',
  'elem', 'encodeFloat', 'enumFrom', 'enumFromThen', 'enumFromThenTo',
  'enumFromTo', 'error', 'even', 'exp', 'exponent',
  'fail', 'filter', 'flip', 'floatDigits', 'floatRadix', 'floatRange',
  'floor', 'fmap', 'foldl', 'foldl1', 'foldr', 'foldr1', 'fromEnum',
  'fromInteger', 'fromIntegral', 'fromRational', 'fst',
  'gcd', 'getChar', 'getContents', 'getLine',
  'head',
  'id', 'init', 'interact', 'ioError', 'isDenormalized', 'isIEEE',
  'isInfinite', 'isNaN', 'isNegativeZero', 'iterate',
  'last', 'lcm', 'length', 'lex', 'lines', 'log', 'logBase', 'lookup',
  'map', 'mapM', 'mapM_', 'max', 'maxBound', 'maximum', 'maybe', 'min',
  'minBound', 'minimum', 'mod',
  'negate', 'not', 'notElem', 'null',
  'odd', 'or', 'otherwise',
  'pi', 'pred', 'print', 'product', 'properFraction', 'putChar', 'putStr',
  'putStrLn',
  'quot', 'quotRem',
  'read', 'readFile', 'readIO', 'readList', 'readLn', 'readParen', 'reads',
  'readsPrec', 'realToFrac', 'recip', 'rem', 'repeat', 'replicate',
  'return', 'reverse', 'round',
  'scaleFloat', 'scanl', 'scanl1', 'scanr', 'scanr1', 'seq', 'sequence',
  'sequence_', 'show', 'showChar', 'showList', 'showParen', 'showString',
  'shows', 'showsPrec', 'significand', 'signum', 'sin', 'sinh', 'snd',
  'span', 'splitAt', 'sqrt', 'subtract', 'succ', 'sum',
  'tail', 'take', 'takeWhile', 'tan', 'tanh', 'toEnum', 'toInteger',
  'toRational', 'truncate',
  'uncurry', 'undefined', 'unlines', 'until', 'unwords', 'unzip', 'unzip3',
  'userError',
  'words', 'writeFile',
  'zip', 'zip3', 'zipWith', 'zipWith3',
);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

268
3rdparty/luminous/languages/include/sql.php vendored Executable file
View File

@ -0,0 +1,268 @@
<?php
// TODO: These are MySQL specific
// SQL keywords. 'INITIALLY' was previously misspelled 'INITITIALLY', so the
// real keyword was never in this list.
$keywords = array(
  'ABORT', 'ACTION', 'ADD', 'AFTER', 'ALL', 'ALTER', 'ANALYZE', 'AS',
  'ASC', 'ATTACH', 'AUTOINCREMENT', 'AUTO_INCREMENT', 'BEFORE', 'BEGIN',
  'BY', 'CASCADE', 'CAST', 'CHECK', 'COLLATE', 'COLUMN',
  'COMMIT', 'CONFLICT', 'CONSTRAINT', 'CREATE', 'CROSS', 'CURRENT_DATE',
  'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'DATABASE', 'DEFAULT', 'DEFERRABLE',
  'DEFERRED', 'DELETE', 'DESC', 'DETACH', 'DISTINCT', 'DROP', 'EACH', 'ELSE',
  'END', 'ESCAPE', 'EXCEPT', 'EXCLUSIVE', 'EXISTS', 'EXPLAIN', 'FAIL', 'FOR',
  'FOREIGN', 'FROM', 'FULL', 'GLOB', 'GROUP', 'HAVING', 'IF', 'IGNORE',
  'IMMEDIATE', 'IN', 'INDEX', 'INDEXED', 'INITIALLY', 'INNER', 'INSERT',
  'INSTEAD', 'INTERSECT', 'INTO', 'ISNULL', 'JOIN', 'KEY', 'LEFT', 'LIKE',
  'LIMIT', 'MATCH', 'NATURAL', 'NO', 'NOTNULL', 'OF', 'OFFSET',
  'ON', 'OR', 'ORDER', 'OUTER', 'PLAN', 'PRAGMA', 'PRIMARY', 'QUERY', 'RAISE',
  'REFERENCES', 'REGEXP', 'REINDEX', 'RELEASE', 'RENAME', 'REPLACE', 'RESTRICT',
  'RIGHT', 'ROLLBACK', 'ROW', 'SAVEPOINT', 'SELECT', 'SET', 'TABLE', 'TEMP',
  'TEMPORARY', 'THEN', 'TO', 'TRANSACTION', 'TRIGGER', 'UNION', 'UNIQUE',
  'UPDATE', 'USING', 'VACUUM', 'VALUES', 'VIEW', 'VIRTUAL', 'WHEN', 'WHERE',
  'WITH',
  // type qualifier stuff
  'SIGNED', 'UNSIGNED', 'ZEROFILL',
  // seem to be missing these, probably not standard
  'MINVALUE', 'MAXVALUE', 'START'
);
// Column type names.
$types = array(
  'BINARY', 'BIT', 'BIGINT', 'BIGINTEGER', 'BLOB', 'CHAR', 'CLOB', 'DATE',
  'DATETIME', 'DEC', 'DECIMAL', 'DOUBLE', 'DOUBLE_PRECISION', 'ENUM',
  'FIXED', 'FLOAT', 'INT', 'INTEGER', 'MEDIUMINT', 'MEDIUMINTEGER',
  'NUMERIC', 'REAL', 'SMALLINT', 'SMALLINTEGER', 'SET', 'TEXT', 'TIME',
  'TIMESTAMP', 'TINYINT', 'TINYINTEGER', 'VARBINARY', 'VARCHAR', 'YEAR',
  'ZONE' // for time zone
);

// Literal values.
$values = array('NULL');

// Word operators.
// http://dev.mysql.com/doc/refman/5.0/en/func-op-summary-ref.html
$operators = array(
  'AND', 'BETWEEN', 'BINARY', 'CASE', 'DIV',
  'IS', 'LIKE', 'NOT', 'SOUNDS', 'XOR'
);
// SQL function names.
// Fixes relative to the previous revision of this list:
//  - 'LOCALTIMESTAMP,' contained a stray comma inside the string, so the
//    real name LOCALTIMESTAMP was never in the list;
//  - 'Convert' is now upper case like every other entry;
//  - the second, redundant 'COUNT' entry is gone.
$functions = array(
  'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT',
  'ASCII', 'ASIN', 'ATAN2', 'ATAN', 'AVG',
  'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH', 'BIT_OR',
  'BIT_XOR',
  'CAST', 'CEIL', 'CEILING', 'CHAR_LENGTH', 'CHAR', 'CHARACTER_LENGTH',
  'CHARSET', 'COALESCE', 'COERCIBILITY', 'COLLATION', 'COMPRESS',
  'CONCAT_WS', 'CONCAT', 'CONNECTION_ID', 'CONV', 'CONVERT_TZ', 'CONVERT',
  'COS', 'COT', 'COUNT', 'CRC32', 'CURDATE', 'CURRENT_DATE',
  'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', 'CURTIME',
  'DATABASE', 'DATE_ADD', 'DATE_FORMAT', 'DATE_SUB', 'DATE', 'DATEDIFF',
  'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK', 'DAYOFYEAR', 'DECODE',
  'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT',
  'ELT', 'ENCODE', 'ENCRYPT', 'EXP', 'EXPORT_SET', 'EXTRACT',
  'FIELD', 'FIND_IN_SET', 'FLOOR', 'FORMAT', 'FOUND_ROWS', 'FROM_DAYS',
  'FROM_UNIXTIME',
  'GET_FORMAT', 'GET_LOCK', 'GREATEST', 'GROUP_CONCAT',
  'HEX', 'HOUR',
  'IF', 'IFNULL', 'IN', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR',
  'INTERVAL', 'IS_FREE_LOCK', 'IS_USED_LOCK', 'ISNULL',
  'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LN',
  'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG10', 'LOG2',
  'LOG', 'LOWER', 'LPAD', 'LTRIM',
  'MAKE_SET', 'MAKEDATE', 'MAKETIME', 'MASTER_POS_WAIT', 'MATCH', 'MAX',
  'MD5', 'MICROSECOND', 'MID', 'MIN', 'MINUTE', 'MOD', 'MONTH',
  'MONTHNAME',
  'NAME_CONST', 'NOW', 'NULLIF',
  'OCT', 'OCTET_LENGTH', 'OLD_PASSWORD', 'ORD',
  'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POSITION', 'POW',
  'POWER', 'ANALYSE',
  'QUARTER', 'QUOTE',
  'RADIANS', 'RAND', 'REGEXP', 'RELEASE_LOCK', 'REPEAT', 'REPLACE',
  'REVERSE', 'RIGHT', 'RLIKE', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM',
  'SCHEMA', 'SEC_TO_TIME', 'SECOND', 'SESSION_USER', 'SHA1', 'SIGN', 'SIN',
  'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'STD', 'STDDEV_POP', 'STDDEV_SAMP',
  'STDDEV', 'STR_TO_DATE', 'STRCMP', 'SUBDATE', 'SUBSTR',
  'SUBSTRING_INDEX', 'SUBSTRING', 'SUBTIME', 'SUM', 'SYSDATE',
  'SYSTEM_USER',
  'TAN', 'TIME_FORMAT', 'TIME_TO_SEC', 'TIME', 'TIMEDIFF', 'TIMESTAMP',
  'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TO_DAYS', 'TRIM', 'TRUNCATE',
  'UCASE', 'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIX_TIMESTAMP',
  'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP', 'UUID',
  'VALUES', 'VAR_POP', 'VAR_SAMP', 'VARIANCE', 'VERSION',
  'WEEK', 'WEEKDAY', 'WEEKOFYEAR',
  'YEAR', 'YEARWEEK'
);

118
3rdparty/luminous/languages/include/vb.php vendored Executable file
View File

@ -0,0 +1,118 @@
<?php
// Literal values.
$luminous_vb_values = array('False', 'Nothing', 'True');

// Word operators.
// http://msdn.microsoft.com/en-us/library/asfcc119.aspx
$luminous_vb_operators = array('AddressOf', 'And', 'AndAlso', 'GetType',
  'GetXmlNamespace', 'Is', 'IsFalse', 'IsNot', 'IsTrue', 'Mod', 'Not',
  'Or', 'OrElse', 'TypeOf', 'Xor');

// Type names and the C* conversion functions.
// 'CByte' was previously spelled 'Cbyte', unlike its CBool/CChar/CDate
// siblings. NOTE(review): this only changes highlighting if identifiers are
// matched case-sensitively - confirm against the VB scanner.
$luminous_vb_types = array('Boolean', 'Byte', 'CBool', 'CByte', 'CChar',
  'CDate', 'CDbl', 'CDec', 'Char', 'CInt', 'CLng', 'CObj', 'CShort', 'CSng',
  'CStr', 'CType', 'Date', 'Decimal', 'Double', 'Integer', 'Long', 'Object',
  'Short', 'String');
// Keyword list, in alphabetical order with one group per initial letter.
$luminous_vb_keywords = array(
  'AddHandler', 'Alias', 'Ansi', 'As', 'Assembly', 'Auto',
  'ByRef', 'ByVal',
  'Call', 'Case', 'Catch', 'Class', 'Const',
  'Declare', 'Default', 'Delegate', 'Dim', 'DirectCast', 'Do',
  'Each', 'Else', 'ElseIf', 'End', 'EndIf', 'Enum', 'Erase', 'Error',
  'Event', 'Exit',
  'Finally', 'For', 'Friend', 'Function',
  'Get', 'GetType', 'GoSub', 'GoTo',
  'Handles',
  'If', 'Implements', 'Imports', 'In', 'Inherits', 'Interface',
  'Let', 'Lib', 'Like', 'Loop',
  'Me', 'Module', 'MustInherit', 'MustOverride', 'MyBase', 'MyClass',
  'Namespace', 'New', 'Next', 'Nothing', 'NotInheritable',
  'NotOverridable',
  'On', 'Option', 'Optional', 'OrElse', 'Overloads', 'Overridable',
  'Overrides',
  'ParamArray', 'Preserve', 'Private', 'Property', 'Protected', 'Public',
  'RaiseEvent', 'ReadOnly', 'ReDim', 'RemoveHandler', 'Resume', 'Return',
  'Select', 'Set', 'Shadows', 'Shared', 'Single', 'Static', 'Step', 'Stop',
  'Structure', 'Sub', 'SyncLock',
  'Then', 'Throw', 'To', 'Try',
  'Unicode', 'Until',
  'Variant',
  'Wend', 'When', 'While', 'With', 'WithEvents', 'WriteOnly'
);

View File

@ -0,0 +1,249 @@
<?php
// The lists are declared global so that they stay reachable by name even
// when this file is included from inside a function or method.
global $luminous_vim_functions, $luminous_vim_keywords;

/*
 * Credit for this list goes to `nelstrom' on github:
 * https://github.com/nelstrom/SyntaxHighlighter/blob/master/scripts/shBrushVimscript.js
 */
// Function names, in alphabetical order with one group per initial letter.
$luminous_vim_functions = array(
  'abs', 'add', 'append', 'argc', 'argidx', 'argv', 'atan',
  'browse', 'browsedir', 'bufexists', 'buflisted', 'bufloaded', 'bufname',
  'bufnr', 'bufwinnr', 'byte2line', 'byteidx',
  'call', 'ceil', 'changenr', 'char2nr', 'cindent', 'clearmatches', 'col',
  'complete', 'complete_add', 'complete_check', 'confirm', 'copy', 'cos',
  'count', 'cscope_connection', 'cursor',
  'deepcopy', 'delete', 'did_filetype', 'diff_filler', 'diff_hlID',
  'empty', 'escape', 'eval', 'eventhandler', 'executable', 'exists',
  'expand', 'expr8', 'extend',
  'feedkeys', 'filereadable', 'filewritable', 'filter', 'finddir',
  'findfile', 'float2nr', 'floor', 'fnameescape', 'fnamemodify',
  'foldclosed', 'foldclosedend', 'foldlevel', 'foldtext',
  'foldtextresult', 'foreground', 'function',
  'garbagecollect', 'get', 'getbufline', 'getbufvar', 'getchar',
  'getcharmod', 'getcmdline', 'getcmdpos', 'getcmdtype', 'getcwd',
  'getfontname', 'getfperm', 'getfsize', 'getftime', 'getftype', 'getline',
  'getloclist', 'getmatches', 'getpid', 'getpos', 'getqflist', 'getreg',
  'getregtype', 'gettabwinvar', 'getwinposx', 'getwinposy', 'getwinvar',
  'glob', 'globpath',
  'has', 'has_key', 'haslocaldir', 'hasmapto', 'histadd', 'histdel',
  'histget', 'histnr', 'hlID', 'hlexists', 'hostname',
  'iconv', 'indent', 'index', 'input', 'inputdialog', 'inputlist',
  'inputrestore', 'inputsave', 'inputsecret', 'insert', 'isdirectory',
  'islocked', 'items',
  'join',
  'keys',
  'len', 'libcall', 'libcallnr', 'line', 'line2byte', 'lispindent',
  'localtime', 'log10',
  'map', 'maparg', 'mapcheck', 'match', 'matchadd', 'matcharg',
  'matchdelete', 'matchend', 'matchlist', 'matchstr', 'max', 'min',
  'mkdir', 'mode',
  'nextnonblank', 'nr2char',
  'pathshorten', 'pow', 'prevnonblank', 'printf', 'pumvisible',
  'range', 'readfile', 'reltime', 'reltimestr', 'remote_expr',
  'remote_foreground', 'remote_peek', 'remote_read', 'remote_send',
  'remove', 'rename', 'repeat', 'resolve', 'reverse', 'round',
  'search', 'searchdecl', 'searchpair', 'searchpairpos', 'searchpos',
  'server2client', 'serverlist', 'setbufvar', 'setcmdpos', 'setline',
  'setloclist', 'setmatches', 'setpos', 'setqflist', 'setreg',
  'settabwinvar', 'setwinvar', 'shellescape', 'simplify', 'sin', 'sort',
  'soundfold', 'spellbadword', 'spellsuggest', 'split', 'sqrt',
  'str2float', 'str2nr', 'strftime', 'stridx', 'string', 'strlen',
  'strpart', 'strridx', 'strtrans', 'submatch', 'substitute', 'synID',
  'synIDattr', 'synIDtrans', 'synstack', 'system',
  'tabpagebuflist', 'tabpagenr', 'tabpagewinnr', 'tagfiles', 'taglist',
  'tempname', 'tolower', 'toupper', 'tr', 'trunc', 'type',
  'values', 'virtcol', 'visualmode',
  'winbufnr', 'wincol', 'winheight', 'winline', 'winnr', 'winrestcmd',
  'winrestview', 'winsaveview', 'winwidth', 'writefile'
);
// Vim keyword list; from the entries it appears to mix Ex command names,
// their abbreviations and option names.
// The entry 'xnoremap' was previously misspelled 'xnremap', so the real
// command name was never in this list.
$luminous_vim_keywords = array(
'Next',
'Print',
'XMLent', 'XMLns',
'abc', 'abclear', 'abo', 'aboveleft', 'acd', 'ai', 'akm', 'al', 'aleph',
'all',
'allowrevins',
'altkeymap', 'ambiwidth', 'ambw', 'anti', 'antialias', 'ar', 'arab', 'arabic',
'arabicshape', 'arabshape', 'argadd',
'arga', 'argdelete', 'argdo', 'arge', 'argedit', 'argg', 'argglobal',
'arglargs', 'arglocal', 'argument', 'ari', 'arshape', 'as', 'ascii', 'au',
'augroup', 'auto', 'autochdir', 'autocmd', 'autoindent', 'autoread',
'autowrite', 'autowriteall', 'aw', 'awa',
'bN', 'bNext', 'ba', 'background', 'backspace', 'backup', 'backupcopy',
'backupdir', 'backupext', 'backupskip', 'bad', 'badd', 'ball', 'balloondelay',
'ballooneval', 'balloonexpr', 'bd', 'bdelete', 'bdir', 'bdlay', 'bel',
'belowright', 'beval', 'bex', 'bexpr', 'bf', 'bfirst', 'bg', 'bh', 'bin',
'binary', 'biosk', 'bioskey', 'bk', 'bkc', 'bl', 'blast', 'bm', 'bmodified',
'bn', 'bnext', 'bo', 'bomb', 'botright', 'bp', 'bprevious', 'br', 'brea',
'break', 'breaka', 'breakadd', 'breakat', 'breakd', 'breakdel', 'breakl',
'breaklist', 'brewind', 'brk', 'bro', 'browse', 'browsedir', 'bs', 'bsdir',
'bsk', 'bt', 'bufdo', 'buffer', 'buffers', 'bufhidden', 'buflisted', 'buftype',
'bun', 'bunload', 'bw', 'bwipeout',
'cN', 'cNext', 'cNf', 'cNfile', 'cabc', 'cabclear', 'cad', 'caddb',
'caddbuffer', 'caddexpr', 'caddf', 'caddfile', 'cal', 'call', 'casemap', 'cat',
'catch', 'cb', 'cbuffer', 'cc', 'ccl', 'cclose', 'ccv', 'cd', 'cdpath', 'ce',
'cedit', 'center', 'cex', 'cexpr', 'cf', 'cfile', 'cfir', 'cfirst', 'cfu', 'cg',
'cgetb', 'cgetbuffer', 'cgete', 'cgetexpr', 'cgetfile', 'ch', 'change',
'changes', 'charconvert', 'chd', 'chdir', 'che', 'checkpath', 'checkt',
'checktime', 'ci', 'cin', 'cindent', 'cink', 'cinkeys', 'cino', 'cinoptions',
'cinw', 'cinwords', 'cl', 'cla', 'clast', 'clipboard', 'clist', 'clo', 'close',
'cm', 'cmap', 'cmapc', 'cmapclear', 'cmdheight', 'cmdwinheight', 'cmp', 'cms',
'cn', 'cnew', 'cnewer', 'cnext', 'cnf', 'cnfile', 'cno', 'cnoremap', 'co',
'col', 'colder', 'colo', 'colorscheme', 'columns', 'com', 'comc', 'comclear',
'command', 'comments', 'commentstring', 'comp', 'compatible', 'compiler',
'complete', 'completefunc', 'completeopt', 'con', 'conf', 'confirm', 'consk',
'conskey', 'continue', 'cope', 'copen', 'copy', 'copyindent', 'cot', 'cp',
'cpf', 'cpfile', 'cpo', 'cpoptions', 'cprevious', 'cpt', 'cq', 'cquit', 'cr',
'crewind', 'cscopepathcomp', 'cscopeprg', 'cscopequickfix', 'cscopetag',
'cscopetagorder', 'cscopeverbose', 'cspc', 'csprg', 'csqf', 'cst', 'csto',
'csverb', 'cuc', 'cul', 'cuna', 'cunabbrev', 'cursorcolumn', 'cursorline', 'cw',
'cwh', 'cwindow',
'debug', 'debugg', 'debuggreedy', 'deco', 'def', 'define', 'delc', 'delcombine',
'delcommand', 'delete', 'delf', 'delfunction', 'delm', 'delmarks', 'dex', 'dg',
'di', 'dict', 'dictionary', 'diff', 'diffexpr', 'diffg', 'diffget', 'diffoff',
'diffopt', 'diffpatch', 'diffpu', 'diffput', 'diffsplit', 'diffthis', 'diffu',
'diffupdate', 'dig', 'digraph', 'digraphs', 'dip', 'dir', 'directory',
'display', 'dj', 'djump', 'dl', 'dlist', 'do', 'doautoa', 'doautoall',
'doautocmd', 'dr', 'drop', 'ds', 'dsearch', 'dsp', 'dsplit', 'dy',
'ea', 'ead', 'eadirection', 'earlier', 'eb', 'echoe', 'echoerr', 'echohl',
'echom', 'echomsg', 'echon', 'ed', 'edcompatible', 'edit', 'ef', 'efm', 'ei',
'ek', 'el', 'else', 'elsei', 'elseif', 'em', 'emenu', 'en', 'enc', 'encoding',
'endf', 'endfo', 'endfor', 'endfunction', 'endif', 'endofline', 'endt',
'endtry', 'endw', 'endwhile', 'ene', 'enew', 'environment', 'eol', 'ep',
'equalalways', 'equalprg', 'errorbells', 'errorfile', 'errorformat', 'esckeys',
'et', 'event', 'eventignore', 'ex', 'exi', 'exit', 'expandtab', 'expression',
'exrc', 'exu', 'exusage',
'fcl', 'fcs', 'fdc', 'fde', 'fdi', 'fdl', 'fdls', 'fdm', 'fdn', 'fdo', 'fdt',
'fen', 'fenc', 'fencs', 'fex', 'ff', 'ffs', 'file', 'fileencoding',
'fileencodings', 'fileformat', 'fileformats', 'files', 'filetype', 'fillchars',
'fin', 'fina', 'finally', 'find', 'fini', 'finish', 'fir', 'first', 'fix',
'fixdel', 'fk', 'fkmap', 'flp', 'fml', 'fmr', 'fo', 'fold', 'foldc',
'foldclose', 'foldcolumn', 'foldd', 'folddoc', 'folddoclosed', 'folddoopen',
'foldenable', 'foldexpr', 'foldignore', 'foldlevel', 'foldlevelstart',
'foldmarker', 'foldmethod', 'foldminlines', 'foldnestmax', 'foldo', 'foldopen',
'foldtext', 'for', 'formatexpr', 'formatlistpat', 'formatoptions', 'formatprg',
'fp', 'fs', 'fsync', 'ft', 'fu', 'function',
'gcr', 'gd', 'gdefault', 'gfm', 'gfn', 'gfs', 'gfw', 'ghr', 'go', 'goto', 'gp',
'gr', 'grep', 'grepa', 'grepadd', 'grepformat', 'grepprg', 'gtl', 'gtt',
'guicursor', 'guifont', 'guifontset', 'guifontwide', 'guiheadroom',
'guioptions', 'guipty', 'guitablabel', 'guitabtooltip',
'ha', 'hardcopy', 'help', 'helpf', 'helpfile', 'helpfind', 'helpg', 'helpgrep',
'helpheight', 'helplang', 'helpt', 'helptags', 'hf', 'hh', 'hi', 'hid',
'hidden', 'hide', 'highlight', 'his', 'history', 'hk', 'hkmap', 'hkmapp', 'hkp',
'hl', 'hlg', 'hls', 'hlsearch',
'iabc', 'iabclear', 'ic', 'icon', 'iconstring', 'if', 'ignorecase', 'ij',
'ijump', 'il', 'ilist', 'im', 'imactivatekey', 'imak', 'imap', 'imapc',
'imapclear', 'imc', 'imcmdline', 'imd', 'imdisable', 'imi', 'iminsert', 'ims',
'imsearch', 'inc', 'include', 'includeexpr', 'incsearch', 'inde', 'indentexpr',
'indentkeys', 'indk', 'inex', 'inf', 'infercase', 'ino', 'inoremap',
'insertmode', 'is', 'isearch', 'isf', 'isfname', 'isi', 'isident', 'isk',
'iskeyword', 'isp', 'isplit', 'isprint', 'iuna', 'iunabbrev',
'join', 'joinspaces', 'js', 'ju', 'jumps',
'kee', 'keepalt', 'keepj', 'keepjumps', 'keepmarks', 'key', 'keymap',
'keymodel', 'keywordprg', 'km', 'kmp', 'kp',
'lN', 'lNext', 'lNf', 'lNfile', 'la', 'lad', 'laddb', 'laddbuffer', 'laddexpr',
'laddf', 'laddfile', 'lan', 'langmap', 'langmenu', 'language', 'last',
'laststatus', 'later', 'lazyredraw', 'lb', 'lbr', 'lbuffer', 'lc', 'lcd', 'lch',
'lchdir', 'lcl', 'lclose', 'lcs', 'le', 'left', 'lefta', 'leftabove', 'let',
'lex', 'lexpr', 'lf', 'lfile', 'lfir', 'lfirst', 'lg', 'lgetb', 'lgetbuffer',
'lgete', 'lgetexpr', 'lgetfile', 'lgr', 'lgrep', 'lgrepa', 'lgrepadd', 'lh',
'lhelpgrep', 'linebreak', 'lines', 'linespace', 'lisp', 'lispwords', 'list',
'listchars', 'll', 'lla', 'llast', 'lli', 'llist', 'lm', 'lmak', 'lmake',
'lmap', 'lmapc', 'lmapclear', 'ln', 'lne', 'lnew', 'lnewer', 'lnext', 'lnf',
'lnfile', 'lnoremap', 'lo', 'loadplugins', 'loadview', 'loc', 'lockmarks',
'lockv', 'lockvar', 'lol', 'lolder', 'lop', 'lopen', 'lp', 'lpf', 'lpfile',
'lpl', 'lprevious', 'lr', 'lrewind', 'ls', 'lsp', 'ltag', 'lv', 'lvimgrep',
'lvimgrepa', 'lvimgrepadd', 'lw', 'lwindow', 'lz',
'ma', 'maca', 'macaction', 'macatsui', 'macm', 'macmenu', 'magic', 'mak',
'make', 'makeef', 'makeprg', 'map', 'mapping', 'mark', 'marks', 'mat', 'match',
'matchpairs', 'matchtime', 'maxcombine', 'maxfuncdepth', 'maxmapdepth',
'maxmem', 'maxmempattern', 'maxmemtot', 'mco', 'mef', 'menu', 'menuitems',
'menut', 'menutranslate', 'mfd', 'mh', 'mis', 'mk', 'mkexrc', 'mks',
'mksession', 'mksp', 'mkspell', 'mkspellmem', 'mkv', 'mkvie', 'mkview',
'mkvimrc', 'ml', 'mls', 'mm', 'mmd', 'mmp', 'mmt', 'mod', 'mode', 'modeline',
'modelines', 'modifiable', 'modified', 'more', 'mouse', 'mousef', 'mousefocus',
'mousehide', 'mousem', 'mousemodel', 'mouses', 'mouseshape', 'mouset',
'mousetime', 'move', 'mp', 'mps', 'msm', 'mz', 'mzf', 'mzfile', 'mzq',
'mzquantum', 'mzscheme',
'nbkey', 'new', 'next', 'nf', 'nm', 'nmap', 'nmapc', 'nmapclear', 'nn',
'nnoremap', 'no', 'noexpandtab', 'noh', 'nohlsearch', 'noremap', 'nrformats',
'nu', 'number', 'numberwidth', 'nuw',
'odev', 'oft', 'ofu', 'om', 'omap', 'omapc', 'omapclear', 'omnifunc', 'on',
'only', 'ono', 'onoremap', 'open', 'opendevice', 'operatorfunc', 'opfunc',
'opt', 'option', 'options', 'osfiletype',
'pa', 'para', 'paragraphs', 'paste', 'pastetoggle', 'patchexpr', 'patchmode',
'path', 'pc', 'pclose', 'pdev', 'pe', 'ped', 'pedit', 'penc', 'perl', 'perld',
'perldo', 'pex', 'pexpr', 'pfn', 'ph', 'pheader', 'pi', 'pm', 'pmbcs', 'pmbfn',
'po', 'pop', 'popt', 'popu', 'popup', 'pp', 'ppop', 'pre', 'preserve',
'preserveindent', 'prev', 'previewheight', 'previewwindow', 'previous', 'print',
'printdevice', 'printencoding', 'printexpr', 'printfont', 'printheader',
'printmbcharset', 'printmbfont', 'printoptions', 'prof', 'profd', 'profdel',
'profile', 'prompt', 'promptf', 'promptfind', 'promptr', 'promptrepl', 'ps',
'psearch', 'pt', 'ptN', 'ptNext', 'pta', 'ptag', 'ptf', 'ptfirst', 'ptj',
'ptjump', 'ptl', 'ptlast', 'ptn', 'ptnext', 'ptp', 'ptprevious', 'ptr',
'ptrewind', 'pts', 'ptselect', 'pu', 'pumheight', 'put', 'pvh', 'pvw', 'pw',
'pwd', 'py', 'pyf', 'pyfile', 'python',
'qa', 'qall', 'qe', 'quit', 'quita', 'quitall', 'quoteescape',
'rdt', 'read', 'readonly', 'rec', 'recover', 'red', 'redi', 'redir', 'redo',
'redr', 'redraw', 'redraws', 'redrawstatus', 'redrawtime', 'reg', 'registers',
'remap', 'report', 'res', 'resize', 'restorescreen', 'ret', 'retab', 'retu',
'return', 'revins', 'rew', 'rewind', 'ri', 'right', 'rightb', 'rightbelow',
'rightleft', 'rightleftcmd', 'rl', 'rlc', 'ro', 'rs', 'rtp', 'ru', 'rub',
'ruby', 'rubyd', 'rubydo', 'rubyf', 'rubyfile', 'ruf', 'ruler', 'rulerformat',
'runtime', 'runtimepath', 'rv', 'rviminfo',
'sN', 'sNext', 'sa', 'sal', 'sall', 'san', 'sandbox', 'sargument', 'sav',
'saveas', 'sb', 'sbN', 'sbNext', 'sba', 'sball', 'sbf', 'sbfirst', 'sbl',
'sblast', 'sbm', 'sbmodified', 'sbn', 'sbnext', 'sbo', 'sbp', 'sbprevious',
'sbr', 'sbrewind', 'sbuffer', 'sc', 'scb', 'scr', 'scrip', 'scripte',
'scriptencoding', 'scriptnames', 'scroll', 'scrollbind', 'scrolljump',
'scrolloff', 'scrollopt', 'scs', 'se', 'sect', 'sections', 'secure', 'sel',
'selection', 'selectmode', 'sessionoptions', 'set', 'setf', 'setfiletype',
'setg', 'setglobal', 'setl', 'setlocal', 'sf', 'sfind', 'sfir', 'sfirst', 'sft',
'sh', 'shcf', 'shell', 'shellcmdflag', 'shellpipe', 'shellquote', 'shellredir',
'shellslash', 'shelltemp', 'shelltype', 'shellxquote', 'shiftround',
'shiftwidth', 'shm', 'shortmess', 'shortname', 'showbreak', 'showcmd',
'showfulltag', 'showmatch', 'showmode', 'showtabline', 'shq', 'si',
'sidescroll', 'sidescrolloff', 'sign', 'sil', 'silent', 'sim', 'simalt', 'siso',
'sj', 'sl', 'sla', 'slast', 'sleep', 'slm', 'sm', 'smagic', 'smap', 'smapc',
'smapclear', 'smartcase', 'smartindent', 'smarttab', 'smc', 'smd', 'sme',
'smenu', 'sn', 'snext', 'sni', 'sniff', 'sno', 'snomagic', 'snor', 'snoremap',
'snoreme', 'snoremenu', 'so', 'softtabstop', 'sol', 'something', 'sor', 'sort',
'source', 'sp', 'spc', 'spe', 'spell', 'spellcapcheck', 'spelld', 'spelldump',
'spellfile', 'spellgood', 'spelli', 'spellinfo', 'spelllang', 'spellr',
'spellrepall', 'spellsuggest', 'spellu', 'spellundo', 'spellw', 'spellwrong',
'spf', 'spl', 'split', 'splitbelow', 'splitright', 'spr', 'sprevious', 'sps',
'sr', 'sre', 'srewind', 'srr', 'ss', 'ssl', 'ssop', 'st', 'sta', 'stag', 'stal',
'star', 'startg', 'startgreplace', 'startinsert', 'startofline', 'startr',
'startreplace', 'statusline', 'stj', 'stjump', 'stl', 'stmp', 'stop', 'stopi',
'stopinsert', 'sts', 'stselect', 'su', 'sua', 'suffixes', 'suffixesadd', 'sun',
'sunhide', 'sunme', 'sunmenu', 'sus', 'suspend', 'sv', 'sview', 'sw',
'swapfile', 'swapsync', 'swb', 'swf', 'switchbuf', 'sws', 'sxq', 'syn',
'syncbind', 'synmaxcol', 'syntax',
'tN', 'tNext', 'ta', 'tab', 'tabN', 'tabNext', 'tabc', 'tabclose', 'tabd',
'tabdo', 'tabe', 'tabedit', 'tabf', 'tabfind', 'tabfir', 'tabfirst', 'tabl',
'tablast', 'tabline', 'tabm', 'tabmove', 'tabn', 'tabnew', 'tabnext', 'tabo',
'tabonly', 'tabp', 'tabpagemax', 'tabprevious', 'tabr', 'tabrewind', 'tabs',
'tabstop', 'tag', 'tag_listfiles', 'tagbsearch', 'taglength', 'tagrelative',
'tags', 'tagstack', 'tal', 'tb', 'tbi', 'tbidi', 'tbis', 'tbs', 'tc', 'tcl',
'tcld', 'tcldo', 'tclf', 'tclfile', 'te', 'tearoff', 'tenc', 'term', 'termbidi',
'termencoding', 'terse', 'textauto', 'textmode', 'textwidth', 'tf', 'tfirst',
'tgst', 'th', 'thesaurus', 'throw', 'tildeop', 'timeout', 'timeoutlen', 'title',
'titlelen', 'titleold', 'titlestring', 'tj', 'tjump', 'tl', 'tlast', 'tm',
'tmenu', 'tn', 'tnext', 'to', 'toolbar', 'toolbariconsize', 'top', 'topleft',
'tp', 'tpm', 'tprevious', 'tr', 'trewind', 'try', 'ts', 'tselect', 'tsl', 'tsr',
'ttimeout', 'ttimeoutlen', 'ttm', 'tty', 'ttybuiltin', 'ttyfast', 'ttym',
'ttymouse', 'ttyscroll', 'ttytype', 'tu', 'tunmenu', 'tw', 'tx',
'uc', 'ul', 'una', 'unabbreviate', 'undo', 'undoj', 'undojoin', 'undol',
'undolevels', 'undolist', 'unh', 'unhide', 'unl', 'unlet', 'unlo', 'unlockvar',
'up', 'update', 'updatecount', 'updatetime', 'ut',
'var', 'vb', 'vbs', 'vdir', 've', 'verb', 'verbose', 'verbosefile', 'version',
'vert', 'vertical', 'vfile', 'vi', 'vie', 'view', 'viewdir', 'viewoptions',
'vim', 'vimgrep', 'vimgrepa', 'vimgrepadd', 'viminfo', 'virtualedit', 'visual',
'visualbell', 'viu', 'viusage', 'vm', 'vmap', 'vmapc', 'vmapclear', 'vn', 'vne',
'vnew', 'vnoremap', 'vop', 'vs', 'vsplit',
'wN', 'wNext', 'wa', 'wak', 'wall', 'warn', 'wb', 'wc', 'wcm', 'wd',
'weirdinvert', 'wfh', 'wfw', 'wh', 'whichwrap', 'while', 'wi', 'wig',
'wildchar', 'wildcharm', 'wildignore', 'wildmenu', 'wildmode', 'wildoptions',
'wim', 'win', 'winaltkeys', 'winc', 'wincmd', 'windo', 'window', 'winfixheight',
'winfixwidth', 'winheight', 'winminheight', 'winminwidth', 'winp', 'winpos',
'winsize', 'winwidth', 'wiv', 'wiw', 'wm', 'wmh', 'wmnu', 'wmw', 'wn', 'wnext',
'wop', 'wp', 'wprevious', 'wq', 'wqa', 'wqall', 'wrap', 'wrapmargin',
'wrapscan', 'write', 'writeany', 'writebackup', 'writedelay', 'ws', 'wsverb',
'wv', 'wviminfo', 'ww',
'xa', 'xall', 'xit', 'xm', 'xmap', 'xmapc', 'xmapclear', 'xmenu',
'xn', 'xnoremap', 'xnoreme', 'xnoremenu', 'xunme', 'xunmenu',
'yank'
);

34
3rdparty/luminous/languages/java.php vendored Executable file
View File

@ -0,0 +1,34 @@
<?php
require_once(dirname(__FILE__) . '/include/java_func_list.php');
class LuminousJavaScanner extends LuminousSimpleScanner {

  /**
   * Registers the Java identifier lists and token patterns.
   *
   * The keyword/type lists are read from $GLOBALS (presumably populated by
   * include/java_func_list.php, which this file requires).
   */
  function init() {
    $this->add_identifier_mapping('KEYWORD',
      $GLOBALS['luminous_java_keywords']);
    $this->add_identifier_mapping('TYPE', $GLOBALS['luminous_java_types']);

    // (token name, pattern) pairs; they are registered in exactly this order
    $rules = array(
      array('COMMENT', LuminousTokenPresets::$C_COMMENT_ML),
      array('COMMENT', LuminousTokenPresets::$C_COMMENT_SL),
      array('STRING', LuminousTokenPresets::$DOUBLE_STR),
      array('CHARACTER', LuminousTokenPresets::$SINGLE_STR),
      array('NUMERIC', LuminousTokenPresets::$NUM_HEX),
      array('NUMERIC', LuminousTokenPresets::$NUM_REAL),
      array('IDENT', '/[a-zA-Z$_][$\w]*/'),
      array('OPERATOR', '/[!%^&*\-=+:?|<>]+/'),
      // an '@name' is an annotation; it is tagged as FUNCTION
      // http://download.oracle.com/javase/1,5.0/docs/guide/language/annotations.html
      array('FUNCTION', '/@[\w]+/'),
    );
    foreach ($rules as $rule) {
      $this->add_pattern($rule[0], $rule[1]);
    }
  }

  /**
   * Estimates how likely $src is to be Java.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return int|float 1.0 if a 'import java.' line is present, otherwise
   *   0.2 for each of the two weaker hints found (0 if none)
   */
  public static function guess_language($src, $info) {
    // an import from the java.* namespace is treated as conclusive
    if (preg_match('/^import\s+java\./m', $src)) return 1.0;

    $score = 0;
    $hints = array(
      '/System\.out\.print/',
      '/public\s+static\s+void\s+main\\b/',
    );
    foreach ($hints as $hint) {
      if (preg_match($hint, $src)) $score += 0.2;
    }
    return $score;
  }
}

26
3rdparty/luminous/languages/javascript.php vendored Executable file
View File

@ -0,0 +1,26 @@
<?php
class LuminousJavaScriptScanner extends LuminousECMAScriptScanner {
  // mostly the same as the ECMAScript scanner for now; only the language
  // guessing heuristic is specialised.

  /**
   * Estimates how likely $src is to be JavaScript.
   *
   * JavaScript is surprisingly indistinct when you think about it, so these
   * hints are a bit of a stretch and it's hard to ever return a value of
   * more than ~40%.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return int|float the sum of the weights of every hint found (0 if none)
   */
  public static function guess_language($src, $info) {
    $p = 0;
    // var x =
    // not amazingly distinct, but something
    if (preg_match('/var\s++\w++\s*+=/', $src)) $p += 0.05;
    // $( ... ) or jQuery.
    // The dollar must be escaped, otherwise PCRE reads it as the
    // end-of-subject anchor and the '$(' alternative can never match. The
    // lookbehind makes sure the '$' is not just the tail of a longer
    // identifier such as 'foo$('.
    if (preg_match('/(?<![\w$])\$\( | jQuery/x', $src)) $p += 0.20;
    // typeof x == undefined
    if (preg_match('/typeof\s++\w++\s*+[!=]{2,3}+\s*+[\'"]?+undefined/i', $src))
      $p += 0.10;
    if (strpos($src, 'document.') !== false) $p += 0.10;
    if (strpos($src, 'Math.') !== false) $p += 0.05;
    // Anonymous functions
    if (preg_match('/function\s*+\\([^)]*+\\)\s*+\\{/', $src))
      $p += 0.05;
    return $p;
  }
}

106
3rdparty/luminous/languages/json.php vendored Executable file
View File

@ -0,0 +1,106 @@
<?php
class LuminousJSONScanner extends LuminousScanner {

  /**
   * Stack of open containers. Each entry is array(kind, expecting) where
   * kind is 'array' or 'obj' and expecting is null, 'key' or 'value'.
   */
  private $stack = array();

  /**
   * Maps the JSON literals true/false/null to the KEYWORD token type.
   */
  public function init() {
    $this->add_identifier_mapping('KEYWORD', array('true', 'false', 'null'));
  }

  /**
   * @return string|null the kind ('array' or 'obj') of the innermost open
   *   container, or null when no container is open
   */
  public function state() {
    if (!empty($this->stack)) return $this->stack[count($this->stack)-1][0];
    else return null;
  }

  /**
   * Gets, and optionally first sets, what the innermost container expects
   * next.
   *
   * @param string|null $x if not null, stored as the new expectation of the
   *   innermost container (ignored when no container is open)
   * @return string|null the current expectation, or null when no container
   *   is open
   */
  private function expecting($x=null) {
    if ($x !== null) {
      if (!empty($this->stack)) $this->stack[count($this->stack)-1][1] = $x;
    }
    if (!empty($this->stack)) return $this->stack[count($this->stack)-1][1];
    else return null;
  }

  /**
   * Scans the whole source, recording one token per loop iteration.
   *
   * Anything that is not valid in the current state is still consumed but is
   * recorded with a null token type.
   */
  function main() {
    while (!$this->eos()) {
      $tok = null;
      // snapshot the state before scanning: the branches below test the
      // container we were in when this token started
      list($state, $expecting) = array($this->state(), $this->expecting());

      $this->skip_whitespace();
      if ($this->eos()) break;
      if ($this->scan(LuminousTokenPresets::$NUM_REAL) !== null) {
        $tok = 'NUMERIC';
      }
      elseif($this->scan('/[a-zA-Z]\w*/')) {
        $tok = 'IDENT';
      }
      elseif($this->scan(LuminousTokenPresets::$DOUBLE_STR)) {
        // object keys get a different token type from ordinary strings
        $tok = ($state === 'obj' && $expecting === 'key')? 'TYPE' : 'STRING';
      }
      elseif($this->scan('/\[/')) {
        $this->stack[] = array('array', null);
        $tok = 'OPERATOR';
      }
      elseif($this->scan('/\]/')) {
        // a ']' outside an array is consumed but left untagged
        if ($state === 'array') {
          array_pop($this->stack);
          $tok = 'OPERATOR';
        }
      }
      elseif($this->scan('/\{/')) {
        $this->stack[] = array('obj', 'key');
        $tok = 'OPERATOR';
      }
      elseif($state === 'obj' && $this->scan('/\}/')) {
        array_pop($this->stack);
        $tok = 'OPERATOR';
      }
      elseif($state === 'obj' && $this->scan('/:/')) {
        $this->expecting('value');
        $tok = 'OPERATOR';
      }
      elseif($this->scan('/,/')) {
        if ($state === 'obj') {
          $this->expecting('key');
          $tok = 'OPERATOR';
        }
        elseif($state === 'array') $tok = 'OPERATOR';
      }
      // fallback: consume a single character with no token type
      else $this->scan('/./');

      $this->record($this->match(), $tok);
    }
  }

  /**
   * Estimates how likely $src is to be JSON.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return int|float 0.05 if the trimmed source is wrapped in [...] or
   *   {...}, 0.1 if it is a single scalar (string, number, true, false or
   *   null), otherwise 0
   */
  public static function guess_language($src, $info) {
    // JSON is fairly hard to guess
    $p = 0;
    $src_ = trim($src);
    // compare against '' rather than using empty(): the document "0" is a
    // valid JSON number but counts as empty in PHP
    if ($src_ !== '') {
      $char = $src_[0];
      $char2 = $src_[strlen($src_)-1];
      // a double quoted string: any number of plain runs or escape pairs
      $str = '"(?>[^"\\\\]+|\\\\.)*"';
      // number: optional sign, optional fraction, optionally signed exponent
      $num = '-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?';
      // looks like an object or array
      if ( ($char === '[' && $char2 === ']')
        || ($char === '{' && $char2 === '}'))
      {
        $p += 0.05;
      }
      elseif(preg_match('/^(?:' . $str . '|' . $num . '|true|false|null)$/',
        $src_))
      {
        // just a string or number or value
        $p += 0.1;
      }
    }
    return $p;
  }
}

68
3rdparty/luminous/languages/latex.php vendored Executable file
View File

@ -0,0 +1,68 @@
<?php
/*
* LaTeX scanner,
* brief explanation: we're using the stateful scanner to handle marginally
* different rulesets in math blocks.
* We could add in an awful lot of detail, everything is pretty generic right
* now, we don't look for any specific names or anything, but it'll suffice
* for basic highlighting.
*/
class LuminousLatexScanner extends LuminousStatefulScanner {

  /**
   * Declares the math states, the terminal token patterns, the allowed
   * state transitions and the mapping from rule names to output tags.
   */
  function init() {
    // math states, each given as (name, opening pattern, closing pattern)
    $this->add_pattern('displaymath', '/\\$\\$/', '/\\$\\$/');
    // literal '\[' and '\]'
    $this->add_pattern('displaymath', '/\\\\\\[/', '/\\\\\\]/');
    $this->add_pattern('mathmode', '/\\$/', '/\\$/');

    // terminals, registered in this order
    $terminals = array(
      array('COMMENT', '/%.*/'),
      array('NUMERIC', '/\d+(\.\d+)?\w*/'),
      array('MATH_FUNCTION', '/\\\\(?:[a-z_]\w*|[^\]])/i'),
      array('MATHOP', '/[\\*^\\-=+]+/'),
      array('FUNCTION', '/\\\\(?:[a-z_]\w*|.)/i'),
      array('IDENT', '/[a-z_]\w*/i'),
      array('OPERATOR', '/[\[\]\{\}]+/'),
    );
    foreach ($terminals as $terminal) {
      $this->add_pattern($terminal[0], $terminal[1]);
    }

    // both math states may contain the same set of rules
    $in_math = array('NUMERIC', 'MATH_FUNCTION', 'MATHOP');
    $this->transitions = array(
      'initial' => array('COMMENT', 'OPERATOR', 'displaymath', 'mathmode',
        'FUNCTION', 'IDENT'),
      'displaymath' => $in_math,
      'mathmode' => $in_math,
    );

    $this->rule_tag_map = array(
      'displaymath' => 'INTERPOLATION',
      'mathmode' => 'INTERPOLATION',
      'MATHOP' => 'OPERATOR',
      // arbitrary choice of tag, made only so that math mode functions
      // look different from ordinary ones
      'MATH_FUNCTION' => 'VALUE',
    );
  }

  /**
   * Estimates how likely $src is to be LaTeX.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information; 'num_lines' is read
   * @return float 0.1 per well known command found, plus 0.1 if there are
   *   more backslashes than lines, plus 0.02 if there is more than one '%'
   *   per ten lines
   */
  public static function guess_language($src, $info) {
    $score = 0.0;
    $commands = array('documentclass', 'usepackage', 'title',
      'maketitle', 'end');
    foreach ($commands as $name) {
      if (strpos($src, '\\' . $name) !== false) $score += 0.1;
    }
    // lots of backslashes relative to the number of lines
    if (substr_count($src, '\\') > $info['num_lines']) {
      $score += 0.1;
    }
    // a reasonable density of '%' (the comment character)
    if (substr_count($src, '%') > $info['num_lines']/10) {
      $score += 0.02;
    }
    return $score;
  }
}

83
3rdparty/luminous/languages/lolcode.php vendored Executable file
View File

@ -0,0 +1,83 @@
<?PHP
/*
* HAI
* I HAS PERSONAL INTEREST IN LOLCODE THATS WHY ITS HERE KTHX.
* BTW PHP IS MOSTLY CASE INSENSITIVE
*/
CLASS LUMINOUSLOLCODESCANNER EXTENDS LUMINOUSSIMPLESCANNER {

  /**
   * Override for the FUNC_DEF rule ('HOW DUZ I'): records the phrase as a
   * keyword, then records the identifier following it (if any) as a user
   * defined function and remembers it in user_defs.
   *
   * @param array $MATCHES the regex match groups of the FUNC_DEF rule
   */
  FUNCTION FUNCDEF_OVERRIDE($MATCHES) {
    $this->RECORD($MATCHES[0], 'KEYWORD');
    $this->POS_SHIFT(STRLEN($MATCHES[0]));
    $this->skip_whitespace();
    IF ($this->SCAN('/[a-z_]\w*/i')) {
      $this->RECORD($this->MATCH(), 'USER_FUNCTION');
      $this->user_defs[$this->MATCH()] = 'FUNCTION';
    }
  }

  /**
   * Filter for STRING tokens: wraps the colon escape sequences
   * (:) :o :" :: :> :(hex) :[NAME] :{var}) in <VARIABLE> tags.
   *
   * @param array $TOKEN the token; index 1 holds its text
   * @return array the escaped token with the sequences tagged
   */
  FUNCTION STR_FILTER($TOKEN) {
    // the text is escaped first, which is why '>' is matched as '&gt;'
    $TOKEN = LUMINOUSUTILS::ESCAPE_TOKEN($TOKEN);
    $STR = &$TOKEN[1];
    $STR = PREG_REPLACE('/:
      (?:
        (?:[\)o":]|&gt;)
        |\([a-fA-F0-9]*\)
        |\[[A-Z ]*\]
        |\{\w*\}
      )/x', '<VARIABLE>$0</VARIABLE>', $STR);
    RETURN $TOKEN;
  }

  /**
   * Registers the LOLCODE filters, token patterns and identifier lists.
   */
  FUNCTION INIT() {
    $this->ADD_FILTER('STRING', array($this, 'STR_FILTER'));
    $this->REMOVE_FILTER('constant');

    $this->ADD_PATTERN('COMMENT', '/(?s:OBTW.*?TLDR)|BTW.*/');
    // inside strings ':' is the escape character
    $this->ADD_PATTERN('STRING', '/" (?> [^":]+ | :.)* "/x');
    $this->ADD_PATTERN('STRING', "/' (?> [^':]+ | :.)* '/x");

    // multi-word operators
    $this->ADD_PATTERN('OPERATOR',
    '/
      \\b
      (?:
        (?:ALL|ANY|BIGGR|BOTH|DIFF|EITHER|PRODUKT|QUOSHUNT
          |MOD|SMALLR|SUM|WON)\s+OF\\b
        |
        BOTH\s+SAEM\\b
        |
        (?:BIGGR|SMALLR)\s+THAN\\b
        |
        (?:AN|NOT)\\b
      )
    /x');
    $this->ADD_PATTERN('FUNC_DEF', '/how\s+duz\s+i\\b/i');
    $this->overrides['FUNC_DEF'] = array($this, 'FUNCDEF_OVERRIDE');
    $this->ADD_PATTERN('NUMERIC', LUMINOUSTOKENPRESETS::$NUM_REAL);
    // identifiers may end in '?', so 'RLY?' and 'WTF?' are single tokens
    $this->ADD_PATTERN('IDENT', '/[a-zA-Z_]\w*\\??/');

    $this->ADD_IDENTIFIER_MAPPING('VALUE', array('FAIL', 'WIN'));
    $this->ADD_IDENTIFIER_MAPPING('TYPE', array('NOOB', 'NUMBAR', 'NUMBR',
      'TROOF', 'YARN'));
    $this->ADD_IDENTIFIER_MAPPING('KEYWORD', array('A', 'CAN',
      'DUZ', 'HAI',
      'KTHX', 'KTHXBYE', 'HAS', 'HOW', 'I', 'IM', 'IN', 'IS', 'IZ',
      'ITS', 'ITZ',
      'IF', 'FOUND', 'GTFO', 'MAEK', 'MEBBE', 'NO', 'NOW', 'O', 'OIC',
      'OMG', 'OMGWTF', 'RLY', 'RLY?', 'R', 'SAY', 'SO', 'TIL', 'YA', 'YR', 'U',
      'WAI', 'WILE', 'WTF?'));
    $this->ADD_IDENTIFIER_MAPPING('FUNCTION', array('GIMMEH', 'VISIBLE',
      'UPPIN', 'NERFIN'));
  }

  /**
   * Estimates how likely $src is to be LOLCODE.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return float 0.1 for each tell-tale phrase which occurs in $src as a
   *   whole, whitespace delimited phrase
   */
  public static function guess_language($src, $info) {
    $p = 0.0;
    foreach(array('OMGWTF', 'I CAN HAS', 'GTFO', 'HOW DUZ I', 'IM IN YR',
      'IM IN UR', 'I HAS A', 'I HAZ A', 'UPPIN', 'NERFIN', 'TROOF', 'NUMBAR',
      'NUMBR') as $str)
    {
      // The phrase must be bounded by whitespace or by the start/end of the
      // source. A plain search for " $str " missed phrases at the start or
      // end of a line, which is where most of them are found.
      $pattern = '/(?<!\S)' . preg_quote($str, '/') . '(?!\S)/';
      if (preg_match($pattern, $src)) $p += 0.1;
    }
    return $p;
  }
}

45
3rdparty/luminous/languages/matlab.php vendored Executable file
View File

@ -0,0 +1,45 @@
<?php
/*
* Matlab's pretty simple. Hoorah
*/
class LuminousMATLABScanner extends LuminousSimpleScanner {

  /**
   * Override for the COMMENT_ML rule.
   *
   * Block comments can nest, so they are handed to nestable_token() rather
   * than matched with a recursive PCRE pattern, which would be flimsy and
   * prone to crashing or overflowing the stack.
   *
   * @param array $matches the regex match groups of COMMENT_ML (unused)
   */
  function comment_override($matches) {
    $this->nestable_token('COMMENT', '/%\\{/', '/%\\}/');
  }

  /**
   * Registers the MATLAB token patterns, the block comment override and
   * the identifier lists.
   */
  function init() {
    // only the opener of a block comment is matched here; the override
    // takes it from there
    $this->add_pattern('COMMENT_ML', '/%\\{/');
    $this->add_pattern('COMMENT', '/%.*/');
    $this->add_pattern('IDENT', '/[a-z_]\w*/i');
    // a single quote attached to an identifier, closing bracket or another
    // quote is a unary operator and not the start of a string; the
    // lookbehind excludes those cases
    $string_pattern = "/(?<![\w\)\]\}']) ' (?: [^']+ | '')* ($|')/x";
    $this->add_pattern('STRING', $string_pattern);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
    $this->add_pattern('OPERATOR', "@[¬!%^&*\-+=~;:|<>,./?]+|'@");

    $this->overrides = array('COMMENT_ML' => array($this, 'comment_override'));

    // the included file is expected to define the three $luminous_matlab_*
    // variables in this function's scope
    include(dirname(__FILE__) . '/include/matlab_func_list.php');
    $mappings = array(
      array('KEYWORD', $luminous_matlab_keywords),
      array('VALUE', $luminous_matlab_values),
      array('FUNCTION', $luminous_matlab_functions),
    );
    foreach ($mappings as $mapping) {
      $this->add_identifier_mapping($mapping[0], $mapping[1]);
    }
  }

  /**
   * Estimates how likely $src is to be MATLAB.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return int|float 0.25 if a %{ ... %} block comment is present, else 0
   */
  public static function guess_language($src, $info) {
    $score = 0;
    // matlab block comments are quite distinctive
    $has_block_comment = preg_match('/%\\{.*%\\}/s', $src);
    if ($has_block_comment) $score += 0.25;
    return $score;
  }
}

525
3rdparty/luminous/languages/perl.php vendored Executable file
View File

@ -0,0 +1,525 @@
<?php
/*
* Like ruby, I think it's impossible to fully tokenize Perl without
* executing some of the code to disambiguate some symbols. As such, we're
* going to settle for 'probably right' rather than 'definitely right'.
*
* TODO: I think this is mostly complete but it needs interpolation
* highlighting in strings and heredoc, and a regex highlighting filter,
* probably a stream filter
*/
class LuminousPerlScanner extends LuminousSimpleScanner {

  // the delimiter of the heredoc which is waiting to be consumed, if any
  private $heredoc = null;

  /**
   * Consumes and records a string up to its closing delimiter.
   *
   * This is the general case for Perl's quote-like operators, e.g.
   * q/somestring/ q"somestring", q@somestring@, q[some[]string]
   * It can be called twice for s/someregex/somereplacement/.
   * The opening delimiter must already have been consumed. Delimiters which
   * are escaped by a backslash are ignored, and nested pairs of balanced
   * delimiters are handled.
   *
   * @param string $delimiter the opening delimiter
   * @param string $type the token type to record the body as; the special
   *   value 'SPLIT_STRING' (used for qw) records each whitespace separated
   *   word as its own STRING
   */
  function consume_string($delimiter, $type) {
    $close = LuminousUtils::balance_delimiter($delimiter);
    $balanced = $close !== $delimiter;
    // group 1: an even run of backslashes (which therefore do not escape
    // the delimiter), group 2: the delimiter itself
    $patterns = array( '/(?<!\\\\)((?:\\\\\\\\)*)('
      . preg_quote($close, '/') . ')/');
    if ($balanced) {
      $patterns[] = '/(?<!\\\\)((?:\\\\\\\\)*)('
        . preg_quote($delimiter, '/') . ')/';
    }
    $stack = 1; // we're already inside the string
    $start = $this->pos();
    $close_delimiter_match = null;
    while($stack) {
      $next = $this->get_next($patterns);
      if ($next[0] === -1) {
        // unterminated: the string runs to the end of the source
        $this->terminate();
        $finish = $this->pos();
        break;
      }
      elseif($balanced && $next[1][2] === $delimiter) {
        // a nested opener, which is part of the string body
        $stack++;
        $finish = $next[0] + strlen($next[1][0]);
      }
      elseif($next[1][2] === $close) {
        $stack--;
        if (!$stack)
          $close_delimiter_match = $next[1][2];
        // the body ends after the backslashes but before the delimiter
        $finish = $next[0] + strlen($next[1][1]);
      }
      else assert(0);
      $this->pos($next[0] + strlen($next[1][0]));
    }
    $substr = substr($this->string(), $start, $finish-$start);
    // special case for qw, the string is not a 'STRING', it is actually
    // a whitespace separated list of strings. So we need to split it and
    // record them separately
    if ($type === 'SPLIT_STRING') {
      foreach(preg_split('/(\s+)/',
        $substr, -1, PREG_SPLIT_DELIM_CAPTURE) as $token) {
        if (preg_match('/^\s/', $token)) {
          $this->record($token, null);
        } else {
          $this->record($token, 'STRING');
        }
      }
    } else {
      $this->record($substr, $type);
    }
    if ($close_delimiter_match !== null) {
      $this->record($close_delimiter_match, 'DELIMITER');
    }
  }

  /**
   * Guesses whether a slash is a regex delimiter (as opposed to a division
   * operator) by looking behind in the token stream.
   *
   * @return bool true if the nearest token which is neither untyped nor a
   *   comment is an opener, an operator or one of the identifiers listed
   *   below, or if there is no such token at all
   */
  function is_delimiter() {
    for($i = count($this->tokens) - 1; $i >= 0; $i--) {
      $t = $this->tokens[$i];
      if ($t[0] === null || $t[0] === 'COMMENT') continue;
      elseif ($t[0] === 'OPENER' || $t[0] === 'OPERATOR') return true;
      elseif ($t[0] === 'IDENT') {
        switch($t[1]) {
          // named operators
          case 'lt':
          case 'gt':
          case 'le':
          case 'ge':
          case 'eq':
          case 'ne':
          case 'cmp':
          case 'and':
          case 'or':
          case 'xor':
          // other keywords/functions
          case 'if':
          case 'elsif':
          case 'while':
          case 'unless':
          case 'split':
          case 'print':
            return true;
        }
      }
      return false;
    }
    return true;
  }

  /**
   * Override for slashes, to disambiguate regexen from division operators.
   *
   * @param array $matches the regex match groups of the SLASH rule
   */
  function slash_override($matches) {
    $this->pos( $this->pos() + strlen($matches[0]) );
    // this can catch '//', which I THINK is an operator but I could be wrong.
    if (strlen($matches[0]) === 2 || !$this->is_delimiter()) {
      $this->record($matches[0], 'OPERATOR');
    } else {
      $this->record($matches[0], 'DELIMITER');
      $this->consume_string($matches[0], 'REGEX');
      // trailing regex modifiers
      if ($this->scan('/[cgimosx]+/')) {
        $this->record($this->match(), 'KEYWORD');
      }
    }
  }

  /**
   * Override for 'quote-like operators'
   * e.g. m"hello", m'hello', m/hello/, m(hello), m(he()l()o())
   *
   * @param array $matches the regex match groups of the DELIMETERS rule:
   *   1 is the operator name and 3 is the opening delimiter
   */
  function str_override($matches) {
    $this->pos( $this->pos() + strlen($matches[0]) );
    $this->record($matches[0], 'DELIMITER');
    $f = $matches[1];
    $type = 'STRING';
    if ($f === 'm' || $f === 'qr' || $f === 's' || $f === 'tr'
      || $f === 'y') $type = 'REGEX';
    elseif($f === 'qw') $type = 'SPLIT_STRING';
    $this->consume_string($matches[3], $type);
    if ($f === 's' || $f === 'tr' || $f === 'y') {
      // s/tr/y take two strings, e.g. s/something/somethingelse/, so we
      // have to consume the next delimiter (if it exists) and consume the
      // string, again.
      // if delims were balanced, there's a new delimiter right here, e.g.
      // s[something][somethingelse]
      $this->skip_whitespace();
      $balanced = LuminousUtils::balance_delimiter($matches[3]) !== $matches[3];
      if ($balanced) {
        $delim2 = $this->scan('/[^a-zA-Z0-9]/');
        if ($delim2 !== null) {
          $this->record($delim2, 'DELIMITER');
          $this->consume_string($delim2, 'STRING');
        }
      }
      // if they weren't balanced then the delimiter is the same, and has
      // already been consumed as the end-delim to the first pattern
      else {
        $this->consume_string($matches[3], 'STRING');
      }
    }
    // trailing regex modifiers
    if ($type === 'REGEX' && $this->scan('/[cgimosxpe]+/')) {
      $this->record($this->match(), 'KEYWORD');
    }
  }

  /**
   * Override for the heredoc declaration.
   *
   * This only makes a note of the heredoc. In Perl the declaration need not
   * be the last thing on its line, so we can't necessarily start heredocing
   * straight away. Instead a temporary newline rule is added, whose override
   * does the real work.
   *
   * @param array $matches the regex match groups of the HEREDOC rule: the
   *   whole match, '<<', the opening quote, the name and the closing quote
   */
  function heredoc_override($matches) {
    list($group, $op, $quote1, $delim, $quote2) = $matches;
    $this->record($op, 'OPERATOR');
    // Now, if $quote1 is '\', then $quote2 is empty. If quote2 is empty
    // but quote1 is not '\', this is not a heredoc.
    if ($quote1 === '\\' && $quote2 === '') {
      $this->record($quote1 . $delim, 'DELIMITER');
    } elseif($quote2 === '' && $quote1 !== '') {
      // this is the error case
      // shift to the end of the op and break
      $this->pos_shift(strlen($op));
      return;
    } else {
      $this->record($quote1 . $delim . $quote2, 'DELIMITER');
    }
    $this->pos_shift(strlen($group));
    // TODO. the quotes (matches[2] and matches[4]) are ignored for now, but
    // they mean something w.r.t interpolation.
    $this->heredoc = $delim;
    $this->add_pattern('HEREDOC_NL', "/\n/");
    $this->overrides['HEREDOC_NL'] = array($this, 'heredoc_real_override');
  }

  /**
   * Override for the temporary HEREDOC_NL rule: consumes the heredoc body
   * which starts after the newline, and its terminator.
   *
   * @param array $matches the regex match groups of HEREDOC_NL
   */
  function heredoc_real_override($matches) {
    $this->record($matches[0], null);
    $this->pos_shift(strlen($matches[0]));
    // don't need this anymore
    $this->remove_pattern('HEREDOC_NL');
    assert($this->heredoc !== null);
    $delim = preg_quote($this->heredoc);
    $substr = $this->scan_until('/^' . $delim . '\\b/m');
    if ($substr !== null) {
      $this->record($substr, 'HEREDOC');
      $delim_ = $this->scan('/' . $delim . '/');
      // it is the scan result which must exist here; $delim itself is
      // always a string
      assert($delim_ !== null);
      $this->record($delim_, 'DELIMITER');
    } else {
      // unterminated: everything left is heredoc
      $this->record($this->rest(), 'HEREDOC');
      $this->terminate();
    }
  }

  /**
   * Override for __DATA__ and __END__: halts highlighting, recording the
   * remainder of the source as untyped text.
   *
   * @param array $matches the regex match groups of the TERM rule
   */
  function term_override($matches) {
    $this->record($matches[0], 'DELIMITER');
    $this->pos( $this->pos() + strlen($matches[0]) );
    $this->record($this->rest(), null);
    $this->terminate();
  }

  /**
   * Override for pod documentation blocks.
   *
   * pod cuts might be very long and trigger the backtrack limit if matched
   * by a single pattern, so we do it the old fashioned way: the opening
   * line, then everything up to '=cut' (or the end of the source).
   *
   * @param array $matches the regex match groups of the podcut rule (unused)
   */
  function pod_cut_override($matches) {
    $line = $this->scan('/^=.*/m');
    assert($line !== null);
    $term = '/^=cut$|\\z/m';
    $substr = $this->scan_until($term);
    assert($substr !== null);
    $end = $this->scan($term);
    assert($end !== null);
    $this->record($line . $substr . $end, 'DOCCOMMENT');
  }

  /**
   * Registers the Perl token patterns, overrides and identifier lists.
   */
  function init() {
    $this->add_pattern('COMMENT', '/#.*/');
    // pod/cut documentation
    $this->add_pattern('podcut', '/^=[a-zA-Z_]/m');
    $this->overrides['podcut'] = array($this, 'pod_cut_override');
    // variables
    $this->add_pattern('VARIABLE', '/[\\$%@][a-z_]\w*/i');
    // special variables http://www.kichwa.com/quik_ref/spec_variables.html
    $this->add_pattern('VARIABLE', '/\\$[\|%=\-~^\d&`\'+_\.\/\\\\,"#\\$\\?\\*O\\[\\];!@]/');
    // `backticks` (shell cmd)
    $this->add_pattern('CMD', '/`(?: [^`\\\\]++ | \\\\ . )*+ (?:`|$)/x');
    // straight strings
    $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
    $this->add_pattern('STRING', LuminousTokenPresets::$SINGLE_STR);
    // terminators
    $this->add_pattern('TERM', '/__(?:DATA|END)__/');
    // heredoc (overridden)
    $this->add_pattern('HEREDOC', '/(<<)([\'"`\\\\]?)([a-zA-Z_]\w*)(\\2?)/');
    // operators, slash is a special case and is overridden
    $this->add_pattern('OPERATOR', '/[!%^&*\-=+;:|,\\.?<>~\\\\]+/');
    $this->add_pattern('SLASH', '%//?%');
    // we care about 'openers' for regex-vs-division disambiguation
    $this->add_pattern('OPENER', '%[\[\{\(]+%x');
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
    // quote-like operators. we override these.
    // I got these out of the old luminous tree, I don't know how accurate
    // or complete they are.
    // According to psh, delimiters can be escaped?
    $this->add_pattern('DELIMETERS',
      '/(q[rqxw]?|m|s|tr|y)([\s]*)(\\\\?[^a-zA-Z0-9\s])/');
    $this->add_pattern('IDENT', '/[a-zA-Z_]\w*/');

    $this->overrides['DELIMETERS'] = array($this, 'str_override');
    $this->overrides['SLASH'] = array($this, 'slash_override');
    $this->overrides['HEREDOC'] = array($this, 'heredoc_override');
    $this->overrides['TERM'] = array($this, 'term_override');
    // map cmd to a 'function' and get rid of openers
    $this->rule_tag_map = array(
      'CMD' => 'FUNCTION',
      'OPENER' => null,
    );
    // this sort of borks with the strange regex delimiters
    $this->remove_filter('pcre');

    /************************************************************************/
    // data definition follows.
    // https://www.physiol.ox.ac.uk/Computing/Online_Documentation/Perl-5.8.6/index-functions-by-cat.html
    $this->add_identifier_mapping('KEYWORD', array( 'bless',
      'caller', 'continue', 'dbmclose', 'dbmopen',
      'defined',
      'delete', 'die', 'do', 'dump', 'else', 'elsif',
      'eval', 'exit', 'for', 'foreach', 'goto', 'import', 'if', 'last', 'local',
      'my',
      'next', 'no',
      'our', 'package', 'prototype', 'redo', 'ref', 'reset',
      'return', 'require', 'scalar', 'sub', 'tie', 'tied',
      'undef',
      'untie',
      'unless', 'use', 'wantarray', 'while'));
    $this->add_identifier_mapping('OPERATOR', array('lt', 'gt', 'le',
      'ge', 'eq', 'ne', 'cmp', 'and', 'or', 'xor'));
    // grouped by category as in the reference above, which is why some
    // names occur more than once
    $this->add_identifier_mapping('FUNCTION', array(
      'chomp', 'chop', 'chr', 'crypt', 'hex', 'index', 'lc', 'lcfirst',
      'length', 'oct', 'ord', 'pack', 'reverse', 'rindex', 'sprintf',
      'substr', 'uc', 'ucfirst',
      'pos', 'quotemeta', 'split', 'study',
      'abs', 'atan2', 'cos', 'exp', 'hex', 'int', 'log', 'oct', 'rand',
      'sin', 'sqrt', 'srand',
      'pop', 'push', 'shift', 'splice', 'unshift',
      'grep', 'join', 'map', 'reverse', 'sort', 'unpack',
      'delete', 'each', 'exists', 'keys', 'values',
      'binmode', 'close', 'closedir', 'dbmclose', 'dbmopen', 'die', 'eof',
      'fileno', 'flock', 'format', 'getc', 'print', 'printf', 'read',
      'readdir', 'readline', 'rewinddir', 'seek', 'seekdir', 'select',
      'syscall', 'sysread', 'sysseek', 'syswrite', 'tell', 'telldir',
      'truncate', 'warn', 'write',
      'pack', 'read', 'syscall', 'sysread', 'sysseek', 'syswrite', 'unpack',
      'vec',
      'chdir', 'chmod', 'chown', 'chroot', 'fcntl', 'glob', 'ioctl', 'link',
      'lstat', 'mkdir', 'open', 'opendir', 'readlink', 'rename', 'rmdir',
      'stat', 'symlink', 'sysopen', 'umask', 'unlink', 'utime',
      'alarm', 'exec', 'fork', 'getpgrp', 'getppid', 'getpriority', 'kill',
      'pipe', 'qx/STRING/', 'readpipe', 'setpgrp', 'setpriority', 'sleep',
      'system', 'times', 'wait', 'waitpid',
      'accept', 'bind', 'connect', 'getpeername', 'getsockname',
      'getsockopt', 'listen', 'recv', 'send', 'setsockopt', 'shutdown',
      'socket', 'socketpair',
      'msgctl', 'msgget', 'msgrcv', 'msgsnd', 'semctl', 'semget', 'semop',
      'shmctl', 'shmget', 'shmread', 'shmwrite',
      'endgrent', 'endhostent', 'endnetent', 'endpwent', 'getgrent',
      'getgrgid', 'getgrnam', 'getlogin', 'getpwent', 'getpwnam',
      'getpwuid', 'setgrent', 'setpwent',
      'endprotoent', 'endservent', 'gethostbyaddr', 'gethostbyname',
      'gethostent', 'getnetbyaddr', 'getnetbyname', 'getnetent',
      'getprotobyname', 'getprotobynumber', 'getprotoent', 'getservbyname',
      'getservbyport', 'getservent', 'sethostent', 'setnetent',
      'setprotoent', 'setservent',
      'gmtime', 'localtime', 'time', 'times'));
  }

  /**
   * Estimates how likely $src is to be Perl.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return int|float 1.0 if the source starts with a perl shebang,
   *   otherwise the sum of the weights of every hint found (0 if none)
   */
  public static function guess_language($src, $info) {
    // check the shebang
    if (preg_match('/^#!.*\\bperl\\b/', $src)) return 1.0;
    $p = 0;
    // sigils: $scalar, @array, %hash
    if (preg_match('/\\$[a-zA-Z_]+/', $src)) $p += 0.02;
    if (preg_match('/@[a-zA-Z_]+/', $src)) $p += 0.02;
    if (preg_match('/%[a-zA-Z_]+/', $src)) $p += 0.02;
    // sub name {
    if (preg_match('/\\bsub\s+\w+\s*\\{/', $src)) $p += 0.1;
    // my $x, my @x, my %x
    if (preg_match('/\\bmy\s+[$@%]/', $src)) $p += 0.05;
    // $x =~ s/
    if (preg_match('/\\$[a-zA-Z_]\w*\s+=~\s+s\W/', $src)) $p += 0.15;
    return $p;
  }
}

251
3rdparty/luminous/languages/php.php vendored Executable file
View File

@ -0,0 +1,251 @@
<?php
require_once( dirname(__FILE__) . '/include/php_func_list.php');
/*
* This is not a scanner called by an external interface, it's controlled
* by LuminousPHPScanner (defined in this file).
*
* It should break when it sees a '?>', but it should assume it's in php
* when it's called.
*/
class LuminousPHPSubScanner extends LuminousScanner {

  protected $case_sensitive = false;
  public $snippet = false;

  /**
   * Registers the PHP token patterns, identifier lists and string filters.
   */
  function init() {
    // the closing tag; main() stops when it sees this
    $this->add_pattern('TERM', '/\\?>/');
    // single line comments end at the end of the line or at a closing tag
    $this->add_pattern('COMMENT', '% (?://|\#) .*? (?=\\?>|$) %xm');
    $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
    $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
    // an opening tag should have been consumed by LuminousPHPScanner, but
    // it is detected here too in case a user incorrectly calls the
    // PHP-snippet scanner.
    $this->add_pattern('DELIMITER', '/<\?(?:php)?/');
    $this->add_pattern('OPERATOR', '@[!%^&*\\-=+~:<>/\\|\\.;,]+|\\?(?!>)@');
    $this->add_pattern('VARIABLE', '/\\$\\$?[a-zA-Z_]\w*/');
    $this->add_pattern('IDENT', '/[a-zA-Z_]\w*/');
    $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
    $this->add_pattern('STRING', LuminousTokenPresets::$SINGLE_STR);
    // `backtick` shell commands
    $this->add_pattern('FUNCTION', '/`(?>[^`\\\\]+|\\\\.)*(`|$)/s');

    $this->add_identifier_mapping('FUNCTION', $GLOBALS['luminous_php_functions']);
    $this->add_identifier_mapping('KEYWORD', $GLOBALS['luminous_php_keywords']);

    $this->add_filter('STRING', array($this, 'str_filter'));
    $this->add_filter('HEREDOC', array($this, 'str_filter'));
    $this->add_filter('NOWDOC', array($this, 'nowdoc_filter'));
  }

  /**
   * Filter for STRING and HEREDOC tokens: tags interpolated variables.
   *
   * @param array $token the token; index 0 is its type, index 1 its text
   * @return array the token unchanged if it is neither double quoted nor a
   *   heredoc, or if it contains no '$'; otherwise the escaped token with
   *   each variable wrapped in <VARIABLE> tags
   */
  static function str_filter($token) {
    $interpolates = $token[1][0] === '"' || $token[0] === 'HEREDOC';
    if (!$interpolates || strpos($token[1], '$') === false) {
      return $token;
    }
    $token = LuminousUtils::escape_token($token);
    // matches $var, ${var} and {$var} syntax
    $token[1] = preg_replace('/
(?: \$\{ | \{\$ ) [^}]++ \}
|
\$\$?[a-zA-Z_]\w*
/x', '<VARIABLE>$0</VARIABLE>',
      $token[1]);
    return $token;
  }

  /**
   * Filter for NOWDOC tokens: retypes them as HEREDOC.
   *
   * @param array $token the token; index 0 is its type
   * @return array the same token with its type set to 'HEREDOC'
   */
  static function nowdoc_filter($token) {
    $token[0] = 'HEREDOC';
    return $token;
  }

  /**
   * Scans PHP code from the current position until a closing tag is found
   * (which is left unconsumed) or the source runs out.
   */
  function main() {
    $this->start();
    while (!$this->eos()) {
      $begin = $this->pos();
      $hit = $this->next_match();
      if ($hit === null) {
        // nothing else matches, the rest is plain text
        $this->record($this->rest(), null);
        $this->terminate();
        break;
      }
      $rule = $hit[0];
      if ($hit[1] > $begin) {
        // the text skipped on the way to the match
        $this->record(substr($this->string(), $begin, $hit[1] - $begin), null);
      }

      if ($rule === 'TERM') {
        // leave the closing tag for the caller
        $this->unscan();
        break;
      }

      if ($rule === 'IDENT') {
        // do the user defns here, i.e. class XYZ extends/implements ABC
        // or function XYZ
        $word = $this->match();
        $this->record($word, 'IDENT');
        $declares = $word === 'class' || $word === 'function'
          || $word === 'extends' || $word === 'implements';
        if ($declares && $this->scan('/(\s+)([a-zA-Z_]\w*)/')) {
          $this->record($this->match_group(1), null);
          $this->record($this->match_group(2), 'USER_FUNCTION');
          if ($word === 'function') $def_type = 'FUNCTION';
          else $def_type = 'TYPE';
          $this->user_defs[$this->match_group(2)] = $def_type;
        }
        continue;
      }

      // figure out heredoc syntax here
      if ($rule === 'OPERATOR' && strpos($this->match(), '<<<') !== false) {
        $this->record($this->match(), $rule);
        // optional quote, name, optional matching quote
        $this->scan('/([\'"]?)([\w]*)((?:\\1)?)/');
        $g = $this->match_groups();
        $nowdoc = false;
        if ($g[1]) {
          // nowdocs are delimited by single quotes. Heredocs MAY be
          // delimited by double quotes, or not.
          $nowdoc = $g[1] === "'";
          $this->record($g[1], null);
        }
        $delimiter = $g[2];
        $this->record($delimiter, 'KEYWORD');
        if ($g[3]) $this->record($g[3], null);
        // bump us to the end of the line
        if (strlen($this->scan('/.*/')))
          $this->record($this->match(), null);
        // the body runs to a line which starts with the name, or to the end
        if ($this->scan_until("/^$delimiter|\z/m")) {
          $this->record($this->match(), ($nowdoc)? 'NOWDOC' : 'HEREDOC');
          if ($this->scan('/\w+/'))
            $this->record($this->match(), 'KEYWORD');
        }
        continue;
      }

      assert($this->pos() > $begin);
      $this->record($this->match(), $rule);
    }
  }
}
/*
* This is a controller class which handles alternating between PHP and some
* other language (currently HTML only, TODO allow plain text as well)
* PHP and the other language are handled by subscanners
*/
class LuminousPHPScanner extends LuminousScanner {

  /// the 'non-php' scanner
  protected $subscanner;
  /// the real php scanner
  protected $php_scanner;
  /// If it's a snippet, we assume we're starting in PHP mode.
  public $snippet = false;

  /**
   * Creates and initialises the two subscanners before handing over to the
   * parent constructor.
   *
   * @param string|null $src the source code, if already known
   */
  function __construct($src=null) {
    $this->subscanner = new LuminousHTMLScanner($src);
    $this->subscanner->embedded_server = true;
    $this->subscanner->init();
    $this->php_scanner = new LuminousPHPSubScanner($src);
    $this->php_scanner->init();
    parent::__construct($src);
  }

  /**
   * Gets the source string, optionally setting it first. A new string is
   * passed on to both subscanners as well.
   *
   * @param string|null $s the new source string, or null to only read
   * @return mixed whatever the parent's string() returns
   */
  function string($s=null) {
    if ($s !== null) {
      $this->subscanner->string($s);
      $this->php_scanner->string($s);
    }
    return parent::string($s);
  }

  /**
   * Runs the PHP subscanner from the current position and records its
   * output, followed by the closing tag if there is one.
   *
   * @param string|null $delimiter the opening tag which was just consumed,
   *   or null if there wasn't one (snippet mode)
   */
  protected function scan_php($delimiter) {
    if ($delimiter !== null)
      $this->record($delimiter, 'DELIMITER');
    $this->php_scanner->pos($this->pos());
    $this->php_scanner->main();
    // the output of a short echo tag is wrapped as an interpolation
    $this->record($this->php_scanner->tagged(),
      ($delimiter === '<?=')? 'INTERPOLATION' : null, true);

    $this->pos($this->php_scanner->pos());
    assert($this->eos() || $this->check('/\\?>/'));
    if ($this->scan('/\\?>/'))
      $this->record($this->match(), 'DELIMITER');
  }

  /**
   * Runs the non-PHP subscanner from the current position and records its
   * output.
   */
  protected function scan_child() {
    $this->subscanner->pos($this->pos());
    $this->subscanner->main();
    $this->pos($this->subscanner->pos());
    assert($this->eos() || $this->check('/<\\?/'));
    $this->record($this->subscanner->tagged(), null, true);
  }

  /**
   * Alternates between the PHP scanner and the non-PHP scanner until the
   * source runs out.
   */
  function main() {
    while (!$this->eos()) {
      $p = $this->pos();
      if ($this->snippet)
        $this->scan_php(null);
      elseif ($this->scan('/<\\?(?:php|=)?/'))
        $this->scan_php($this->match());
      else
        $this->scan_child();
      // every iteration must make progress
      assert($this->pos() > $p);
    }
  }

  /**
   * Estimates how likely $src is to be PHP.
   *
   * The result for the most recent source is cached, because this function
   * is hit by the snippet scanner as well.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information (unused here)
   * @return float the sum of the weights of every hint found
   */
  static function guess_language($src, $info) {
    static $p = 0.0;
    static $src_ = null;
    if ($src_ === $src) {
      return $p;
    }
    // A different source: start again from zero. The cached value must not
    // be carried over, or the score of the previous source would be added
    // to this one.
    $p = 0.0;
    // look for delimiter tags
    if (strpos($src, '<?php') !== false) $p += 0.5;
    elseif (preg_match('/<\\?(?!xml)/', $src)) $p += 0.20;
    // check for $this, self:: parent::
    if (preg_match('/\\$this\\b|((?i: self|parent)::)/x', $src)) $p += 0.15;
    // check for PHP's OO notation: $somevar->something
    if (preg_match('/\\$[a-z_]\w*+->[a-z_]/i', $src)) $p += 0.05;
    // check for some common functions:
    if (preg_match('/\\b(echo|require(_once)?|include(_once)?|preg_\w)/i',
      $src)) $p += 0.05;
    $src_ = $src;
    return $p;
  }
}
class LuminousPHPSnippetScanner extends LuminousPHPScanner {

  // a snippet starts in PHP mode
  public $snippet = true;

  /**
   * Estimates how likely $src is to be a PHP snippet, i.e. PHP code which
   * is not introduced by an opening tag.
   *
   * @param string $src  the source code to inspect
   * @param array  $info extra source information, passed on to the parent
   * @return float the parent's estimate if that is not positive; otherwise
   *   the parent's estimate nudged up by 0.01 if the source looks like a
   *   snippet, or down by 0.01 if it does not
   */
  public static function guess_language($src, $info) {
    $score = parent::guess_language($src, $info);
    if ($score <= 0.0) {
      return $score;
    }
    // If there is no open tag, or if there is a close tag before the first
    // open tag, then we guess we're in a snippet. A snippet needs to come
    // out ahead of plain php, and anything else needs to be behind it.
    $first_open = strpos($src, '<?');
    $first_close = strpos($src, '?>');
    $closes_first = $first_close !== false && $first_close < $first_open;
    if ($first_open === false || $closes_first) {
      $score += 0.01;
    } else {
      $score -= 0.01;
    }
    return $score;
  }
}

412
3rdparty/luminous/languages/python.php vendored Executable file
View File

@ -0,0 +1,412 @@
<?php
/*
* Python scanner - includes Django
*
* TODO: Django does not respect {% comment %} ... {% endcomment %}
*/
class LuminousPythonScanner extends LuminousScanner {
public $django = false;
/**
 * Registers the lexical rules used to tokenise Python source.
 *
 * Adds patterns for identifiers, comments, decorators, strings, numbers and
 * operators, then maps well-known identifiers onto KEYWORD / FUNCTION /
 * TYPE / VALUE tokens. When $django is true a TERM pattern and a set of
 * template-tag keywords are added as well, so that main() can hand control
 * back to the enclosing template scanner at a closing tag.
 */
public function init() {
  $this->remove_filter('comment-to-doc');

  // so it turns out this template isn't quite as readable as I hoped, but
  // it's a triple string, e.g:
  // "{3} (?: [^"\\]+ | ""[^"\\]+ | "[^"\\]+ | \\.)* (?: "{3}|$)
  $triple_str_template = '%1$s{3} (?> [^%1$s\\\\]+ | %1$s%1$s[^%1$s\\\\]+ | %1$s[^%1$s\\\\]+ | \\\\. )* (?: %1$s{3}|$)';
  $str_template = '%1$s (?> [^%1$s\\\\]+ | \\\\. )* (?: %1$s|$)';
  $triple_dstr = sprintf($triple_str_template, '"');
  $triple_sstr = sprintf($triple_str_template, "'");

  // The negative lookahead stops a string prefix (r"..", b'..') from being
  // consumed as an identifier.
  $this->add_pattern('IDENT', '/[a-zA-Z_](?>\w*)(?!["\'])/');

  // I *assume* that Django tags terminate these
  $this->add_pattern('COMMENT', sprintf('/\#.*%s/',
    $this->django? '(?=[%}]\})' : ''));

  // decorator
  $this->add_pattern('TYPE', '/@(\w+\.?)+/');

  // Python strings may carry a case-insensitive prefix: r (raw), u (unicode),
  // b (bytes), f (formatted), or a two-letter combination such as br, rb, fr,
  // rf and (Python 2) ur. The prefix affects how backslashes are interpreted,
  // but I don't *think* it affects escaping of quotes....
  $prefix = '(?i:[rubf]|[bfu]r|r[bf])?';
  $this->add_pattern('STRING', '/' . $prefix . $triple_dstr . '/xs');
  $this->add_pattern('STRING', '/' . $prefix . $triple_sstr . '/xs');
  $this->add_pattern('STRING', '/' . $prefix . sprintf($str_template, '"') . '/sx');
  $this->add_pattern('STRING', '/' . $prefix . sprintf($str_template, "'") . '/xs');

  // EPIC.
  $this->add_pattern('NUMERIC', '/
    #hex
    (?:0[xX](?>[0-9A-Fa-f]+)[lL]*)
    |
    # binary
    (?:0[bB][0-1]+)
    |
    #octal
    (?:0[oO0][0-7]+)
    |
    # regular number
    (?:
      (?>[0-9]+)
      (?:
        # long identifier
        [lL]
        |
        # Or a fractional part, which may be imaginary
        (?:
          (?:\.?(?>[0-9]+)?
            (?:(?:[eE][\+\-]?)?(?>[0-9]+))?
          )[jJ]?
        )
      )?
    )
    |
    (
      # or only after the point, float x = .1;
      \.(?>[0-9]+)(?:(?:[eE][\+\-]?)?(?>[0-9]+))?[jJ]?
    )
    /x');

  // %} and }} are django terminators
  if ($this->django) {
    $this->add_pattern('TERM', '/[%}]\}/');
  }

  // catch the colon separately so we can use $match === ':' in figuring out
  // where docstrs occur
  $this->add_pattern('OPERATOR', '/\+=|-=|\*=|\/=|>=|<=|!=|==|\*\*|[!%^*\-=+;<>\\\\(){}\[\],\\.:]/');

  if ($this->django) {
    // Django specific keywords
    // https://docs.djangoproject.com/en/1.3/ref/templates/builtins/
    $this->add_identifier_mapping('KEYWORD', array('autoescape',
      'endautoescape', 'cycle', 'filter', 'endfilter', 'include',
      'extends', 'firstof', 'empty', 'ifchanged', 'endifchanged',
      'ifequal', 'endifequal', 'ifnotequal', 'endifnotequal',
      'load', 'now', 'regroup', 'spaceless', 'endspaceless',
      'ssi', 'url', 'widthratio', 'endwith',
      'endfor', 'endif',
      'endwhile'));
  }

  $this->add_identifier_mapping('KEYWORD', array('assert', 'as', 'break',
    'class', 'continue', 'del', 'def', 'elif', 'else', 'except', 'exec',
    'finally', 'for', 'from', 'global', 'if', 'import', 'lambda',
    'print', 'pass', 'raise', 'return', 'try', 'while', 'yield',
    'with',
    'and', 'not', 'in', 'is', 'or'));

  $this->add_identifier_mapping('FUNCTION', array('all', 'abs', 'any',
    'basestring', 'bin', 'callable', 'chr', 'classmethod', 'cmp', 'compile',
    'dir', 'divmod', 'enumerate', 'eval', 'execfile', 'file', 'filter',
    'format',
    'frozenset', 'getattr', 'globals', 'hasattr', 'hash', 'help', 'hex',
    'id', 'input', 'isinstance', 'issubclass', 'iter', 'len', 'locals', 'map',
    'max', 'min', 'memoryview', 'next', 'object', 'oct', 'open', 'ord', 'pow',
    'property', 'range', 'raw_input', 'reduce', 'reload', 'repr', 'reversed',
    'round', 'setattr', 'slice', 'sorted', 'staticmethod', 'sum', 'super',
    'type', 'unichr', 'vars', 'xrange', 'zip', '__import__',
    'bytearray', 'complex', 'dict', 'float', 'int', 'list', 'long',
    'set', 'str', 'tuple', 'unicode', 'apply', 'buffer', 'coerce', 'intern'
  ));

  // http://docs.python.org/library/exceptions.html
  // NB: the platform notes from that page ("(Windows)", "(VMS)") are not part
  // of the class names and must not appear here, or the entries never match.
  $this->add_identifier_mapping('TYPE',
    array('BaseException', 'SystemExit',
    'KeyboardInterrupt', 'GeneratorExit', 'Exception', 'StopIteration',
    'StandardError', 'BufferError', 'ArithmeticError',
    'FloatingPointError', 'OverflowError', 'ZeroDivisionError',
    'AssertionError',
    'AttributeError', 'EnvironmentError', 'IOError', 'OSError',
    'WindowsError', 'VMSError', 'EOFError', 'ImportError',
    'LookupError', 'IndexError', 'KeyError', 'MemoryError', 'NameError',
    'UnboundLocalError', 'ReferenceError', 'RuntimeError',
    'NotImplementedError',
    'SyntaxError', 'IndentationError', 'TabError', 'SystemError', 'TypeError',
    'ValueError', 'UnicodeError', 'UnicodeDecodeError', 'UnicodeEncodeError',
    'UnicodeTranslateError', 'Warning', 'DeprecationWarning',
    'PendingDeprecationWarning', 'RuntimeWarning', 'SyntaxWarning',
    'UserWarning',
    'FutureWarning', 'ImportWarning', 'UnicodeWarning', 'BytesWarning'));

  $this->add_identifier_mapping('VALUE', array('False', 'None', 'self',
    'True'));
}
/**
 * Mini-scanner which highlights the module/item names on an import line.
 *
 * Called by main() with the scan pointer sitting on the `import` or `from`
 * keyword. Consumes tokens up to the end of the line (or the first thing it
 * doesn't recognise). Names which end up bound in the importing scope are
 * tagged USER_FUNCTION and registered in $this->user_defs as 'TYPE'; the
 * keywords `import`, `from` and `as` are recorded as plain IDENT tokens, as is
 * the module name in `from module ...`.
 */
private function import_line() {
  $import = false;
  $from = false;
  while (!$this->eol()) {
    $c = $this->peek();
    $tok = null;
    $m = null;
    // backslash + following char: a line continuation, recorded untyped
    if ($c === '\\') $m = $this->get(2);
    elseif ($this->scan('/[,\\.;\\*]+/')) $tok = 'OPERATOR';
    elseif ($this->scan("/[ \t]+/")) {}
    elseif (($m = $this->scan('/(?:import|from|as)\\b/'))) {
      if ($m === 'import') $import = true;
      elseif ($m === 'from') $from = true;
      // `as` only introduces an alias: it is a keyword, not an imported
      // name, so it must not be registered as a user definition.
      $tok = 'IDENT';
    }
    elseif ($this->scan('/[_a-zA-Z]\w*/')) {
      assert($from || $import);
      // from module import *item*, or just import *item*
      if ($import) {
        $tok = 'USER_FUNCTION';
        $this->user_defs[$this->match()] = 'TYPE';
      }
      // from *module* ...[import item], the module is not imported
      else $tok = 'IDENT';
    }
    else break;
    // $m is only set by the branches which read text themselves; everything
    // else went through scan(), so the text is the last match.
    $this->record(($m !== null)? $m : $this->match(), $tok);
  }
}
/**
 * Main tokenising loop.
 *
 * Repeatedly asks next_match() for the next rule match, records any text
 * skipped before it as an untyped token, then records the matched text under
 * its (possibly adjusted) token name. On top of the plain rule matching it:
 *  - stops (after un-scanning) at a TERM token, i.e. a Django closing tag;
 *  - re-tags strings as COMMENT when they look like doc-strings or no-op
 *    strings (see the long comment in the loop body);
 *  - hands `import` / `from` lines over to import_line();
 *  - tags the identifier following `class` / `def` as USER_FUNCTION and
 *    registers it in $this->user_defs.
 */
function main() {
// true from a 'class'/'def' keyword until the doc-str slot has been handled
$definition = false;
// true once the ':' following a definition has been seen
$doccstr = false;
// set to 'user_def' when the next identifier names a new class/function
$expect = '';
while (!$this->eos()) {
$tok = null;
$index = $this->pos();
if (($rule = $this->next_match()) !== null) {
// $rule[0] is used as the token name and $rule[1] as the match offset
$tok = $rule[0];
if ($rule[1] > $index) {
// text between the old position and the match belongs to no rule
$this->record(substr($this->string(), $index, $rule[1] - $index), null);
}
} else {
// nothing else matches: the remainder is untyped
$this->record(substr($this->string(), $index), null);
$this->terminate();
break;
}
// Django terminator tag - break to superscanner
if ($tok === 'TERM') {
$this->unscan();
break;
}
$m = $this->match();
/* python doc strs are a pain because they're actually just strings.
* Also, I'm pretty sure a string in a non-interesting place just counts
* as a no-op and is also used as a comment sometimes
* So we've got something a bit complicated going on here: if we meet
* a 'class' or a 'def' (function def) then we wait until the next ':'
* and say "we expect a doc-str now". If the next token is not a string,
* we discard that state.
*
* similarly, if we meet a string which isn't a doc-str, we look behind
* and expect to see an operator or open bracket, else it's a comment.
* NOTE: we class ':' as a legal string preceding char because it's used
* as dictionary key:value separators. This will fail on the case:
*
* while 1:
* "do something"
* break
*
*
* NOTE: note we're skipping whitespace.
* NOTE: we disable the no-op detection for Django because the string
* might be inside an output tag.
*
*/
if ($definition && $doccstr) {
if($tok === 'STRING')
$tok = 'COMMENT';
}
elseif ($tok === 'STRING' && !$this->django) {
// look behind: walk back over untyped text and comments to the nearest
// "real" token; only an operator, identifier or number makes this a string
$i = count($this->tokens);
$tok = 'COMMENT';
while ($i--) {
$t = $this->tokens[$i][0];
$s = $this->tokens[$i][1];
if ($t === null || $t === 'COMMENT') continue;
elseif ($t === 'OPERATOR' || $t === 'IDENT' || $t === 'NUMERIC') {
$tok = 'STRING';
}
break;
}
// finally, if we can look ahead to a binary operator, or so,
// we concede it probably is a string
if ($tok === 'COMMENT') {
if ($this->check('/\s*(?: [+:&.,] | (?:and|or|is|not)\\b)/x'))
$tok = 'STRING';
}
}
// reset this; if it didn't catch above then it's not valid now.
if ($definition && $doccstr) {
$definition = false;
$doccstr = false;
}
if ($tok === 'IDENT') {
if ($m === 'import' || $m === 'from') {
// rewind so that import_line() sees the keyword itself
$this->unscan();
$this->import_line();
continue;
}
// these are definition keywords, the next token should be an
// identifier, which is a user-defined type or function
if ($m === 'class' || $m === 'def') {
$definition = true;
$expect = 'user_def';
}
// this is caught on the next iteration
elseif($expect === 'user_def') {
$tok = 'USER_FUNCTION';
$expect = false;
$this->user_defs[$m] = 'FUNCTION';
}
}
else {
// if this hasn't caught, it's not valid
$expect = false;
}
// the ':' closing a class/def header: the next token may be a doc-str
if ($definition && $m === ':') {
$doccstr = true;
}
$this->record($m, $tok);
}
}
/**
 * Estimates how likely it is that $src is Python.
 *
 * A shebang is decisive: 1.0 if it mentions python, 0.0 for any other
 * interpreter. Otherwise each heuristic below which matches the source adds
 * its weight to the score.
 *
 * @param $src   the source code to inspect
 * @param $info  information about the source; only 'shebang' is read here
 * @return float
 */
public static function guess_language($src, $info) {
  $shebang = $info['shebang'];
  if (strpos($shebang, 'python') !== false) {
    return 1.0;
  }
  if ($shebang) {
    return 0.0;
  }

  // Trademark pythonic constructs (although recent versions of ECMAScript
  // probably implement some of this too), as array(pattern, weight).
  $heuristics = array(
    // for x in y:
    array('/^\s*+ for \s++ \w++ \s++ in \s++ \w++ \s*+ :/xm', 0.05),
    array('/True|False|None/', 0.01),
    // triple-quoted strings
    array('/"{3}|\'{3}/', 0.05),
    // class something(object)
    array('/^\s*+ class \s++ \w++ \s*+ \( \s*+ object \s*+ \)/xm', 0.1),
    // def __init__ (constructor)
    array('/\\bdef \s++ __init__\\b/x', 0.2),
    // method decorators
    array("/^\s*+ @[\w\\.]++ .*+ [\n\r]++ \s*+ def\\b/mx", 0.1),
    // common imports: import os|sys|re
    array('/^import\s++(os|sys|re)\\b/m', 0.05),
    // from x import y
    array('/^\s*+ from \s++ (?:\w++(?:\.\w++)*+) \s++ import \s/xm', 0.10),
  );

  $score = 0.0;
  foreach ($heuristics as $heuristic) {
    list($pattern, $weight) = $heuristic;
    if (preg_match($pattern, $src)) {
      $score += $weight;
    }
  }
  return $score;
}
}
/**
 * Django templates: HTML with embedded {{ ... }}, {% ... %} and {# ... #}
 * tags. The HTML is delegated to a persistent HTML scanner, the tag contents
 * to a throw-away Python scanner running in Django mode.
 */
class LuminousDjangoScanner extends LuminousScanner {
  // warning: some copying and pasting with the rails scanner here

  // HTML scanner has to be persistent.
  private $html_scanner;

  /**
   * Builds the HTML sub-scanner and tells it which sequences open a
   * server-side tag, so that it returns control when it meets one.
   */
  public function init() {
    $html = new LuminousHTMLScanner();
    $html->string($this->string());
    $html->embedded_server = true;
    $html->server_tags = '/\{[{%#]/';
    $html->init();
    $this->html_scanner = $html;
  }

  /**
   * Runs the HTML scanner from the current position, records whatever it
   * tagged and moves on to wherever it stopped.
   */
  public function scan_html() {
    $html = $this->html_scanner;
    $html->pos($this->pos());
    $html->main();
    $this->record($html->tagged(), null, true);
    $this->pos($html->pos());
  }

  /**
   * Runs a fresh Python scanner (in Django mode) from the current position.
   *
   * @param $short  when true, the result is recorded as INTERPOLATION,
   *                otherwise it is recorded without a token type
   */
  public function scan_python($short=false) {
    $python = new LuminousPythonScanner($this->string());
    $python->django = true;
    $python->init();
    $python->pos($this->pos());
    $python->main();
    $token = $short? 'INTERPOLATION' : null;
    $this->record($python->tagged(), $token, true);
    $this->pos($python->pos());
  }

  /**
   * Alternates between the template tags handled here and plain HTML until
   * the end of the source.
   */
  public function main() {
    while (!$this->eos()) {
      $before = $this->pos();

      // django's tags are {{ }} and {% %}
      // there's also a {# #} comment tag but we can probably handle that here
      // more easily
      // same for {% comment %} ... {% endcomment %}
      if ($this->scan('/\{([{%])/')) {
        $opener = $this->match();
        $bracket = $this->match_group(1);

        if (!$this->scan('/\s*comment\s*%\}/')) {
          // {{ ... }} or {% ... %}
          $this->record($opener, 'DELIMITER');
          $this->scan_python($bracket === '{');
          if ($this->scan('/[}%]\}/')) {
            $this->record($this->match(), 'DELIMITER');
          }
        }
        else {
          // {% comment %} ... {% endcomment %}
          $comment = $opener . $this->match();
          $end_pattern = '/\{%\s*endcomment\s*%\}/';
          if ($this->scan_until($end_pattern) === null) {
            // unterminated: the comment swallows the rest of the source
            $comment .= $this->rest();
            $this->terminate();
          }
          else {
            $comment .= $this->match();
            $comment .= $this->scan($end_pattern);
          }
          $this->record($comment, 'COMMENT');
        }
      }
      // {# ... #}
      elseif ($this->scan('/\{\# (?: [^\#]++ | \#(?! \} ) )*+ (?: \#\} | $)/x')) {
        $this->record($this->match(), 'COMMENT');
      }
      else {
        $this->scan_html();
      }

      // every pass must consume something
      assert($before < $this->pos());
    }
  }

  /**
   * A source is taken to be a Django template if it looks sufficiently like
   * HTML and contains at least one {{ or {% sequence; it then scores
   * marginally higher than plain HTML. Otherwise the score is 0.0.
   */
  public static function guess_language($src, $info) {
    $html = LuminousHTMLScanner::guess_language($src, $info);
    $has_tag = strpos($src, '{{') !== false || strpos($src, '{%') !== false;
    if ($html >= 0.2 && $has_tag) {
      return $html + 0.01;
    }
    return 0.0;
  }
}

70
3rdparty/luminous/languages/rails.php vendored Executable file
View File

@ -0,0 +1,70 @@
<?php
/*
* Rails. Basically a wrapper around Ruby and HTML.
*/
/**
 * Scanner for Rails templates: HTML with Ruby embedded in <% ... %> tags.
 * HTML is delegated to a persistent HTML scanner, the tag contents to a
 * throw-away Ruby scanner running in rails mode.
 */
class LuminousRailsScanner extends LuminousScanner {
  // HTML scanner has to be persistent. Ruby doesn't.
  private $html_scanner;

  /**
   * Builds the HTML sub-scanner and tells it that <% opens a server-side
   * tag, so that it returns control when it meets one.
   */
  public function init() {
    $html = new LuminousHTMLScanner();
    $html->string($this->string());
    $html->embedded_server = true;
    $html->server_tags = '/<%/';
    $html->init();
    $this->html_scanner = $html;
  }

  /**
   * Runs the HTML scanner from the current position, records whatever it
   * tagged and moves on to wherever it stopped.
   */
  public function scan_html() {
    $html = $this->html_scanner;
    $html->pos($this->pos());
    $html->main();
    $this->record($html->tagged(), null, true);
    $this->pos($html->pos());
  }

  /**
   * Runs a fresh Ruby scanner (in rails mode) from the current position.
   *
   * @param $short  when true, the result is recorded as INTERPOLATION,
   *                otherwise it is recorded without a token type
   */
  public function scan_ruby($short=false) {
    $ruby = new LuminousRubyScanner($this->string());
    $ruby->rails = true;
    $ruby->init();
    $ruby->pos($this->pos());
    $ruby->main();
    $token = $short? 'INTERPOLATION' : null;
    $this->record($ruby->tagged(), $token, true);
    $this->pos($ruby->pos());
  }

  /**
   * Alternates between embedded Ruby tags and plain HTML until the end of
   * the source.
   */
  public function main() {
    while (!$this->eos()) {
      $before = $this->pos();

      if (!$this->scan('/<%#?([\-=]?)/')) {
        $this->scan_html();
      }
      else {
        $opener = $this->match();
        // <%= ... %> is an output tag
        $is_output = $this->match_group(1) === '=';
        $this->record($opener, 'DELIMITER');
        $this->scan_ruby($is_output);
        if ($this->scan('/-?%>/')) {
          $this->record($this->match(), 'DELIMITER');
        }
      }

      // every pass must consume something
      assert($before < $this->pos());
    }
  }

  /**
   * Starts from the Ruby scanner's estimate. A positive estimate is nudged
   * upwards (capped at 1) if the source has a <% ... %> tag on a single
   * line, and dropped to 0.0 if it has none.
   */
  public static function guess_language($src, $info) {
    $score = LuminousRubyScanner::guess_language($src, $info);
    if (!($score > 0)) {
      return $score;
    }
    if (!preg_match('/<%.*%>/', $src)) {
      return 0.0;
    }
    $score += 0.02;
    return min($score, 1);
  }
}

522
3rdparty/luminous/languages/ruby.php vendored Executable file
View File

@ -0,0 +1,522 @@
<?php
/*
* Ruby's grammar is basically insane. We're not going to aim to correctly
* highlight all legal Ruby code because we'll be here all year and we'll still
* get it wrong, but we're going to have a go at getting the standard stuff
* right as well as:
* heredocs
* balanced AND NESTED string/regex delimiters
* interpolation
*
* disclaimer: I don't actually know Ruby.
*
* Problem is that Ruby *appears* to have to disambiguate loads of stuff at
* runtime, which is frankly a little optimistic for a syntax highlighter.
* Ruby allows you to omit calling parentheses, so it's not practical (and
* impossible if the code snippet is incomplete) to figure out operator/operand
* position. e.g.
* x = y %r/z/x
* is x = y mod r div z div x, unless y is a function, in which case it's:
* x = y( /z/x ) where /z/x is a regex
*/
class LuminousRubyScanner extends LuminousScanner {
// set to true if this is a nested scanner which needs to exit if it
// encounters a } while nothing else is on the stack, i.e. it is being
// used to process an interpolated block
public $interpolation = false;
protected $curley_braces = 0; // poor man's curly brace stack.
public $rails = false;
// operators depend somewhat on whether or not rails is active, else we
// don't want to consume a '%' if it comes right before a '>', we want
// to leave that for the rails close-tag detection
private $operator_regex = null;
private $string_regex = null;
private $comment_regex = null;
// gaaah
private $numeric = '/
(?:
#control codes
(?:\?(?:\\\[[:alpha:]]-)*[[:alpha:]])
|
#hex
(?:0[xX](?>[0-9A-Fa-f]+)[lL]*)
|
# binary
(?:0[bB][0-1]+)
|
#octal
(?:0[oO0][0-7]+)
|
# regular number
(?:
(?>[0-9]+)
(?:
# fraction
(?:
(?:\.?(?>[0-9]+)?
(?:(?:[eE][\+\-]?)?(?>[0-9]+))?
)
)
)?
)
|
(
# or only after the point, float x = .1;
\.(?>[0-9]+)(?:(?:[eE][\+\-]?)?(?>[0-9]+))?
)
)
(?:_+\d+)*
/x';
/// queue of heredoc declarations which will need to be handled as soon as EOL is reached
/// each element is a tuple: (delimiter(str), identable?, interpolatable?)
private $heredocs = array();
/**
 * Prepares the regular expressions, identifier mappings and filters used by
 * main().
 *
 * Must be called after $rails has been set, because the comment pattern
 * depends on it.
 */
public function init() {
  // In rails mode a comment must not swallow the closing %> tag.
  $this->comment_regex =
    $this->rails? "/ \# (?: [^\n%]*+ | %(?!>))* /x"
    : '/#.*/';

  // http://www.zenspider.com/Languages/Ruby/QuickRef.html#23
  // NOTE: '^' has to be escaped here; unescaped it is a start-of-subject
  // anchor which can match the empty string and shadow later alternatives.
  // No special casing of '%>' is needed for rails: main() checks for the
  // closing tag before it tries any operator.
  $this->operator_regex = '/
    \? | ;
    | ::? | \*[=\*]? | \/=? | -=? | %=? | &&? | \|\|? | \.{2,3}
    | \^=?
    | < (?:=>|<|=)? | >=?
    | =[>~] | ={1,3}
    | \+=? | ![=~]?
    /x';

  $this->add_identifier_mapping('KEYWORD', array('BEGIN', 'END', 'alias',
    'begin', 'break', 'case', 'class', 'def', 'defined?', 'do',
    'else', 'elsif', 'end', 'ensure', 'for', 'if', 'module', 'next',
    'redo', 'rescue', 'retry', 'return', 'self', 'super', 'then',
    'undef', 'unless', 'until', 'when', 'while', 'yield',
    'false', 'nil', 'true', '__FILE__', '__LINE__', 'TRUE', 'FALSE',
    'NIL', 'STDIN', 'STDOUT', 'STDERR', 'ENV', 'ARGF', 'ARGV', 'DATA',
    'RUBY_VERSION', 'RUBY_RELEASE_DATE', 'RUBY_PLATFORM',
    'and', 'in', 'not', 'or',
    'public', 'private', 'protected'
  ));

  // http://www.tutorialspoint.com/ruby/ruby_builtin_functions.htm
  // don't know how reliable that is... doesn't look incredibly inspiring
  $this->add_identifier_mapping('FUNCTION', array('abort', 'Array',
    'at_exit', 'autoload', 'binding', 'block_given?', 'callcc', 'caller',
    'catch', 'chomp', 'chomp!', 'chop', 'chop!', 'eval', 'exec', 'exit',
    'exit!', 'fail', 'Float', 'fork', 'format', 'gets', 'global_variables',
    'gsub', 'gsub!', 'Integer', 'lambda', 'proc', 'load', 'local_variables',
    'loop', 'open', 'p', 'print', 'printf', 'puts', 'raise',
    'rand', 'readlines', 'require', 'scan', 'select', 'set_trace_func',
    'sleep', 'split', 'sprintf', 'srand', 'String', 'syscall', 'system',
    'sub', 'sub!', 'test', 'throw', 'trace_var', 'trap', 'untrace_var',
    'abs', 'ceil', 'coerce', 'divmod', 'floor', 'integer?', 'modulo',
    'nonzero?', 'remainder', 'round', 'truncate', 'zero?', 'chr', 'size',
    'step', 'times', 'to_f', 'to_int', 'to_i', 'finite?', 'infinite?',
    'nan?', 'atan2', 'cos', 'exp', 'frexp', 'ldexp', 'log', 'log10', 'sin',
    'sqrt', 'tan'));

  // this can break a bit with Ruby's whacky syntax
  $this->remove_filter('pcre');
  // don't want this.
  $this->remove_filter('comment-to-doc');

  // Replacement for the generic pcre filter: the second argument tells the
  // filter whether the regex literal is delimited by slashes.
  // An anonymous function is used because create_function() is deprecated
  // as of PHP 7.2 and no longer exists in PHP 8.
  $this->add_filter('REGEX', function ($tok) {
    return LuminousFilters::pcre($tok, (isset($tok[1][0]) && $tok[1][0] === "/"));
  });
}
/**
 * Decides whether the '/' at the scan pointer opens a regex literal (true)
 * or is a division operator (false), by looking back over the tokens
 * recorded so far.
 *
 * Annoyingly I don't really know exactly what rules Ruby uses for
 * disambiguating regular expressions. There might be some incorrect
 * assumptions in here.
 */
protected function is_regex() {
  // '/= ' is taken to be a divide-and-assign
  if ($this->check('%/=\s%')) {
    return false;
  }
  $space_after = (bool)$this->check("%/[ \t]%");
  $space_before = false;

  $i = count($this->tokens);
  while (--$i >= 0) {
    $tok = $this->tokens[$i];
    $kind = $tok[0];

    if ($kind === 'COMMENT') {
      continue;
    }
    // after an operator, a string or an opening bracket/comma we are
    // definitely in operand position
    if ($kind === 'OPERATOR' || $kind === 'STRING'
      || $tok[1] === '(' || $tok[1] === ',' || $tok[1] === '{'
      || $tok[1] === '['
    ) {
      return true;
    }
    // untyped text: remember that something separated us from the previous
    // real token and keep looking
    if ($kind === null) {
      $space_before = true;
      continue;
    }
    if ($kind === 'IDENT'
      || $kind === 'CONSTANT'
      || $kind === 'VALUE' // aka :symbols
    ) {
      // this could be an operator or operand
      // Kate's syntax engine seems to operate on the following basis:
      // "x /y" is a regex, whereas "x / y" and "x/y" are divisions
      return $space_before && !$space_after;
    }
    // a number (definitely followed by an operator), or anything else
    return false;
  }
  return true; // no preceding tokens, presumably a code fragment.
}
/**
 * Scans an interpolated block with a nested Ruby scanner.
 *
 * The nested scanner starts at the current position with its $interpolation
 * flag set. Whatever it tagged is recorded here as one INTERPOLATION token,
 * and the scan pointer is moved to where the nested scanner stopped.
 */
protected function interpolate() {
  $nested = new LuminousRubyScanner();
  $nested->string($this->string());
  $nested->pos($this->pos());
  $nested->interpolation = true;
  $nested->init();
  $nested->main();

  $tagged = $nested->tagged();
  $this->record($tagged, 'INTERPOLATION', true);
  $this->pos($nested->pos());
}
/**
 * Consumes the bodies of all queued heredocs. Call at eol/bol when the
 * heredoc queue is not empty.
 *
 * For each queued declaration the body is recorded as HEREDOC up to its
 * terminating identifier, which is recorded as DELIMITER. Interpolatable
 * heredocs are additionally interrupted at each #{ so that the embedded code
 * can be scanned by interpolate(). The queue is emptied before returning.
 */
protected function do_heredoc() {
  assert (!empty($this->heredocs));
  // start of the heredoc text which has not been recorded yet
  $chunk_start = $this->pos();
  $i = 0;
  while ($i < count($this->heredocs)) {
    list($ident, $identable, $interpolatable) = $this->heredocs[$i];

    // the terminator sits at the start of a line, optionally indented
    $indent = $identable? "[ \t]*" : '';
    $searches = array(
      sprintf('/^%s%s\\b/m', $indent, preg_quote($ident, '/'))
    );
    if ($interpolatable) {
      $searches[] = '/\#\{/';
    }

    list($next, $matches) = $this->get_next($searches);
    if ($next === -1) {
      // no match for end delim, run to EOS
      $this->record(substr($this->string(), $chunk_start), 'HEREDOC');
      $this->terminate();
      break;
    }
    assert($matches !== null);
    $hit = $matches[0];

    // everything up to the hit is heredoc text, the hit itself a delimiter
    $this->pos($next);
    $this->record(
      substr($this->string(), $chunk_start, $this->pos()-$chunk_start),
      'HEREDOC');
    $this->record($hit, 'DELIMITER');

    if ($hit === '#{') {
      // interpolation, break heredoc and do that; the same heredoc is
      // resumed on the next pass
      $this->pos_shift(strlen($hit));
      $this->interpolate();
      if ($this->peek() === '}') {
        $this->record($this->get(), 'DELIMITER');
      }
    }
    else {
      // terminator: this heredoc is finished, move on to the next one
      $this->pos($next + strlen($hit));
      $i++;
    }
    $chunk_start = $this->pos();

    // subscanner might have consumed all the string, in which case there's
    // no point continuing
    if ($this->eos()) {
      break;
    }
  }
  // we may or may not have technically addressed all the heredocs in the
  // queue, but we do want to clear them out now
  $this->heredocs = array();
}
/**
 * Records the part of the source between two offsets.
 *
 * @param $from   offset of the first byte
 * @param $to     offset just past the last byte; nothing happens if it
 *                equals $from
 * @param $type   token name to record under
 * @param $split  when true, the range is cut up at runs of whitespace: the
 *                whitespace is recorded untyped and everything else as $type
 */
private function record_string_range($from, $to, $type, $split) {
  if ($to === $from) {
    return;
  }
  $text = substr($this->string(), $from, $to-$from);
  if (!$split) {
    $this->record($text, $type);
    return;
  }
  $pieces = preg_split('/(\s+)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  foreach ($pieces as $piece) {
    $is_whitespace = preg_match('/^\s+$/', $piece) === 1;
    $this->record($piece, $is_whitespace? null : $type);
  }
}
/**
 * Handles string types (inc regexes), which may have nestable delimiters or
 * interpolation.
 *
 * $str_data is the tuple built in main():
 *   0: token name to record the string under
 *   1: opening delimiter
 *   2: closing delimiter; if it differs from the opener, the delimiters are
 *      treated as a balanced pair which may nest
 *   3: offset at which the text still to be recorded starts
 *   4: whether #{...} interpolation is honoured
 *   5: "fancy" delimiter: when true, the closing delimiter is recorded as a
 *      separate DELIMITER token instead of as part of the string
 *   6: whether the body is split at whitespace (passed on to
 *      record_string_range())
 *
 * The scan pointer is left just past the closing delimiter and any regex
 * option letters, or at the end of the source if the string is unterminated.
 */
protected function do_string($str_data) {
  list($type, $open_delimiter, $close_delimiter, $pos, $interpolation,
    $fancy_delim, $split) = $str_data;
  $balanced = $open_delimiter !== $close_delimiter;

  // Matches the target (group 2) unless it is escaped. Group 1 soaks up
  // any run of escaped backslashes directly in front of it.
  $template = '/(?<!\\\\)((?:\\\\\\\\)*)(%s)/';
  $patterns = array();
  $patterns['term'] = sprintf($template, preg_quote($close_delimiter, '/'));
  if ($balanced) {
    // for nesting balanced delims
    $patterns['nest'] = sprintf($template, preg_quote($open_delimiter, '/'));
  }
  if ($interpolation) {
    $patterns['interp'] = sprintf($template, preg_quote('#{', '/'));
  }

  $nesting_level = 0;
  while (true) {
    list($name, $index, $matches) = $this->get_next_named($patterns);
    if ($name === null) {
      // special case, no matches, record the rest of the string and break
      // immediately
      $this->record_string_range($pos, strlen($this->string()), $type, $split);
      $this->terminate();
      break;
    }
    elseif ($name === 'nest') {
      // nestable opener
      $nesting_level++;
      $this->pos( $index + strlen($matches[0]) );
    }
    elseif ($name === 'term') {
      // terminator, may be nested
      $terminated = ($nesting_level === 0);
      if ($terminated) {
        // wasn't nested, real terminator.
        if ($fancy_delim) {
          // matches[1] is either empty or a sequence of backslashes
          $this->record_string_range($pos, $index+strlen($matches[1]), $type, $split);
          $this->record($matches[2], 'DELIMITER');
        } else {
          $this->record_string_range($pos, $index+strlen($matches[0]), $type, $split);
        }
      }
      else {
        // pop a nesting level
        $nesting_level--;
      }
      $this->pos( $index + strlen($matches[0]) );
      if ($terminated) break;
    }
    elseif ($name === 'interp') {
      // interpolation - temporarily break string highlighting, then
      // do interpolation, then resume.
      $this->record_string_range($pos, $index + strlen($matches[1]), $type, $split);
      $this->record($matches[2], 'DELIMITER');
      $this->pos( $index + strlen($matches[0]) );
      $this->interpolate();
      if ($this->peek() === '}')
        $this->record($this->get(), 'DELIMITER');
      $pos = $this->pos();
    }
    else {
      assert(0);
    }
  }

  // Regex literals may be followed by option letters: i, m, x, o and the
  // encoding selectors n, e, s, u.
  if ($type === 'REGEX' && $this->scan('/[iomxnesu]+/'))
    $this->record($this->match(), 'KEYWORD');
}
/**
 * Main tokenising loop.
 *
 * Runs until the end of the source, or until one of the early exits is hit:
 *  - in interpolation mode, the '}' which closes the interpolated block
 *    (braces opened inside the block are counted so that the matching
 *    closers don't end it);
 *  - in rails mode, a closing %> (or -%>) tag;
 *  - the __END__ marker.
 * In the first two cases the terminator itself is left for the caller.
 */
public function main() {
  while (!$this->eos()) {
    // heredoc bodies start on the line after their declaration
    if ($this->bol() && !empty($this->heredocs)) {
      $this->do_heredoc();
    }

    if ($this->interpolation) {
      $c = $this->peek();
      if ($c === '{') $this->curley_braces++;
      elseif ($c === '}') {
        $this->curley_braces--;
        // The '{' of the '#{' was consumed by our parent, so the count
        // starts at zero and only the unmatched '}' takes it negative.
        // (Stopping at zero would end the block at the first nested
        // closing brace, e.g. in "#{ list.map { |x| x } }".)
        if ($this->curley_braces < 0) { break; }
      }
    }

    if ($this->rails && $this->check('/-?%>/')) {
      break;
    }

    $c = $this->peek();
    if ($c === '=' && $this->scan('/^=begin .*? (^=end|\\z)/msx')) {
      $this->record($this->match(), 'DOCCOMMENT');
    }
    elseif ($c === '#' && $this->scan($this->comment_regex))
      $this->record($this->match(), 'COMMENT');
    elseif ($this->scan($this->numeric) !== null) {
      $this->record($this->match(), 'NUMERIC');
    }
    // special globals ($!, $-w, $stdout, ...), then ordinary $global,
    // @instance and @@class variables
    elseif ( $c === '$' && $this->scan('/\\$
      (?:
        (?:[!@`\'\+1~=\/\\\,;\._0\*\$\?:"&<>])
        |
        (?: -[0adFiIlpvw])
        |
        (?:DEBUG|FILENAME|LOAD_PATH|stderr|stdin|stdout|VERBOSE)
      )/x') || $this->scan('/(\\$|@@?)\w+/')) {
      $this->record($this->match(), 'VARIABLE');
    }
    elseif ($this->scan('/:\w+/')) {
      $this->record($this->match(), 'VALUE');
    }
    // heredoc declaration: <<ID, <<-ID or <<~ID, optionally quoted. The body
    // is handled by do_heredoc() once the next line starts.
    elseif ( $c === '<' && $this->scan('/(<<([-~]?))([\'"`]?)([A-Z_]\w*)(\\3)/i')) {
      $m = $this->match_groups();
      $this->record($m[0], 'DELIMITER');
      // (delimiter, terminator may be indented?, interpolatable?)
      $hdoc = array($m[4], $m[2] === '-' || $m[2] === '~', $m[3] !== "'");
      $this->heredocs[] = $hdoc;
    }
    // TODO: "% hello " is I think a valid string, using whitespace as
    // delimiters. We're going to disallow this for now because
    // we're not disambiguating between that and modulus
    elseif (($c === '"' || $c === "'" || $c === '`' || $c === '%') &&
      $this->scan('/[\'"`]|%( [qQrswWx](?![[:alnum:]]|$) | (?![[:alnum:]\s]|$))/xm')
      || ($c === '/' && $this->is_regex())
    )
    {
      $interpolation = false;
      $type = 'STRING';
      $fancy_delim = false;
      $split = false;
      if ($c === '/') {
        $interpolation = true;
        $type = 'REGEX';
        $delimiter = $c;
        $pos = $this->pos();
        $this->get();
      } else {
        $pos = $this->match_pos();
        $delimiter = $this->match();
        if ($delimiter === '"') {
          $interpolation = true;
        } elseif ($delimiter === "'") {}
        elseif ($delimiter === '`') {
          $type = 'FUNCTION';
        }
        else {
          // %-literal: the match is '%' plus an optional type letter, the
          // next character is the actual delimiter
          $delimiter = $this->get();
          $m1 = $this->match_group(1);
          if ($m1 === 'Q' || $m1 === 'r' || $m1 === 'W' || $m1 === 'x')
            $interpolation = true;
          if ($m1 === 'w' || $m1 === 'W')
            $split = true;
          if ($m1 === 'x') $type = 'FUNCTION';
          elseif ($m1 === 'r') $type = 'REGEX';
          $fancy_delim = true;
          $this->record($this->match() . $delimiter, 'DELIMITER');
          $pos = $this->pos();
        }
      }
      $data = array($type, $delimiter, LuminousUtils::balance_delimiter($delimiter),
        $pos, $interpolation, $fancy_delim, $split);
      $this->do_string($data);
    }
    elseif ( (ctype_alpha($c) || $c === '_') &&
      ($m = $this->scan('/[_a-zA-Z]\w*[!?]?/')) !== null) {
      $this->record($m, ctype_upper($m[0])? 'CONSTANT' : 'IDENT');
      if ($m === '__END__') {
        // nothing after __END__ is code
        if (!$this->interpolation) {
          $this->record($this->rest(), null);
          $this->terminate();
        }
        break;
      }
    }
    elseif ($this->scan($this->operator_regex))
      $this->record($this->match(), 'OPERATOR');
    elseif ($this->scan("/[ \t]+/")) $this->record($this->match(), null);
    else {
      $this->record($this->get(), null);
    }
  }

  // In case not everything was popped
  if (isset($this->state_[0])) {
    $this->record(
      substr($this->string(), $this->state_[0][3],
        $this->pos() - $this->state_[0][3]),
      $this->state_[0][0]
    );
    $this->terminate();
  }
}
/**
 * Estimates how likely it is that $src is Ruby.
 *
 * A shebang is decisive: 1.0 if it mentions ruby, 0 for any other
 * interpreter. Otherwise each heuristic below which applies adds its weight
 * to the score.
 *
 * @param $src   the source code to inspect
 * @param $info  information about the source; 'shebang' and 'num_lines' are
 *               read here
 * @return int|float
 */
public static function guess_language($src, $info) {
  if (strpos($info['shebang'], 'ruby') !== false) return 1.0;
  elseif ($info['shebang']) return 0;
  $p = 0;
  // NB: strpos() returns 0 for a hit at the very start of the source, so the
  // results have to be compared against false rather than used as booleans.
  if (strpos($src, 'nil') !== false) $p += 0.05;
  if (strpos($src, '.nil?') !== false) $p += 0.02;
  if (strpos($src, '.empty?') !== false) $p += 0.02;
  // interpolation
  if (strpos($src, '#{$') !== false) $p += 0.02;
  // @ and $ vars
  if (preg_match('/@[a-zA-Z_]/', $src) && preg_match('/\\$[a-zA-Z_]/', $src))
    $p += 0.02;
  // symbols
  if (preg_match('/:[a-zA-Z_]/', $src)) $p += 0.01;
  // func def - no args
  if (preg_match("/^\s*+def\s++[a-zA-Z_]\w*+[ \t]*+[\n\r]/m", $src))
    $p += 0.1;
  // {|x[,y[,z...]]| is a very ruby-like construct
  if (preg_match('/ \\{ \\|
    \s*+ [a-zA-Z_]\w*+ \s*+
    (,\s*+[a-zA-Z_]\w*+\s*+)*+
    \\|/x', $src))
    $p += 0.15;
  // so is 'do |x|'
  if (preg_match("/\\bdo\s*+\\|[^\\|\r\n]++\\|/", $src))
    $p += 0.05;
  // class defs with inheritance has quite distinct syntax
  // class x < y
  if (preg_match(
    "/^ \s* class \s+ \w+ \s* < \s* \w+(::\w+)* [\t ]*+ [\r\n] /mx",
    $src))
    $p += 0.1;
  $num_lines = $info['num_lines'];
  // let's say if 5% of lines are hash commented that's a good thing
  if (substr_count($src, '#') > $num_lines/20) $p += 0.05;
  // =~ /regex/
  if (preg_match('%=~\s++/%', $src)) $p += 0.02;
  // unless something?
  if (preg_match('/unless\s+[^\?]++\?/', $src)) $p += 0.05;
  // def ... end, with the end at the same indentation as the def
  if (preg_match('/^(\s*+)def\s+.*^\1end\s/ms', $src)) $p += 0.05;
  // .to_s, .to_i and friends
  if (preg_match('/\.to_\w+(?=\s|$)/', $src)) $p += 0.01;
  return $p;
}
}

152
3rdparty/luminous/languages/scala.php vendored Executable file
View File

@ -0,0 +1,152 @@
<?php
/**
* Scala
*
* Direct port of old luminous language file.
*
* TODO: The XML literals may contain embedded scala code. This is bad
* because we ignore that currently, and we may, in rare circumstances,
* incorrectly pop a tag when in fact it's inside a scala expression
*
* Some comments reference section numbers of the scala spec:
* http://www.scala-lang.org/sites/default/files/linuxsoft_archives/docu/files/ScalaReference.pdf
*
*/
// scala inherits some stuff from Java
require_once(dirname(__FILE__) . '/include/java_func_list.php');
class LuminousScalaScanner extends LuminousSimpleScanner {
/**
 * Override for the COMMENT_ML rule: Scala's multiline comments nest, so the
 * whole comment is consumed as a nestable COMMENT token.
 */
function comment_override() {
  $opener = '%/\\*%';
  $closer = '%\\*/%';
  $this->nestable_token('COMMENT', $opener, $closer);
}
/**
 * Override for the 'lt' rule. Scala has XML literals, so a '<' is either an
 * operator or the start of an XML literal.
 *
 * If it is judged to be an operator it is recorded as OPERATOR. Otherwise
 * the literal is handed to an XML sub-scanner and its output is recorded as
 * one XML token.
 *
 * @param $matches  the match for the 'lt' rule; $matches[0] is the text
 */
function xml_override($matches) {
  // this might just be an inequality, so we first need to disambiguate
  // that
  // 1.5 - the disambiguation is pretty simple, an XML tag must
  // follow either whitespace, (, or {, and the '<' must be followed
  // by '[!?_a-zA-Z]
  // I'm not sure if a comment is a special case, or if it's treated as
  // whitespace...
  $xml = false;
  for ($i = count($this->tokens) - 1; $i >= 0; $i--) {
    $tok = $this->tokens[$i];
    // ... but we're going treat it as a no-op and skip over it
    if ($tok[0] === 'COMMENT') continue;
    $text = $tok[1];
    // an empty token says nothing about what precedes us (and has no last
    // character to inspect)
    if ($text === '') continue;
    $last_char = $text[strlen($text) - 1];
    if (ctype_space($last_char) || $last_char === '(' || $last_char === '{') {
      $xml = (bool)$this->check('/<[!?a-zA-Z0-9_]/');
    }
    // the nearest non-comment token decides; there is no point in walking
    // further back through the token history
    break;
  }
  if (!$xml) {
    $this->record($matches[0], 'OPERATOR');
    $this->pos_shift(strlen($matches[0]));
    return;
  }
  $subscanner = new LuminousXMLScanner();
  $subscanner->string($this->string());
  $subscanner->pos($this->pos());
  $subscanner->xml_literal = true;
  $subscanner->init();
  $subscanner->main();
  $tagged = $subscanner->tagged();
  $this->record($tagged, 'XML', true);
  $this->pos($subscanner->pos());
}
/**
 * Registers the token patterns, overrides and identifier mappings for Scala.
 */
function init() {
  $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_SL);
  // only the opener is matched here; comment_override() consumes the rest
  $this->add_pattern('COMMENT_ML', '%/\\*%');
  $this->overrides['COMMENT_ML'] = array($this, 'comment_override');

  // 1.3.1 integer literals, 1.3.2 floatingPointLiteral
  // Do the float first so it takes precedence, our scanner does not follow
  // the max-munch rule
  $exp = '(?:[eE][+-]?\d+)';
  $suffix = '[FfDd]';
  $this->add_pattern('NUMERIC', "/(?: \d+\\.\d* | \\.\d+) $exp? $suffix? /x");
  $this->add_pattern('NUMERIC', "/\d+($exp $suffix? |$exp?$suffix)/x");
  $this->add_pattern('NUMERIC', '/(?:0x[a-fA-F0-9]+|\d+)[lL]?/');

  // 1.3.4 character literals
  // we can't really parse the unicode and work out what's printable,
  // so we'll just allow any unicode sequence
  $this->add_pattern('CHARACTER',
    "/'
    (
      (?:\\\\ (?:u[a-fA-F0-9]{1,4}|\d+|.))
      | .
    )'/sx");

  // 1.3.5 - 1.3.6
  // strings are kind of pythonic, triple quoting makes them multiline
  $this->add_pattern('STRING', '/"""
    (?: [^"\\\\]+ | \\\\. | ""[^"] | "[^"])*
    (?:"""|$)/sx');
  $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR_SL);

  // '<' is handled on its own because it may open an XML literal
  $this->add_pattern('lt', '/</');
  $this->overrides['lt'] = array($this, 'xml_override');

  // The hyphen is escaped so that it is a literal rather than forming the
  // range '*' to '=' (which would take in digits, '.', ',' and '<'). The
  // multi-byte not-sign is kept out of the character class because, without
  // the u modifier, a class matches single bytes.
  $this->add_pattern('OPERATOR', '/(?:¬|[!%^&*\\-=+~;:|>\\/?\\\\])+/');
  $this->add_pattern('IDENT', '/[a-z_]\w*/i');

  // 1.3.3 boolean literals
  $this->add_identifier_mapping('VALUE', array('true', 'false', 'null', 'None'));

  // from old luminous file
  $this->add_identifier_mapping('KEYWORD', array('abstract', 'case',
    'catch', 'class', 'def', 'do', 'else', 'extends', 'final', 'finally',
    'for', 'forSome', 'if', 'implicit', 'import', 'lazy', 'match',
    'new', 'object', 'override', 'package', 'private', 'protected',
    'return', 'sealed', 'super', 'this', 'throw', 'trait', 'try', 'type',
    'val', 'var', 'while', 'with', 'yield'));
  $this->add_identifier_mapping('TYPE', array('boolean', 'byte', 'char',
    'double', 'float', 'int', 'long', 'string', 'short', 'unit',
    'Boolean', 'Byte', 'Char', 'Double', 'Float', 'Int', 'Long', 'String',
    'Short', 'Unit'));

  // from Kate's syntax file
  $this->add_identifier_mapping('TYPE', array('ActorProxy', 'ActorTask',
    'ActorThread', 'AllRef', 'Any', 'AnyRef', 'Application', 'AppliedType',
    'Array', 'ArrayBuffer', 'Attribute', 'BoxedArray', 'BoxedBooleanArray',
    'BoxedByteArray', 'BoxedCharArray', 'Buffer', 'BufferedIterator', 'Char',
    'Console', 'Enumeration', 'Fluid', 'Function', 'IScheduler',
    'ImmutableMapAdaptor', 'ImmutableSetAdaptor', 'Int', 'Iterable', 'List',
    'ListBuffer', 'None', 'Option', 'Ordered', 'Pair', 'PartialFunction',
    'Pid', 'Predef', 'PriorityQueue', 'PriorityQueueProxy', 'Reaction',
    'Ref', 'Responder', 'RichInt', 'RichString', 'Rule', 'RuleTransformer',
    'Script', 'Seq', 'SerialVersionUID', 'Some', 'Stream', 'Symbol',
    'TcpService', 'TcpServiceWorker', 'Triple', 'Unit', 'Value',
    'WorkerThread', 'serializable', 'transient', 'volatile'));

  // scala inherits some stuff from Java
  $this->add_identifier_mapping('TYPE', $GLOBALS['luminous_java_types']);
}
/**
 * Estimates how likely it is that $src is Scala: each heuristic below which
 * matches the source adds its weight to the score.
 *
 * @param $src   the source code to inspect
 * @param $info  information about the source (not used here)
 * @return int|float
 */
public static function guess_language($src, $info) {
  $heuristics = array(
    // func def, a lot like python
    array('/\\bdef\s+\w+\s*\(/', 0.05),
    // val x = y
    array('/\\bval\s+\w+\s*=/', 0.1),
    // argument types
    array('/\\(\s*\w+\s*:\s*(String|Int|Array)/', 0.05),
    // tripled quoted strings, like python
    array('/\'{3}|"{3}/', 0.05),
  );
  $score = 0;
  foreach ($heuristics as $heuristic) {
    list($pattern, $weight) = $heuristic;
    if (preg_match($pattern, $src)) {
      $score += $weight;
    }
  }
  return $score;
}
}

335
3rdparty/luminous/languages/scss.php vendored Executable file
View File

@ -0,0 +1,335 @@
<?php
/**
* The SCSS scanner is quite complex, having to deal with nested rules
* and so forth and some disambiguation is non-trivial, so we are employing
* a two-pass approach here - we first tokenize the source as normal with a
* scanner, then we parse the token stream with a parser to figure out
* what various things really are.
*/
class LuminousSCSSScanner extends LuminousScanner {

    /**
     * First-pass token patterns, keyed by token name. main() tries them in
     * declaration order and records the first one that matches, so the
     * order is significant (e.g. DOUBLE_COLON is listed before COLON).
     */
    private $regexen = array();

    /**
     * Token name => tag name. A null value means the token carries no tag.
     * NOTE(review): presumably consumed by the parent LuminousScanner when
     * producing output - confirm against the base class.
     */
    public $rule_tag_map = array(
        'PROPERTY' => 'TYPE',
        'COMMENT_SL' => 'COMMENT',
        'COMMENT_ML' => 'COMMENT',
        'ELEMENT_SELECTOR' => 'KEYWORD',
        'STRING_S' => 'STRING',
        'STRING_D' => 'STRING',
        'CLASS_SELECTOR' => 'VARIABLE',
        'ID_SELECTOR' => 'VARIABLE',
        'PSEUDO_SELECTOR' => 'OPERATOR',
        'ATTR_SELECTOR' => 'OPERATOR',
        'WHITESPACE' => null,
        'COLON' => 'OPERATOR',
        'SEMICOLON' => 'OPERATOR',
        'COMMA' => 'OPERATOR',
        'R_BRACE' => 'OPERATOR',
        'R_BRACKET' => 'OPERATOR',
        'R_SQ_BRACKET' => 'OPERATOR',
        'L_BRACE' => 'OPERATOR',
        'L_BRACKET' => 'OPERATOR',
        'L_SQ_BRACKET' => 'OPERATOR',
        'OTHER_OPERATOR' => 'OPERATOR',
        'GENERIC_IDENTIFIER' => null,
        'AT_IDENTIFIER' => 'KEYWORD',
        'IMPORTANT' => 'KEYWORD',
    );

    /**
     * Builds the ordered table of first-pass token patterns.
     */
    public function init() {
        $this->regexen = array(
            // For the first pass we just feed in a bunch of tokens.
            // Some of these are generic and will require disambiguation later
            'COMMENT_SL' => LuminousTokenPresets::$C_COMMENT_SL,
            'COMMENT_ML' => LuminousTokenPresets::$C_COMMENT_ML,
            'STRING_S' => LuminousTokenPresets::$SINGLE_STR,
            'STRING_D' => LuminousTokenPresets::$DOUBLE_STR,
            // TODO check var naming, is $1 a legal variable?
            // Upper-case letters are accepted too: variable names are
            // ordinary identifiers, so $fontSize must be a single token.
            'VARIABLE' => '%\$[\-a-zA-Z_0-9]+ | \#\{\$[\-a-zA-Z_0-9]+\} %x',
            // Hyphens and underscores are allowed so that @font-face and
            // vendor-prefixed rules such as @-webkit-keyframes are matched
            // as one token.
            'AT_IDENTIFIER' => '%@[a-zA-Z0-9_\-]+%',
            // This is generic - it may be a selector fragment, a rule, or
            // even a hex colour.
            'GENERIC_IDENTIFIER' => '@
\\#[a-fA-F0-9]{3}(?:[a-fA-F0-9]{3})?
|
[0-9]+(\.[0-9]+)?(\w+|%|in|cm|mm|em|ex|pt|pc|px|s)?
|
-?[a-zA-Z_\-0-9]+[a-zA-Z_\-0-9]*
|&
@x',
            'IMPORTANT' => '/!important/',
            'L_BRACE' => '/\{/',
            'R_BRACE' => '/\}/',
            'L_SQ_BRACKET' => '/\[/',
            'R_SQ_BRACKET' => '/\]/',
            'L_BRACKET' => '/\(/',
            'R_BRACKET' => '/\)/',
            'DOUBLE_COLON' => '/::/',
            'COLON' => '/:/',
            'SEMICOLON' => '/;/',
            'DOT' => '/\./',
            'HASH' => '/#/',
            'COMMA' => '/,/',
            'OTHER_OPERATOR' => '@[+\-*/%&>=!]@',
            'WHITESPACE' => '/\s+/'
        );
    }

    /**
     * Tokenizes the whole input with the pattern table, then hands the token
     * stream to LuminousSASSParser and adopts the tokens it leaves behind.
     */
    public function main() {
        while (!$this->eos()) {
            $matched = null;
            foreach ($this->regexen as $token => $pattern) {
                $matched = $this->scan($pattern);
                if ($matched !== null) {
                    $this->record($matched, $token);
                    break;
                }
            }
            if ($matched === null) {
                // nothing matched here: record whatever get() returns as an
                // unnamed token so the scan position keeps advancing
                $this->record($this->get(), null);
            }
        }
        // second pass: disambiguate the generic tokens
        $parser = new LuminousSASSParser();
        $parser->tokens = $this->tokens;
        $parser->parse();
        $this->tokens = $parser->tokens;
    }
}
/**
* The parsing class
*/
class LuminousSASSParser {

    /**
     * The token stream. Each token is an array whose element 0 is the token
     * name (or null) and whose element 1 is the token text.
     */
    public $tokens;

    /** Index of the token currently being examined by parse(). */
    public $index;

    /**
     * Stack of open contexts: 'bracket', 'brace', 'square' or 'at'.
     */
    public $stack;

    /** Token name used to mark a token for removal by _cleanup(). */
    public static $delete_token = 'delete';

    /**
     * Returns true if the next token is the given token name
     * optionally skipping whitespace
     *
     * @param string $token_name        the token name to look for
     * @param bool   $ignore_whitespace skip WHITESPACE tokens when true
     * @return bool false when there is no following token
     */
    public function next_is($token_name, $ignore_whitespace = false) {
        $len = count($this->tokens);
        for ($i = $this->index + 1; $i < $len; $i++) {
            $tok = $this->tokens[$i][0];
            if ($ignore_whitespace && $tok === 'WHITESPACE') {
                continue;
            }
            return $tok === $token_name;
        }
        return false;
    }

    /**
     * Returns the index of the next match of the sequence of tokens
     * given, optionally ignoring certain tokens
     *
     * @param array $sequence token names which must occur in this order
     * @param array $ignore   token names which may be interleaved freely
     * @return int index of the first token of the match, or the number of
     *             tokens when there is no match
     */
    public function next_sequence($sequence, $ignore=array()) {
        $len = count($this->tokens);
        $seq_len = count($sequence);
        if ($seq_len === 0) {
            // nothing to look for
            return $len;
        }
        $seq = 0;
        $seq_start = 0;
        $i = $this->index + 1;
        while ($i < $len) {
            $tok = $this->tokens[$i][0];
            if ($tok === $sequence[$seq]) {
                if ($seq === 0) $seq_start = $i;
                $seq++;
                if ($seq === $seq_len) {
                    return $seq_start;
                }
                $i++;
            }
            elseif (in_array($tok, $ignore, true)) {
                $i++;
            }
            elseif ($seq > 0) {
                // A partial match failed. Resume just after the point where
                // it started, so that the token which broke the match can
                // itself begin a new one (e.g. COLON COLON L_BRACE).
                $i = $seq_start + 1;
                $seq = 0;
            }
            else {
                $i++;
            }
        }
        return $len;
    }

    /**
     * Returns the first token which occurs out of the set of given tokens
     *
     * @param array $token_names candidate token names
     * @return string|null the name found, or null if none occurs
     */
    public function next_of($token_names) {
        $len = count($this->tokens);
        for ($i = $this->index + 1; $i < $len; $i++) {
            $tok = $this->tokens[$i][0];
            if (in_array($tok, $token_names, true)) {
                return $tok;
            }
        }
        return null;
    }

    /**
     * Returns the index of the next token with the given token name
     *
     * @param string $token_name the token name to look for
     * @return int the index, or the number of tokens when not found
     */
    public function next_of_type($token_name) {
        $len = count($this->tokens);
        for ($i = $this->index + 1; $i < $len; $i++) {
            if ($this->tokens[$i][0] === $token_name) {
                return $i;
            }
        }
        return $len;
    }

    /**
     * Renames the token to NUMERIC when its text starts with a digit or '#'.
     *
     * The token is taken by reference: the rename has to reach the caller's
     * token stream, a by-value parameter would only alter a local copy.
     *
     * @param array $token the token to inspect, modified in place
     */
    private function _parse_identifier(&$token) {
        $val = $token[1];
        $c = isset($val[0])? $val[0] : '';
        if (ctype_digit($c) || $c === '#') {
            $token[0] = 'NUMERIC';
        }
    }

    /**
     * Parses a selector rule
     *
     * If the previous token is a '.', '#', ':' or '::' it is merged into the
     * current token (and marked for deletion), giving a class, id or pseudo
     * selector. Otherwise the current token becomes an element selector.
     */
    private function _parse_rule() {
        $map = array(
            'DOT' => 'CLASS_SELECTOR',
            'HASH' => 'ID_SELECTOR',
            'COLON' => 'PSEUDO_SELECTOR',
            'DOUBLE_COLON' => 'PSEUDO_SELECTOR'
        );
        $new_token = $this->tokens[$this->index];
        $prev = $this->index - 1;
        $prev_type = ($prev >= 0)? $this->tokens[$prev][0] : null;
        if ($prev_type !== null && isset($map[$prev_type])) {
            // mark the prev token for deletion and concat into one.
            $new_token[0] = $map[$prev_type];
            $new_token[1] = $this->tokens[$prev][1] . $new_token[1];
            $this->tokens[$prev][0] = self::$delete_token;
        }
        else {
            // must be an element
            $new_token[0] = 'ELEMENT_SELECTOR';
        }
        $this->tokens[$this->index] = $new_token;
    }

    /**
     * Cleans up the token stream by deleting any tokens marked for
     * deletion, and makes sure the array is continuous afterwards.
     */
    private function _cleanup() {
        foreach($this->tokens as $i=>$t) {
            if ($t[0] === self::$delete_token) {
                unset($this->tokens[$i]);
            }
        }
        $this->tokens = array_values($this->tokens);
    }

    /**
     * Main parsing function
     *
     * Walks the token stream once, tracking open brackets/braces/at-rules on
     * $this->stack, and renames every GENERIC_IDENTIFIER token to what it
     * really is (selector, property, value, number, ...). Tokens are renamed
     * in place; tokens merged into a neighbour are removed at the end.
     */
    public function parse() {
        $len = count($this->tokens);
        $this->stack = array();
        $prop_value = 'PROPERTY';
        $pushes = array(
            'L_BRACKET' => 'bracket',
            'L_BRACE' => 'brace',
            'AT_IDENTIFIER' => 'at',
            'L_SQ_BRACKET' => 'square'
        );
        $pops = array(
            'R_BRACKET' => 'bracket',
            'R_BRACE' => 'brace',
            'R_SQ_BRACKET' => 'square'
        );
        // The for-loop guarantees the index advances on every path,
        // including the early 'continue' below.
        for ($this->index = 0; $this->index < $len; $this->index++) {
            $tok_name = $this->tokens[$this->index][0];
            // unnamed tokens match none of the cases below, and tokens
            // marked for deletion must not be interpreted
            if ($tok_name === null || $tok_name === self::$delete_token) {
                continue;
            }
            $stack_size = count($this->stack);
            $state = !$stack_size? null : $this->stack[$stack_size-1];
            $in_bracket = in_array('bracket', $this->stack, true);
            $in_sq = in_array('square', $this->stack, true);
            $in_at = in_array('at', $this->stack, true);

            if ($tok_name === 'L_BRACE') {
                // a '{' ends the header of an at-rule
                if ($state === 'at') {
                    array_pop($this->stack);
                }
                $this->stack[] = $pushes[$tok_name];
                $prop_value = 'PROPERTY';
            }
            elseif (isset($pushes[$tok_name])) {
                $this->stack[] = $pushes[$tok_name];
            }
            elseif (isset($pops[$tok_name]) && $state === $pops[$tok_name]) {
                array_pop($this->stack);
            }
            elseif (!$in_bracket && $tok_name === 'COLON') {
                $prop_value = 'VALUE';
            }
            elseif ($tok_name === 'SEMICOLON') {
                $prop_value = 'PROPERTY';
                if ($state === 'at') array_pop($this->stack);
            }
            elseif ($tok_name === 'GENERIC_IDENTIFIER') {
                // this is where the fun starts.
                // we have to figure out exactly what this is
                // if we can look ahead and find a '{' before we find a
                // ';', then this is part of a selector.
                // Otherwise it's part of a property/value pair.
                // the exception is when we have something like:
                // font : { family : sans-serif; }
                // then we need to check for ':{'
                if ($in_sq) {
                    $this->tokens[$this->index][0] = 'ATTR_SELECTOR';
                }
                elseif ($in_bracket) {
                    $this->_parse_identifier($this->tokens[$this->index]);
                }
                elseif (!$in_at) {
                    $semi = $this->next_of_type('SEMICOLON');
                    $colon_brace = $this->next_sequence(
                        array('COLON', 'L_BRACE'), array('WHITESPACE'));
                    $brace = $this->next_of_type('L_BRACE');
                    $rule_terminator = min($semi, $colon_brace);
                    if ($brace < $rule_terminator) {
                        $this->_parse_rule();
                        $prop_value = 'PROPERTY';
                    }
                    else {
                        $this->tokens[$this->index][0] = $prop_value;
                        if ($prop_value === 'VALUE') {
                            $this->_parse_identifier(
                                $this->tokens[$this->index]);
                        }
                    }
                }
            }
        }
        $this->_cleanup();
    }
}

62
3rdparty/luminous/languages/sql.php vendored Executable file
View File

@ -0,0 +1,62 @@
<?php
class LuminousSQLScanner extends LuminousSimpleScanner {

    /**
     * Registers the identifier mappings and token patterns for SQL.
     */
    public function init() {
        $this->case_sensitive = false;
        // $this->remove_stream_filter('oo-syntax');
        $this->remove_filter('comment-to-doc');
        $this->remove_filter('constant');

        // NOTE(review): the included file is expected to define $keywords,
        // $types, $values, $operators and $functions in this scope - confirm.
        include(dirname(__FILE__) . '/include/sql.php');
        $this->add_identifier_mapping('KEYWORD', $keywords);
        $this->add_identifier_mapping('TYPE', $types);
        $this->add_identifier_mapping('VALUE', $values);
        $this->add_identifier_mapping('OPERATOR', $operators);
        $this->add_identifier_mapping('FUNCTION', $functions);

        $this->add_pattern('IDENT', '/[a-zA-Z_]+\w*/');
        $this->add_pattern('COMMENT', LuminousTokenPresets::$C_COMMENT_ML);
        // # is for MySQL.
        $this->add_pattern('COMMENT', '/(?:\#|--).*/');
        $this->add_pattern('STRING', LuminousTokenPresets::$SQL_SINGLE_STR_BSLASH);
        $this->add_pattern('STRING', LuminousTokenPresets::$DOUBLE_STR);
        $this->add_pattern('STRING', '/ ` (?> [^\\\\`]+ | \\\\. )* (?: `|$)/x');
        $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_HEX);
        $this->add_pattern('NUMERIC', LuminousTokenPresets::$NUM_REAL);
        $this->add_pattern('OPERATOR', '/[¬!%^&*\\-=+~:<>\\|\\/]+/');
        $this->add_pattern('KEYWORD', '/\\?/');
    }

    /**
     * Estimates how likely it is that $src is SQL.
     *
     * @param string $src  the source code to inspect
     * @param array  $info reads $info['trimmed'] (a string) and
     *                     $info['lines'][0] (the first line) when present
     * @return float the accumulated score
     */
    public static function guess_language($src, $info) {
        // we have to be careful not to assign too much weighting to
        // generic SQL keywords, which will often appear in other languages
        // when those languages are executing SQL statements
        //
        // All in all, SQL is pretty hard to recognise because generally speaking,
        // an SQL dump will probably contain only a tiny fraction of SQL keywords
        // with the majority of the text just being data.
        $p = 0.0;
        // if we're lucky, the top line will be a comment containing the phrase
        // 'SQL' or 'dump'
        if (isset($info['trimmed'], $info['lines'][0])
            && strpos($info['trimmed'], '--') === 0
            && (
                stripos($info['lines'][0], 'sql') !== false
                || stripos($info['lines'][0], 'dump') !== false
            )
        ) {
            $p = 0.5;
        }
        foreach(array('SELECT', 'CREATE TABLE', 'INSERT INTO', 'DROP TABLE',
            'INNER JOIN', 'OUTER JOIN') as $str)
        {
            if (strpos($src, $str) !== false) $p += 0.01;
        }
        // single line comments --
        if (preg_match_all('/^--/m', $src, $m) > 5)
            $p += 0.05;
        if (preg_match('/VARCHAR\(\d+\)/', $src)) $p += 0.05;
        return $p;
    }
}

78
3rdparty/luminous/languages/vb.php vendored Executable file
View File

@ -0,0 +1,78 @@
<?php
/*
* VB.NET
*
* Language spec:
* http://msdn.microsoft.com/en-us/library/aa712050(v=vs.71).aspx
*
* TODO: IIRC vb can be embedded in asp pages like php or ruby on rails,
* and XML literals: these are a little bit confusing, something
* like "<xyz>.something" appears to be a valid XML fragment (i.e. the <xyz>
* is a complete fragment), but at other times, the fragment would run until
* the root tag is popped. Need to find a proper description of the grammar
* to figure it out
*/
class LuminousVBScanner extends LuminousSimpleScanner {

    public $case_sensitive = false;

    /**
     * Registers the VB.NET token patterns, then the identifier mappings
     * taken from the two included list files.
     */
    public function init() {
        // array(token name, regex), registered in exactly this order
        $patterns = array(
            array('PREPROCESSOR', "/^[\t ]*#.*/m"),
            array('COMMENT', "/'.*/"),
            array('COMMENT', '/\\bREM\\b.*/i'),
            // float
            array('NUMERIC', '/ (?<!\d)
\d+\.\d+ (?: e[+\\-]?\d+)?
|\.\d+ (?: e[+\\-]?\d+)?
| \d+ e[+\\-]?\d+
/xi'),
            // int
            array('NUMERIC', '/ (?:
&H[0-9a-f]+
| &O[0-7]+
| (?<!\d)\d+
) [SIL]*/ix'),
            array('CHARACTER', '/"(?:""|.)"c/i'),
            array('STRING', '/" (?> [^"]+ | "" )* ($|")/x'),
            // in theory we should also match unicode quote chars
            // in reality, well, I read the php docs and I have no idea if
            // it's even possible.
            // The chars are:
            // http://www.fileformat.info/info/unicode/char/201c/index.htm
            // and
            // http://www.fileformat.info/info/unicode/char/201d/index.htm

            // date literals, this isn't as discriminating as the grammar
            // specifies.
            array('VALUE', "/#[ \t][^#\n]*[ \t]#/"),
            array('OPERATOR', '/[&*+\\-\\/\\\\^<=>,\\.]+/'),
            // http://msdn.microsoft.com/en-us/library/aa711645(v=VS.71).aspx
            // XXX: it warns about ! being ambiguous but I don't see how it
            // can be ambiguous if we use this regex?
            array('IDENT', '/[a-z_]\w*[%&@!#$]?/i'),
        );
        foreach ($patterns as $entry) {
            $this->add_pattern($entry[0], $entry[1]);
        }

        // we'll borrow C#'s list of types (ie modules, classes, etc)
        include(dirname(__FILE__) . '/include/vb.php');
        include(dirname(__FILE__) . '/include/csharp_list.php');
        $mappings = array(
            array('VALUE', $luminous_vb_values),
            array('OPERATOR', $luminous_vb_operators),
            array('TYPE', $luminous_vb_types),
            array('KEYWORD', $luminous_vb_keywords),
            array('TYPE', $luminous_csharp_type_list),
        );
        foreach ($mappings as $entry) {
            $this->add_identifier_mapping($entry[0], $entry[1]);
        }
    }

    /**
     * Scores how strongly $src resembles VB.NET.
     *
     * @param string $src  the source code to inspect
     * @param mixed  $info not used by this implementation
     * @return float sum of the weights of the heuristics that matched
     */
    public static function guess_language($src, $info) {
        $heuristics = array(
            '/^Imports\s+System/i' => 0.1,
            '/Dim\s+\w+\s+As\s+/i' => 0.2,
            '/(Public|Private|Protected)\s+Sub\s+/i' => 0.1,
        );
        $score = 0.0;
        foreach ($heuristics as $regex => $weight) {
            if (preg_match($regex, $src)) {
                $score += $weight;
            }
        }
        return $score;
    }
}

63
3rdparty/luminous/languages/vim.php vendored Executable file
View File

@ -0,0 +1,63 @@
<?php
// I can't find any formal definition of vimscript's grammar.
// I'm pretty sure it's more complex than this, but, who knows.
require_once(dirname(__FILE__) . '/include/vim_list.php');
class LuminousVimScriptScanner extends LuminousSimpleScanner {

    /**
     * Override for COMMENT_STRING tokens: decides whether a double quote
     * starts a comment or a string, and records the token.
     *
     * A quote reached from the beginning of a line always starts a comment.
     * Otherwise it is a comment only if it runs to the end of the line
     * without a closing quote; failing that it is a double-quoted string.
     */
    public function string_override() {
        $at_line_start = $this->bol();
        $this->skip_whitespace();
        assert($this->peek() === '"');

        if ($at_line_start) {
            $this->record($this->scan("/.*/"), 'COMMENT');
            return;
        }
        if ($this->scan("/ \" (?> [^\n\"\\\\]+ | \\\\. )*$ /mx")) {
            $this->record($this->match(), 'COMMENT');
            return;
        }
        $string = $this->scan(LuminousTokenPresets::$DOUBLE_STR);
        assert($string !== null);
        $this->record($string, 'STRING');
    }

    /**
     * Comment filter: wraps quoted fragments inside a comment in STRING
     * tags and a leading "Capitalised words: text" phrase in DOCTAG/DOCSTR
     * tags.
     *
     * @param array $token the comment token; element 1 is its text
     * @return array the token after escaping, with the rewritten text
     */
    static function comment_filter($token) {
        $token = LuminousUtils::escape_token($token);
        $text = $token[1];
        // It pays to run the strpos checks first.
        if (strpos(substr($text, 1), '"') !== false) {
            $text = preg_replace('/(?<!^)"(?>[^"]*)"/',
                "<STRING>$0</STRING>", $text);
        }
        if (strpos($text, ':') !== false) {
            $text = preg_replace(
                '/(?<=^")((?>\W*))((?>[A-Z]\w+(?>(?>\s+\w+)*)))(:\s*)(.*)/',
                '$1<DOCTAG>$2</DOCTAG>$3<DOCSTR>$4</DOCSTR>', $text);
        }
        $token[1] = $text;
        return $token;
    }

    /**
     * Registers patterns, identifier mappings, filters and the override for
     * double-quote handling.
     */
    function init() {
        // array(token name, regex), registered in exactly this order
        $patterns = array(
            array('COMMENT_STRING', "/[\t ]*\"/"),
            array('STRING', "/'(?>[^\n\\\\']+ | \\\\. )*'/x"),
            array('NUMERIC', '/\#[a-f0-9]+/i'),
            array('NUMERIC', LuminousTokenPresets::$NUM_HEX),
            array('NUMERIC', LuminousTokenPresets::$NUM_REAL),
            array('IDENT', '/[a-z_]\w*/i'),
            array('OPERATOR', '@[~¬!%^&*\-=+;:,<.>/?\|]+@'),
        );
        foreach ($patterns as $entry) {
            $this->add_pattern($entry[0], $entry[1]);
        }

        $this->add_identifier_mapping('FUNCTION',
            $GLOBALS['luminous_vim_functions']);
        $this->add_identifier_mapping('KEYWORD',
            $GLOBALS['luminous_vim_keywords']);

        $this->remove_stream_filter('oo-syntax');
        $this->remove_filter('comment-to-doc');
        $this->add_filter('comment', 'COMMENT',
            array($this, 'comment_filter'));

        $this->overrides = array(
            'COMMENT_STRING' => array($this, 'string_override'),
        );
    }
}

31
3rdparty/luminous/languages/xml.php vendored Executable file
View File

@ -0,0 +1,31 @@
<?php
/*
* HTML is the 'root' scanner, we just override a couple of config settings
* here, to prevent it from looking for CSS or JS.
*/
class LuminousXMLScanner extends LuminousHTMLScanner {

    public $scripts = false;
    public $embedded_server = false;

    /**
     * Scores how strongly $src resembles XML.
     *
     * @param string $src  the source code to inspect
     * @param mixed  $info not used by this implementation
     * @return int|float 1.0 for a leading XML declaration, 0 when a DOCTYPE
     *                   is present, otherwise the summed heuristic weights
     */
    public static function guess_language($src, $info) {
        // an XML declaration at the very start settles it
        if (strpos(ltrim($src), '<?xml') === 0) {
            return 1.0;
        }
        // don't catch HTML doctypes
        if (strpos($src, '<!DOCTYPE') !== false) {
            return 0;
        }

        $score = 0;

        // simple tag
        $unindented = preg_replace('/^\s+/m', '', $src);
        $line_count = preg_match_all('/$/m', $unindented, $ignored);
        $tag_count = preg_match_all('%<[!?/]?[a-zA-Z_:\\-]%', $src, $ignored);
        // avg 1 tag every 4 lines
        if ($tag_count > $line_count/4) {
            $score += 0.15;
        }

        // self closing tag
        if (strpos($src, '/>') !== false) {
            $score += 0.05;
        }

        // tag with attr
        if (preg_match('/<[a-zA-Z_]\w*\s+[a-zA-Z_]\w+\s*=["\']/', $src)) {
            $score += 0.1;
        }
        return $score;
    }
}