'TYPE',
    'COMMENT_SL' => 'COMMENT',
    'COMMENT_ML' => 'COMMENT',
    'ELEMENT_SELECTOR' => 'KEYWORD',
    'STRING_S' => 'STRING',
    'STRING_D' => 'STRING',
    'CLASS_SELECTOR' => 'VARIABLE',
    'ID_SELECTOR' => 'VARIABLE',
    'PSEUDO_SELECTOR' => 'OPERATOR',
    'ATTR_SELECTOR' => 'OPERATOR',
    'WHITESPACE' => null,
    'COLON' => 'OPERATOR',
    'SEMICOLON' => 'OPERATOR',
    'COMMA' => 'OPERATOR',
    'R_BRACE' => 'OPERATOR',
    'R_BRACKET' => 'OPERATOR',
    'R_SQ_BRACKET' => 'OPERATOR',
    'L_BRACE' => 'OPERATOR',
    'L_BRACKET' => 'OPERATOR',
    'L_SQ_BRACKET' => 'OPERATOR',
    'OTHER_OPERATOR' => 'OPERATOR',
    'GENERIC_IDENTIFIER' => null,
    'AT_IDENTIFIER' => 'KEYWORD',
    'IMPORTANT' => 'KEYWORD',
  );

  /**
   * Builds the ordered table of token name => regex used by main().
   * Order matters: main() tries the patterns in array order and records
   * the first one that matches at the current position.
   */
  public function init() {
    $this->regexen = array(
      // For the first pass we just feed in a bunch of tokens.
      // Some of these are generic and will require disambiguation later
      'COMMENT_SL' => LuminousTokenPresets::$C_COMMENT_SL,
      'COMMENT_ML' => LuminousTokenPresets::$C_COMMENT_ML,
      'STRING_S' => LuminousTokenPresets::$SINGLE_STR,
      'STRING_D' => LuminousTokenPresets::$DOUBLE_STR,
      // TODO check var naming, is $1 a legal variable?
      // NOTE(review): the character class is lower-case only and the pattern
      // has no 'i' modifier - confirm whether mixed-case names should match.
      'VARIABLE' => '%\$[\-a-z_0-9]+ | \#\{\$[\-a-z_0-9]+\} %x',
      'AT_IDENTIFIER' => '%@[a-zA-Z0-9]+%',
      // This is generic - it may be a selector fragment, a rule, or
      // even a hex colour.
      'GENERIC_IDENTIFIER' => '@ \\#[a-fA-F0-9]{3}(?:[a-fA-F0-9]{3})? | [0-9]+(\.[0-9]+)?(\w+|%|in|cm|mm|em|ex|pt|pc|px|s)? 
| -?[a-zA-Z_\-0-9]+[a-zA-Z_\-0-9]* |& @x',
      'IMPORTANT' => '/!important/',
      'L_BRACE' => '/\{/',
      'R_BRACE' => '/\}/',
      'L_SQ_BRACKET' => '/\[/',
      'R_SQ_BRACKET' => '/\]/',
      'L_BRACKET' => '/\(/',
      'R_BRACKET' => '/\)/',
      'DOUBLE_COLON' => '/::/',
      'COLON' => '/:/',
      'SEMICOLON' => '/;/',
      'DOT' => '/\./',
      'HASH' => '/#/',
      'COMMA' => '/,/',
      'OTHER_OPERATOR' => '@[+\-*/%&>=!]@',
      'WHITESPACE' => '/\s+/'
    );
  }

  /**
   * First pass: splits the input into coarse tokens using the regex table,
   * recording anything that matches no pattern as an unnamed (null) token.
   * Second pass: hands the token list to LuminousSASSParser, which
   * disambiguates the generic tokens, and takes the rewritten list back.
   */
  public function main() {
    while (!$this->eos()) {
      $m = null;
      foreach ($this->regexen as $token => $pattern) {
        if (($m = $this->scan($pattern)) !== null) {
          $this->record($m, $token);
          break;
        }
      }
      if ($m === null) {
        // nothing matched here: consume via get() and record it unnamed
        $this->record($this->get(), null);
      }
    }

    $parser = new LuminousSASSParser();
    $parser->tokens = $this->tokens;
    $parser->parse();
    $this->tokens = $parser->tokens;
  }
}

/**
 * The parsing class
 *
 * Works on a flat list of tokens, where each token is an array whose
 * element 0 is the token name and element 1 is the token text. parse()
 * rewrites the names of GENERIC_IDENTIFIER tokens in place according to
 * the context they appear in.
 */
class LuminousSASSParser {

  /** The token list; set by the caller before parse(), read back afterwards */
  public $tokens = array();
  /** Index of the token currently being examined by parse() */
  public $index = 0;
  /** Stack of open contexts: 'bracket', 'brace', 'square' or 'at' */
  public $stack = array();

  /** Token name used to mark a token for removal by _cleanup() */
  static $delete_token = 'delete';

  /**
   * Returns true if the next token is the given token name
   * optionally skipping whitespace
   *
   * Returns false when there is no next token.
   */
  function next_is($token_name, $ignore_whitespace = false) {
    $len = count($this->tokens);
    for ($i = $this->index + 1; $i < $len; $i++) {
      $tok = $this->tokens[$i][0];
      if ($ignore_whitespace && $tok === 'WHITESPACE') {
        continue;
      }
      // the first token we do not skip decides the answer
      return $tok === $token_name;
    }
    return false;
  }

  /**
   * Returns the index of the next match of the sequence of tokens
   * given, optionally ignoring certain tokens
   *
   * The search starts at the token after the current index. Tokens whose
   * name is listed in $ignore may appear between sequence elements without
   * breaking the match. The returned index is that of the first token of
   * the matched sequence; if there is no match (or the sequence is empty)
   * the token count is returned.
   */
  function next_sequence($sequence, $ignore=array()) {
    $len = count($this->tokens);
    $seq_len = count($sequence);
    if ($seq_len === 0) {
      // nothing to look for; report 'not found' rather than reading
      // an undefined offset below
      return $len;
    }

    $i = $this->index + 1;
    $seq = 0;
    $seq_start = 0;
    while ($i < $len) {
      $tok = $this->tokens[$i][0];
      if ($tok === $sequence[$seq]) {
        if ($seq === 0) {
          $seq_start = $i;
        }
        $seq++;
        if ($seq === $seq_len) {
          return $seq_start;
        }
        $i++;
      } elseif ($seq > 0 && !in_array($tok, $ignore, true)) {
        // A partial match just failed. Restart one token after where it
        // began so that a token inside the failed attempt (including the
        // one that broke it) can still start a fresh match.
        $i = $seq_start + 1;
        $seq = 0;
      } else {
        // either an ignorable token, or no match is in progress
        $i++;
      }
    }
    return $len;
  }

  /**
   * Returns the first token which occurs out of the set of given tokens
   *
   * The return value is the matching token's name, or null if none of the
   * names occurs after the current index.
   */
  function next_of($token_names) {
    $len = count($this->tokens);
    for ($i = $this->index + 1; $i < $len; $i++) {
      $tok = $this->tokens[$i][0];
      if (in_array($tok, $token_names, true)) {
        return $tok;
      }
    }
    return null;
  }

  /**
   * Returns the index of the next token with the given token name
   *
   * If no such token follows the current index, the token count is returned.
   */
  function next_of_type($token_name) {
    $len = count($this->tokens);
    for ($i = $this->index + 1; $i < $len; $i++) {
      if ($this->tokens[$i][0] === $token_name) {
        return $i;
      }
    }
    return $len;
  }

  /**
   * Renames the token to NUMERIC if its text starts with a digit or a '#'.
   *
   * The token is taken by reference: the rename has to reach the caller's
   * token, otherwise it would be applied to a copy and lost.
   */
  private function _parse_identifier(&$token) {
    $val = $token[1];
    $c = isset($val[0]) ? $val[0] : '';
    if (ctype_digit($c) || $c === '#') {
      $token[0] = 'NUMERIC';
    }
  }

  /**
   * Parses a selector rule
   *
   * Retypes the current token as a selector. If the previous token is a
   * DOT, HASH, COLON or DOUBLE_COLON, its text is prepended to the current
   * token, the current token becomes the matching selector type, and the
   * previous token is marked for deletion. Otherwise the current token is
   * an element selector.
   */
  private function _parse_rule() {
    $map = array(
      'DOT' => 'CLASS_SELECTOR',
      'HASH' => 'ID_SELECTOR',
      'COLON' => 'PSEUDO_SELECTOR',
      'DOUBLE_COLON' => 'PSEUDO_SELECTOR'
    );
    $current = $this->tokens[$this->index];
    $prev = $this->index - 1;

    if ($prev >= 0 && isset($map[$this->tokens[$prev][0]])) {
      // mark the prev token for deletion and concat into one.
      $current[0] = $map[$this->tokens[$prev][0]];
      $current[1] = $this->tokens[$prev][1] . $current[1];
      $this->tokens[$prev][0] = self::$delete_token;
    } else {
      // must be an element
      $current[0] = 'ELEMENT_SELECTOR';
    }
    $this->tokens[$this->index] = $current;
  }

  /**
   * Cleans up the token stream by deleting any tokens marked for
   * deletion, and makes sure the array is continuous afterwards.
   */
  private function _cleanup() {
    $kept = array();
    foreach ($this->tokens as $t) {
      if ($t[0] !== self::$delete_token) {
        $kept[] = $t;
      }
    }
    $this->tokens = $kept;
  }

  /**
   * Main parsing function
   *
   * Walks the token list once, tracking which brackets/braces/at-rules are
   * open and whether we are on the property or the value side of a
   * declaration, and renames each GENERIC_IDENTIFIER accordingly. Tokens
   * marked for deletion are removed at the end.
   */
  public function parse() {
    $len = count($this->tokens);
    $this->stack = array();
    $prop_value = 'PROPERTY';

    $pushes = array(
      'L_BRACKET' => 'bracket',
      'L_BRACE' => 'brace',
      'AT_IDENTIFIER' => 'at',
      'L_SQ_BRACKET' => 'square'
    );
    $pops = array(
      'R_BRACKET' => 'bracket',
      'R_BRACE' => 'brace',
      'R_SQ_BRACKET' => 'square'
    );

    // A for loop guarantees the index advances on every path, including
    // the early 'continue' for tokens already marked for deletion.
    for ($this->index = 0; $this->index < $len; $this->index++) {
      $tok_name = $this->tokens[$this->index][0];
      if ($tok_name === self::$delete_token) {
        continue;
      }

      $stack_size = count($this->stack);
      $state = !$stack_size ? null : $this->stack[$stack_size - 1];
      $in_bracket = in_array('bracket', $this->stack, true);
      $in_sq = in_array('square', $this->stack, true);
      $in_at = in_array('at', $this->stack, true);

      if ($tok_name === 'L_BRACE') {
        // a '{' ends a pending at-rule header and opens a block
        if ($state === 'at') {
          array_pop($this->stack);
        }
        $this->stack[] = $pushes[$tok_name];
        $prop_value = 'PROPERTY';
      } elseif (isset($pushes[$tok_name])) {
        $this->stack[] = $pushes[$tok_name];
      } elseif (isset($pops[$tok_name]) && $state === $pops[$tok_name]) {
        // only pop when the closer matches the innermost open context
        array_pop($this->stack);
      } elseif (!$in_bracket && $tok_name === 'COLON') {
        $prop_value = 'VALUE';
      } elseif ($tok_name === 'SEMICOLON') {
        $prop_value = 'PROPERTY';
        if ($state === 'at') {
          array_pop($this->stack);
        }
      } elseif ($tok_name === 'GENERIC_IDENTIFIER') {
        // this is where the fun starts.
        // we have to figure out exactly what this is
        // if we can look ahead and find a '{' before we find a
        // ';', then this is part of a selector.
        // Otherwise it's part of a property/value pair.
        // the exception is when we have something like:
        // font : { family : sans-serif; }
        // then we need to check for ':{'
        if ($in_sq) {
          $this->tokens[$this->index][0] = 'ATTR_SELECTOR';
        } elseif ($in_bracket) {
          $this->_parse_identifier($this->tokens[$this->index]);
        } elseif (!$in_at) {
          $semi = $this->next_of_type('SEMICOLON');
          $colon_brace = $this->next_sequence(
            array('COLON', 'L_BRACE'), array('WHITESPACE'));
          $brace = $this->next_of_type('L_BRACE');
          $rule_terminator = min($semi, $colon_brace);
          if ($brace < $rule_terminator) {
            $this->_parse_rule();
            $prop_value = 'PROPERTY';
          } else {
            $this->tokens[$this->index][0] = $prop_value;
            if ($prop_value === 'VALUE') {
              $this->_parse_identifier($this->tokens[$this->index]);
            }
          }
        }
      }
    }
    $this->_cleanup();
  }
}