comunic/3rdparty/luminous/languages/scss.php

<?php
/**
 * First pass of the SCSS highlighter.
 *
 * SCSS has nested rules and several constructs that cannot be told apart
 * by a regular expression alone, so highlighting is done in two passes:
 * this scanner splits the source into coarse, partly generic tokens, and
 * LuminousSASSParser then walks the token stream to decide what the
 * generic ones really are.
 */
class LuminousSCSSScanner extends LuminousScanner {
    /**
     * Token name => PCRE pattern. main() tries the patterns in declaration
     * order and records the first one that matches, so more specific
     * patterns must come before more general ones (e.g. DOUBLE_COLON
     * before COLON, GENERIC_IDENTIFIER before HASH).
     */
    private $regexen = array();

    /**
     * Token name => output tag name. A null value appears to mean "emit
     * without a tag" - NOTE(review): the map is consumed by the parent
     * class, which is not visible here; confirm.
     */
    public $rule_tag_map = array(
        'PROPERTY' => 'TYPE',
        'COMMENT_SL' => 'COMMENT',
        'COMMENT_ML' => 'COMMENT',
        'ELEMENT_SELECTOR' => 'KEYWORD',
        'STRING_S' => 'STRING',
        'STRING_D' => 'STRING',
        'CLASS_SELECTOR' => 'VARIABLE',
        'ID_SELECTOR' => 'VARIABLE',
        'PSEUDO_SELECTOR' => 'OPERATOR',
        'ATTR_SELECTOR' => 'OPERATOR',
        'WHITESPACE' => null,
        'COLON' => 'OPERATOR',
        'SEMICOLON' => 'OPERATOR',
        'COMMA' => 'OPERATOR',
        'R_BRACE' => 'OPERATOR',
        'R_BRACKET' => 'OPERATOR',
        'R_SQ_BRACKET' => 'OPERATOR',
        'L_BRACE' => 'OPERATOR',
        'L_BRACKET' => 'OPERATOR',
        'L_SQ_BRACKET' => 'OPERATOR',
        'OTHER_OPERATOR' => 'OPERATOR',
        'GENERIC_IDENTIFIER' => null,
        'AT_IDENTIFIER' => 'KEYWORD',
        'IMPORTANT' => 'KEYWORD',
    );

    /**
     * Builds the pattern table used by main().
     */
    public function init() {
        $this->regexen = array(
            // For the first pass we just feed in a bunch of tokens.
            // Some of these are generic and will require disambiguation later
            'COMMENT_SL' => LuminousTokenPresets::$C_COMMENT_SL,
            'COMMENT_ML' =>  LuminousTokenPresets::$C_COMMENT_ML,
            'STRING_S' => LuminousTokenPresets::$SINGLE_STR,
            'STRING_D' => LuminousTokenPresets::$DOUBLE_STR,
            // A plain variable ($name) or an interpolated one (#{$name}).
            // Upper-case letters are accepted so that $myVar is one token.
            // TODO check var naming, is $1 a legal variable?
            'VARIABLE' => '%\$[\-a-zA-Z_0-9]+ | \#\{\$[\-a-zA-Z_0-9]+\} %x',
            // Hyphens are part of the name: @font-face, @at-root,
            // @-webkit-keyframes ...
            'AT_IDENTIFIER' => '%@[a-zA-Z0-9_\-]+%',

            // This is generic - it may be a selector fragment, a rule, or
            // even a hex colour.
            // The hex colour alternative (3, 4, 6 or 8 digits) must not be
            // followed by another identifier character, otherwise an ID
            // selector such as #facebook would be split into '#fac' and
            // 'ebook'; with the lookahead it falls through to HASH followed
            // by a plain identifier instead.
            'GENERIC_IDENTIFIER' => '@
                \\#(?:[a-fA-F0-9]{8}|[a-fA-F0-9]{6}|[a-fA-F0-9]{3,4})(?![a-zA-Z_\-0-9])
                |
                [0-9]+(\.[0-9]+)?(\w+|%|in|cm|mm|em|ex|pt|pc|px|s)?
                |
                -?[a-zA-Z_\-0-9]+[a-zA-Z_\-0-9]*
                |&
            @x',
            'IMPORTANT' => '/!important/',
            'L_BRACE' => '/\{/',
            'R_BRACE' => '/\}/',
            'L_SQ_BRACKET' => '/\[/',
            'R_SQ_BRACKET' => '/\]/',
            'L_BRACKET' => '/\(/',
            'R_BRACKET' => '/\)/',

            'DOUBLE_COLON' => '/::/',
            'COLON' => '/:/',
            'SEMICOLON' => '/;/',

            'DOT' => '/\./',
            'HASH' => '/#/',

            'COMMA' => '/,/',

            'OTHER_OPERATOR' => '@[+\-*/%&>=!]@',

            'WHITESPACE' => '/\s+/'
        );
    }


    /**
     * Tokenizes the whole input, then hands the token stream to
     * LuminousSASSParser and stores the parser's result back in
     * $this->tokens.
     */
    public function main() {
        while (!$this->eos()) {
            $matched = null;
            foreach ($this->regexen as $name => $regex) {
                $matched = $this->scan($regex);
                if ($matched !== null) {
                    $this->record($matched, $name);
                    break;
                }
            }
            if ($matched === null) {
                // No pattern matched here: record whatever get() returns
                // as an unnamed token so the loop keeps moving.
                // NOTE(review): presumably get() consumes one character -
                // confirm against LuminousScanner.
                $this->record($this->get(), null);
            }
        }

        // Second pass: resolve the generic tokens.
        $parser = new LuminousSASSParser();
        $parser->tokens = $this->tokens;
        $parser->parse();
        $this->tokens = $parser->tokens;
    }
}
/**
 * Second pass of the SCSS highlighter.
 *
 * Walks a flat token stream and renames the generic tokens according to
 * their context (selector, property, value, ...). Every token is an array
 * whose element 0 is the token name and whose element 1 is the matched
 * text; any further elements are carried along untouched.
 */
class LuminousSASSParser {

    /** Token stream; read and rewritten in place by parse(). */
    public $tokens;
    /** Index of the token currently being examined. */
    public $index;
    /** Stack of open contexts: 'bracket', 'brace', 'square' or 'at'. */
    public $stack;
    /** Placeholder name given to tokens that _cleanup() will remove. */
    public static $delete_token = 'delete';

    /**
     * Returns true if the token following the current one has the given
     * name, optionally skipping whitespace tokens first.
     *
     * @param string $token_name        the token name to look for
     * @param bool   $ignore_whitespace skip WHITESPACE tokens before testing
     * @return bool false if there is no following token
     */
    public function next_is($token_name, $ignore_whitespace = false) {
        $len = count($this->tokens);
        for ($i = $this->index + 1; $i < $len; $i++) {
            $tok = $this->tokens[$i][0];
            if ($ignore_whitespace && $tok === 'WHITESPACE') {
                continue;
            }
            return $tok === $token_name;
        }
        return false;
    }

    /**
     * Returns the index at which the given sequence of token names next
     * starts (searching after the current token), optionally ignoring
     * certain tokens which may be interleaved with the sequence.
     *
     * @param array $sequence token names which must occur in this order
     * @param array $ignore   token names which do not break a match
     * @return int index of the first token of the match, or the number of
     *             tokens if there is no match
     */
    public function next_sequence($sequence, $ignore = array()) {
        $len = count($this->tokens);
        $seq_len = count($sequence);
        if ($seq_len === 0) {
            return $len;
        }
        $i = $this->index + 1;
        $seq = 0;
        $seq_start = 0;
        while ($i < $len) {
            $tok = $this->tokens[$i][0];
            if ($tok === $sequence[$seq]) {
                if ($seq === 0) {
                    $seq_start = $i;
                }
                $seq++;
                $i++;
                if ($seq === $seq_len) {
                    return $seq_start;
                }
            } elseif (in_array($tok, $ignore, true)) {
                $i++;
            } elseif ($seq > 0) {
                // A partial match failed. Restart right after the token it
                // began on, so a match that begins inside the failed
                // attempt (e.g. COLON COLON L_BRACE) is not skipped.
                $i = $seq_start + 1;
                $seq = 0;
            } else {
                $i++;
            }
        }
        return $len;
    }

    /**
     * Returns the name of the first following token whose name is in the
     * given set.
     *
     * @param array $token_names candidate token names
     * @return string|null the name found, or null if none occurs
     */
    public function next_of($token_names) {
        $len = count($this->tokens);
        for ($i = $this->index + 1; $i < $len; $i++) {
            $tok = $this->tokens[$i][0];
            if (in_array($tok, $token_names, true)) {
                return $tok;
            }
        }
        return null;
    }

    /**
     * Returns the index of the next token with the given token name.
     *
     * @param string $token_name the token name to look for
     * @return int the index found, or the number of tokens if none occurs
     */
    public function next_of_type($token_name) {
        $len = count($this->tokens);
        for ($i = $this->index + 1; $i < $len; $i++) {
            if ($this->tokens[$i][0] === $token_name) {
                return $i;
            }
        }
        return $len;
    }

    /**
     * Renames the token to NUMERIC if its text starts with a digit or '#'
     * (a number, a dimension or a hex colour); otherwise leaves it alone.
     *
     * The token is taken by reference: taking it by value would rename a
     * copy and the change would be lost.
     *
     * @param array $token the token to inspect, modified in place
     */
    private function _parse_identifier(&$token) {
        $val = $token[1];
        $c = isset($val[0]) ? $val[0] : '';
        if (ctype_digit($c) || $c === '#') {
            $token[0] = 'NUMERIC';
        }
    }

    /**
     * Classifies the current token as part of a selector.
     *
     * If the preceding token is '.', '#', ':' or '::' the two are merged
     * into a single class / ID / pseudo selector token and the preceding
     * token is marked for deletion. Otherwise the current token is an
     * element selector.
     */
    private function _parse_rule() {
        $prefix_map = array(
            'DOT' => 'CLASS_SELECTOR',
            'HASH' => 'ID_SELECTOR',
            'COLON' => 'PSEUDO_SELECTOR',
            'DOUBLE_COLON' => 'PSEUDO_SELECTOR'
        );
        $current = $this->tokens[$this->index];
        $current[0] = 'ELEMENT_SELECTOR';
        if ($this->index > 0) {
            $prev = $this->index - 1;
            $prev_type = $this->tokens[$prev][0];
            if (is_string($prev_type) && isset($prefix_map[$prev_type])) {
                // fold the prefix into this token and drop the prefix token
                $current[0] = $prefix_map[$prev_type];
                $current[1] = $this->tokens[$prev][1] . $current[1];
                $this->tokens[$prev][0] = self::$delete_token;
            }
        }
        $this->tokens[$this->index] = $current;
    }

    /**
     * Cleans up the token stream by deleting any tokens marked for
     * deletion, and makes sure the array is continuous afterwards.
     */
    private function _cleanup() {
        foreach ($this->tokens as $i => $t) {
            if ($t[0] === self::$delete_token) {
                unset($this->tokens[$i]);
            }
        }
        $this->tokens = array_values($this->tokens);
    }

    /**
     * Main parsing function. Walks $this->tokens once, tracking nesting in
     * $this->stack, renames GENERIC_IDENTIFIER tokens according to their
     * context, and finally removes the tokens that were merged away.
     */
    public function parse() {
        $len = count($this->tokens);
        $this->stack = array();
        // whether the next identifier inside a block is a property name
        // or (after a ':') part of a value
        $prop_value = 'PROPERTY';
        $pushes = array(
            'L_BRACKET' => 'bracket',
            'L_BRACE' => 'brace',
            'AT_IDENTIFIER' => 'at',
            'L_SQ_BRACKET' => 'square'
        );
        $pops = array(
            'R_BRACKET' => 'bracket',
            'R_BRACE' => 'brace',
            'R_SQ_BRACKET' => 'square'
        );
        $this->index = 0;
        while ($this->index < $len) {
            $token = &$this->tokens[$this->index];
            $tok_name = &$token[0];
            if ($tok_name === self::$delete_token) {
                // advance before skipping, otherwise the loop never ends
                $this->index++;
                continue;
            }
            $stack_size = count($this->stack);
            $state = $stack_size ? $this->stack[$stack_size - 1] : null;
            $in_bracket = in_array('bracket', $this->stack, true);
            $in_sq = in_array('square', $this->stack, true);
            $in_at = in_array('at', $this->stack, true);

            if ($tok_name === 'L_BRACE') {
                // an @-rule ends where its block begins
                if ($state === 'at') {
                    array_pop($this->stack);
                }
                $this->stack[] = $pushes[$tok_name];
                $prop_value = 'PROPERTY';
            }
            elseif (is_string($tok_name) && isset($pushes[$tok_name])) {
                $this->stack[] = $pushes[$tok_name];
            }
            elseif (is_string($tok_name) && isset($pops[$tok_name])
                && $state === $pops[$tok_name]
            ) {
                array_pop($this->stack);
            }
            elseif (!$in_bracket && $tok_name === 'COLON') {
                $prop_value = 'VALUE';
            }
            elseif ($tok_name === 'SEMICOLON') {
                $prop_value = 'PROPERTY';
                // a block-less @-rule ends at the semicolon
                if ($state === 'at') {
                    array_pop($this->stack);
                }
            }
            elseif ($tok_name === 'GENERIC_IDENTIFIER') {
                // this is where the fun starts.
                // we have to figure out exactly what this is
                // if we can look ahead and find a '{' before we find a
                // ';', then this is part of a selector.
                // Otherwise it's part of a property/value pair.
                // the exception is when we have something like:
                // font : { family : sans-serif; }
                // then we need to check for ':{'
                if ($in_sq) {
                    $token[0] = 'ATTR_SELECTOR';
                }
                elseif ($in_bracket) {
                    $this->_parse_identifier($token);
                }
                elseif (!$in_at) {
                    $semi = $this->next_of_type('SEMICOLON');
                    $colon_brace = $this->next_sequence(
                        array('COLON', 'L_BRACE'),
                        array('WHITESPACE')
                    );
                    $brace = $this->next_of_type('L_BRACE');

                    $rule_terminator = min($semi, $colon_brace);
                    if ($brace < $rule_terminator) {
                        $this->_parse_rule();
                        $prop_value = 'PROPERTY';
                    } else {
                        $token[0] = $prop_value;
                        if ($prop_value === 'VALUE') {
                            // numbers and colours inside a value become
                            // NUMERIC
                            $this->_parse_identifier($token);
                        }
                    }
                }
            }
            $this->index++;
        }
        // drop the references before the array is rebuilt
        unset($token, $tok_name);
        $this->_cleanup();
    }
}