mirror of
				https://github.com/pierre42100/comunic
				synced 2025-11-03 19:54:11 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			335 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			335 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
<?php
 | 
						|
/**
 | 
						|
 * The SCSS scanner is quite complex, having to deal with nested rules
 | 
						|
 * and so forth and some disambiguation is non-trivial, so we are employing
 | 
						|
 * a two-pass approach here - we first tokenize the source as normal with a 
 | 
						|
 * scanner, then we parse the token stream with a parser to figure out
 | 
						|
 * what various things really are.
 | 
						|
 */
 | 
						|
class LuminousSCSSScanner extends LuminousScanner {
 | 
						|
    private $regexen = array();
 | 
						|
    
 | 
						|
    public $rule_tag_map = array(
 | 
						|
        'PROPERTY' => 'TYPE',
 | 
						|
        'COMMENT_SL' => 'COMMENT',
 | 
						|
        'COMMENT_ML' => 'COMMENT',
 | 
						|
        'ELEMENT_SELECTOR' => 'KEYWORD',
 | 
						|
        'STRING_S' => 'STRING',
 | 
						|
        'STRING_D' => 'STRING',
 | 
						|
        'CLASS_SELECTOR' => 'VARIABLE',
 | 
						|
        'ID_SELECTOR' => 'VARIABLE',
 | 
						|
        'PSEUDO_SELECTOR' => 'OPERATOR',
 | 
						|
        'ATTR_SELECTOR' => 'OPERATOR',
 | 
						|
        'WHITESPACE' => null,
 | 
						|
        'COLON' => 'OPERATOR',
 | 
						|
        'SEMICOLON' => 'OPERATOR',
 | 
						|
        'COMMA' => 'OPERATOR',
 | 
						|
        'R_BRACE' => 'OPERATOR', 
 | 
						|
        'R_BRACKET' => 'OPERATOR',
 | 
						|
        'R_SQ_BRACKET' => 'OPERATOR',
 | 
						|
        'L_BRACE' => 'OPERATOR', 
 | 
						|
        'L_BRACKET' => 'OPERATOR',
 | 
						|
        'L_SQ_BRACKET' => 'OPERATOR',
 | 
						|
        'OTHER_OPERATOR' => 'OPERATOR',
 | 
						|
        'GENERIC_IDENTIFIER' => null,
 | 
						|
        'AT_IDENTIFIER' => 'KEYWORD',
 | 
						|
        'IMPORTANT' => 'KEYWORD',
 | 
						|
    );
 | 
						|
    
 | 
						|
    public function init() {
 | 
						|
        $this->regexen = array(
 | 
						|
            // For the first pass we just feed in a bunch of tokens.
 | 
						|
            // Some of these are generic and will require disambiguation later
 | 
						|
            'COMMENT_SL' => LuminousTokenPresets::$C_COMMENT_SL,
 | 
						|
            'COMMENT_ML' =>  LuminousTokenPresets::$C_COMMENT_ML,
 | 
						|
            'STRING_S' => LuminousTokenPresets::$SINGLE_STR,
 | 
						|
            'STRING_D' => LuminousTokenPresets::$DOUBLE_STR,
 | 
						|
            // TODO check var naming, is $1 a legal variable?
 | 
						|
            'VARIABLE' => '%\$[\-a-z_0-9]+ | \#\{\$[\-a-z_0-9]+\} %x',
 | 
						|
            'AT_IDENTIFIER' => '%@[a-zA-Z0-9]+%',
 | 
						|
            
 | 
						|
            // This is generic - it may be a selector fragment, a rule, or
 | 
						|
            // even a hex colour.
 | 
						|
            'GENERIC_IDENTIFIER' => '@
 | 
						|
                \\#[a-fA-F0-9]{3}(?:[a-fA-F0-9]{3})?
 | 
						|
                |
 | 
						|
                [0-9]+(\.[0-9]+)?(\w+|%|in|cm|mm|em|ex|pt|pc|px|s)?
 | 
						|
                |
 | 
						|
                -?[a-zA-Z_\-0-9]+[a-zA-Z_\-0-9]*
 | 
						|
                |&
 | 
						|
            @x',
 | 
						|
            'IMPORTANT' => '/!important/',
 | 
						|
            'L_BRACE' => '/\{/',
 | 
						|
            'R_BRACE' => '/\}/',
 | 
						|
            'L_SQ_BRACKET' => '/\[/',
 | 
						|
            'R_SQ_BRACKET' => '/\]/',
 | 
						|
            'L_BRACKET' => '/\(/',
 | 
						|
            'R_BRACKET' => '/\)/',
 | 
						|
            
 | 
						|
            'DOUBLE_COLON' => '/::/',
 | 
						|
            'COLON' => '/:/',
 | 
						|
            'SEMICOLON' => '/;/',
 | 
						|
            
 | 
						|
            'DOT' => '/\./',
 | 
						|
            'HASH' => '/#/',
 | 
						|
            
 | 
						|
            'COMMA' => '/,/',
 | 
						|
            
 | 
						|
            'OTHER_OPERATOR' => '@[+\-*/%&>=!]@',
 | 
						|
 | 
						|
            'WHITESPACE' => '/\s+/'
 | 
						|
        );
 | 
						|
    }
 | 
						|
    
 | 
						|
    
 | 
						|
    public function main() {
 | 
						|
        while (!$this->eos()) {
 | 
						|
            $m = null;
 | 
						|
            foreach($this->regexen as $token=>$pattern) {
 | 
						|
                if ( ($m = $this->scan($pattern)) !== null) {
 | 
						|
                    $this->record($m, $token);
 | 
						|
                    break;
 | 
						|
                }
 | 
						|
            }
 | 
						|
            if ($m === null) {
 | 
						|
                $this->record($this->get(), null);
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $parser = new LuminousSASSParser();
 | 
						|
        $parser->tokens = $this->tokens;
 | 
						|
        $parser->parse();
 | 
						|
        $this->tokens = $parser->tokens;
 | 
						|
    }
 | 
						|
}
 | 
						|
/**
 | 
						|
 * The parsing class 
 | 
						|
 */
 | 
						|
class LuminousSASSParser {
 | 
						|
    
 | 
						|
    public $tokens;
 | 
						|
    public $index;
 | 
						|
    public $stack;
 | 
						|
    static $delete_token = 'delete';
 | 
						|
    
 | 
						|
    /**
 | 
						|
     * Returns true if the next token is the given token name
 | 
						|
     * optionally skipping whitespace
 | 
						|
     */
 | 
						|
    function next_is($token_name, $ignore_whitespace = false) {
 | 
						|
        $i = $this->index+1;
 | 
						|
        $len = count($this->tokens);
 | 
						|
        while($i<$len) {
 | 
						|
            $tok = $this->tokens[$i][0];
 | 
						|
            if ($ignore_whitespace && $tok === 'WHITESPACE') {
 | 
						|
                $i++;
 | 
						|
            }
 | 
						|
            else {
 | 
						|
                return $tok === $token_name;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        return false;
 | 
						|
    }
 | 
						|
    /**
 | 
						|
     * Returns the index of the next match of the sequence of tokens
 | 
						|
     * given, optionally ignoring ertain tokens
 | 
						|
     */
 | 
						|
    function next_sequence($sequence, $ignore=array()) {
 | 
						|
        $i = $this->index+1;
 | 
						|
        $len = count($this->tokens);
 | 
						|
        $seq_len = count($sequence);
 | 
						|
        $seq = 0;
 | 
						|
        $seq_start = 0;
 | 
						|
        while ($i<$len) {
 | 
						|
            $tok = $this->tokens[$i][0];
 | 
						|
            if ($tok === $sequence[$seq]) {
 | 
						|
                if ($seq === 0) $seq_start = $i;
 | 
						|
                $seq++;
 | 
						|
                $i++;
 | 
						|
                if ($seq === $seq_len) {
 | 
						|
                    return $seq_start;
 | 
						|
                }
 | 
						|
            } else {
 | 
						|
                if (in_array($tok, $ignore)) {}
 | 
						|
                else {
 | 
						|
                    $seq = 0;
 | 
						|
                }
 | 
						|
                $i++;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        return $len;
 | 
						|
    }
 | 
						|
 | 
						|
    /**
 | 
						|
     * Returns the first token which occurs out of the set of given tokens
 | 
						|
     */
 | 
						|
    function next_of($token_names) {
 | 
						|
        $i = $this->index+1;
 | 
						|
        $len = count($this->tokens);
 | 
						|
        while ($i<$len) {
 | 
						|
            $tok = $this->tokens[$i][0];
 | 
						|
            if (in_array($tok, $token_names)) {
 | 
						|
                return $tok;
 | 
						|
            }
 | 
						|
            $i++;
 | 
						|
        }
 | 
						|
        return null;
 | 
						|
        
 | 
						|
    }
 | 
						|
    /**
 | 
						|
     * Returns the index of the next token with the given token name
 | 
						|
     */
 | 
						|
    function next_of_type($token_name) {
 | 
						|
        $i = $this->index+1;
 | 
						|
        $len = count($this->tokens);
 | 
						|
        while($i<$len) {
 | 
						|
            $tok = $this->tokens[$i][0];
 | 
						|
            if ($tok === $token_name) {
 | 
						|
                return $i;
 | 
						|
            }
 | 
						|
            $i++;
 | 
						|
        }
 | 
						|
        return $len;
 | 
						|
    }
 | 
						|
    
 | 
						|
    private function _parse_identifier($token) {
 | 
						|
        $val = $token[1];
 | 
						|
        $c = isset($val[0])? $val[0] : '';
 | 
						|
        if (ctype_digit($c) || $c === '#') {
 | 
						|
            $token[0] = 'NUMERIC';
 | 
						|
        }
 | 
						|
    }
 | 
						|
    
 | 
						|
    /**
 | 
						|
    * Parses a selector rule 
 | 
						|
    */
 | 
						|
    private function _parse_rule() {  
 | 
						|
        $new_token = $this->tokens[$this->index];
 | 
						|
        $set = false;
 | 
						|
        if ($this->index > 0) {
 | 
						|
            $prev_token = &$this->tokens[$this->index-1];
 | 
						|
            $prev_token_type = &$prev_token[0];
 | 
						|
            $prev_token_text = &$prev_token[1];
 | 
						|
            $concat = false;
 | 
						|
            
 | 
						|
            $map = array(
 | 
						|
                'DOT' => 'CLASS_SELECTOR',
 | 
						|
                'HASH' => 'ID_SELECTOR',
 | 
						|
                'COLON' => 'PSEUDO_SELECTOR',
 | 
						|
                'DOUBLE_COLON' => 'PSEUDO_SELECTOR'
 | 
						|
            );
 | 
						|
            if (isset($map[$prev_token_type])) {
 | 
						|
                // mark the prev token for deletion and concat into one.
 | 
						|
                $new_token[0] = $map[$prev_token_type];
 | 
						|
                $prev_token_type = self::$delete_token;
 | 
						|
                $new_token[1] = $prev_token_text . $new_token[1];
 | 
						|
                $set = true;
 | 
						|
            }
 | 
						|
        }
 | 
						|
        if (!$set) {
 | 
						|
            // must be an element
 | 
						|
            $new_token[0] = 'ELEMENT_SELECTOR';
 | 
						|
        }
 | 
						|
        $this->tokens[$this->index] = $new_token;
 | 
						|
    }
 | 
						|
    
 | 
						|
    /**
 | 
						|
     * Cleans up the token stream by deleting any tokens marked for 
 | 
						|
     * deletion, and makes sure the array is continuous afterwards.
 | 
						|
     */
 | 
						|
    private function _cleanup() {
 | 
						|
        foreach($this->tokens as $i=>$t) {
 | 
						|
            if ($t[0] === self::$delete_token) {
 | 
						|
                unset($this->tokens[$i]);
 | 
						|
            }
 | 
						|
        }
 | 
						|
        $this->tokens = array_values($this->tokens);
 | 
						|
    }
 | 
						|
    /**
 | 
						|
     * Main parsing function
 | 
						|
     */
 | 
						|
    public function parse() {
 | 
						|
        $new_tokens = array();
 | 
						|
        $len = count($this->tokens);
 | 
						|
        $this->stack = array();
 | 
						|
        $prop_value = 'PROPERTY';
 | 
						|
        $pushes = array(
 | 
						|
            'L_BRACKET' => 'bracket',
 | 
						|
            'L_BRACE' => 'brace',
 | 
						|
            'AT_IDENTIFIER' => 'at',
 | 
						|
            'L_SQ_BRACKET' => 'square'
 | 
						|
        );
 | 
						|
        $pops = array(
 | 
						|
            'R_BRACKET' => 'bracket',
 | 
						|
            'R_BRACE' => 'brace',
 | 
						|
            'R_SQ_BRACKET' => 'square'
 | 
						|
        );
 | 
						|
        $this->index = 0;
 | 
						|
        while($this->index < $len) {
 | 
						|
            $token = &$this->tokens[$this->index];
 | 
						|
            $stack_size = count($this->stack);
 | 
						|
            $state = !$stack_size? null : $this->stack[$stack_size-1];
 | 
						|
            $tok_name = &$token[0];
 | 
						|
            $in_brace = in_array('brace', $this->stack);
 | 
						|
            $in_bracket = in_array('bracket', $this->stack);
 | 
						|
            $in_sq = in_array('square', $this->stack);
 | 
						|
            $in_at = in_array('at', $this->stack);
 | 
						|
            if ($tok_name === self::$delete_token) continue;
 | 
						|
            
 | 
						|
            if ($tok_name === 'L_BRACE') {
 | 
						|
                if ($state === 'at') {
 | 
						|
                    array_pop($this->stack);
 | 
						|
                }
 | 
						|
                $this->stack[] = $pushes[$tok_name];
 | 
						|
                $prop_value = 'PROPERTY';
 | 
						|
            }
 | 
						|
            elseif (isset($pushes[$tok_name])) {
 | 
						|
                $this->stack[] = $pushes[$tok_name];
 | 
						|
            } else if (isset($pops[$tok_name]) && $state === $pops[$tok_name]) {
 | 
						|
                array_pop($this->stack);
 | 
						|
            }
 | 
						|
            elseif (!$in_bracket && $tok_name === 'COLON') {
 | 
						|
                $prop_value = 'VALUE';
 | 
						|
            }
 | 
						|
            elseif ($tok_name === 'SEMICOLON') {
 | 
						|
                $prop_value = 'PROPERTY';
 | 
						|
                if ($state === 'at') array_pop($this->stack);
 | 
						|
            }
 | 
						|
            elseif ($tok_name === 'GENERIC_IDENTIFIER') {
 | 
						|
                // this is where the fun starts.
 | 
						|
                // we have to figure out exactly what this is
 | 
						|
                // if we can look ahead and find a '{' before we find a 
 | 
						|
                // ';', then this is part of a selector.
 | 
						|
                // Otherwise it's part of a property/value pair.
 | 
						|
                // the exception is when we have something like:
 | 
						|
                // font : { family : sans-serif; } 
 | 
						|
                // then we need to check for ':{'
 | 
						|
                if ($in_sq) {
 | 
						|
                    $token[0] = 'ATTR_SELECTOR';
 | 
						|
                }
 | 
						|
                else if ($in_bracket) {
 | 
						|
                    $this->_parse_identifier($token);
 | 
						|
                }
 | 
						|
                elseif(!$in_at) {
 | 
						|
                    $semi = $this->next_of_type('SEMICOLON');
 | 
						|
                    $colon_brace = $this->next_sequence(array('COLON', 'L_BRACE'),
 | 
						|
                        array('WHITESPACE'));
 | 
						|
                    $brace = $this->next_of_type('L_BRACE');
 | 
						|
                    
 | 
						|
                    $rule_terminator = min($semi, $colon_brace);
 | 
						|
                    if ($brace < $rule_terminator) {
 | 
						|
                        $this->_parse_rule();
 | 
						|
                        $prop_value = 'PROPERTY';
 | 
						|
                    } else {
 | 
						|
                        $this->tokens[$this->index][0] = $prop_value;
 | 
						|
                        if ($prop_value === 'VALUE') {
 | 
						|
                            $this->_parse_identifier($token);
 | 
						|
                        }
 | 
						|
                    }
 | 
						|
                }
 | 
						|
                
 | 
						|
            }
 | 
						|
            $this->index++;
 | 
						|
        }
 | 
						|
        $this->_cleanup();
 | 
						|
    }
 | 
						|
} |