First commit

This commit is contained in:
Pierre Hubert
2016-11-19 12:08:12 +01:00
commit 990540b2b9
4706 changed files with 931207 additions and 0 deletions

View File

@ -0,0 +1,173 @@
<?php
/// @cond ALL
/**
* \file luminous_formatter.class.php
* \brief Formatting logic -- converts Luminous output into displayable formats
*/
/**
 * \brief Abstract class to convert Luminous output into a universal format.
 *
 * Abstract base class to implement an output formatter. A formatter
 * will convert Luminous's tags into some kind of output (e.g. HTML), by
 * overriding the method format().
 */
abstract class LuminousFormatter {
  /// Number of chars to wrap at
  public $wrap_length = 120;
  /// Don't use this yet.
  public $language_specific_tags = false;
  /**
   * Tab width, in spaces. If this is -1 or 0, tabs will not be converted. This
   * is not recommended as browsers may render tabs as different widths which
   * will break the wrapping.
   */
  public $tab_width = 2;
  /// Whether or not to add line numbering
  public $line_numbers = true;
  /// Number of first line
  public $start_line = 1;
  /// An array of lines to be highlighted initially, if the formatter supports
  /// it
  public $highlight_lines = array();
  /// sets whether or not to link URIs.
  public $link = true;
  /**
   * Height of the resulting output. This may or may not make any sense
   * depending on the output format.
   *
   * Use 0 or -1 for no limit.
   */
  public $height = 0;
  /**
   * The language of the source code being highlighted. Formatters may choose
   * to do something with this.
   */
  public $language = null;

  /**
   * The main method for interacting with formatter objects.
   * @param src the input string, which is of the form output by an instance of
   * Luminous.
   * @return The input string reformatted to some other specification.
   */
  public abstract function format($src);

  /**
   * If relevant, the formatter should implement this and use LuminousCSSParser
   * to port the theme. The base implementation does nothing.
   * @param $theme A CSS string representing the theme
   */
  public function set_theme($theme)
  {
  }

  /**
   * @internal
   * Handles line wrapping.
   *
   * The line is tokenised into tags (<...>), entities (&...;), runs of
   * spaces/tabs and everything else. Tags take up no width, an entity counts
   * as one character and any other token counts as its byte length. A token
   * that does not fit in the remaining width is moved to a new line; a token
   * longer than a whole line is hard-split.
   *
   * @param line the line which needs to be broken. This is a reference, which
   * will be operated upon. After calling, $line will have appropriate line
   * breaks to wrap to the given width, and will contain at least one line break
   * at the end.
   * @param wrap_length the width to wrap to. A value <= 0 disables wrapping.
   *
   * @return the number of lines it was broken up into (1 obviously means no
   * wrapping occurred.).
   *
   * @todo wrap to indent? or not? hm.
   */
  protected static function wrap_line(&$line, $wrap_length) {
    // The vast majority of lines will not need wrapping so it pays to
    // check this first. isset() is a cheap "is the raw line longer than the
    // limit" test; strip_tags() is only reached for lines that are.
    if ($wrap_length <= 0 || !isset($line[$wrap_length])
      || strlen(strip_tags($line)) < $wrap_length) {
      $line .= "\n";
      return 1;
    }

    $line_split = preg_split('/((?:<.*?>)|(?:&.*?;)|[ \t]+)/',
      $line, -1, PREG_SPLIT_NO_EMPTY|PREG_SPLIT_DELIM_CAPTURE);

    $strlen = 0;      // visible width used so far on the current output line
    $line_cpy = "";   // the wrapped line being built
    $num_lines = 1;

    foreach($line_split as $l) {
      $l0 = $l[0];
      if ($l0 === '<') {
        // tags are copied verbatim and do not take up any width
        $line_cpy .= $l;
        continue;
      }
      $s = strlen($l);
      if($l0 === '&') {
        // html entity codes only count as 1 char.
        if(++$strlen > $wrap_length) {
          $strlen = 1;
          $line_cpy .= "\n";
          $num_lines++;
        }
        $line_cpy .= $l;
        continue;
      }
      if ($s+$strlen <= $wrap_length) {
        // fits on the current line
        $line_cpy .= $l;
        $strlen += $s;
        continue;
      }
      if ($s <= $wrap_length) {
        // doesn't fit here, but fits on a line of its own
        $line_cpy .= "\n" . $l;
        $num_lines++;
        $strlen = $s;
        continue;
      }
      // at this point, the line needs wrapping.
      // bump us up to the next line
      $diff = $wrap_length-$strlen;
      $line_cpy .= substr($l, 0, $diff) . "\n";
      $l_ = substr($l, $diff);
      // now start copying.
      $strlen = 0;
      // this would probably be marginally faster if it did its own arithmetic
      // instead of calling strlen
      while (strlen($l_) > 0) {
        $strl = strlen($l_);
        // each pass accounts for the line opened by the preceding "\n"
        $num_lines++;
        if ($strl > $wrap_length) {
          $line_cpy .= substr($l_, 0, $wrap_length) . "\n";
          $l_ = substr($l_, $wrap_length);
        } else {
          $line_cpy .= $l_;
          $strlen = $strl;
          break;
        }
      }
    }
    $line = $line_cpy . "\n";
    return $num_lines;
  }
}
/// @endcond

View File

@ -0,0 +1,344 @@
<?php
/// @cond ALL
/**
 * Collection of templates and templating utilities
 */
class LuminousHTMLTemplates {
  // NOTE Don't worry about whitespace in the templates - it gets stripped from the innerHTML,
  // so the <pre>s aren't affected. Make it readable :)
  /// Normal container
  const container_template = '
<div
class="luminous"
data-language="{language}"
style="{height_css}"
>
{subelement}
</div>';
  /// Inline code container
  const inline_template = '
<div
class="luminous inline"
data-language="{language}"
>
{subelement}
</div>';
  /// line number-less
  const numberless_template = '
<pre
class="code"
>
{code}
</pre>';
  /// line numbered
  // NOTE: there's a good reason we use tables here and that's because
  // nothing else works reliably.
  const numbered_template = '
<table>
<tbody>
<tr>
<td>
<pre class="line-numbers">
{line_numbers}
</pre>
</td>
<td class="code-container">
<pre class="code numbered"
data-startline="{start_line}"
data-highlightlines="{highlight_lines}"
>
{code}
</pre>
</td>
</tr>
</tbody>
</table>';

  /**
   * preg callback for _strip_template_whitespace(): keeps matched tags
   * untouched and replaces matched whitespace runs with the empty string.
   */
  private static function _strip_template_whitespace_cb($matches) {
    return ($matches[0][0] === '<')? $matches[0] : '';
  }

  /**
   * Removes every run of whitespace which lies outside of a tag. Whitespace
   * inside a tag (i.e. between '<' and '>') is matched as part of the tag and
   * therefore preserved.
   */
  private static function _strip_template_whitespace($string) {
    // __CLASS__ rather than the string 'self': "self" inside a callable is
    // deprecated in recent PHP versions.
    return preg_replace_callback('/\s+|<[^>]++>/',
      array(__CLASS__, '_strip_template_whitespace_cb'),
      $string);
  }

  /**
   * Formats a string with a given set of values
   * The format syntax uses {xyz} as a placeholder, which will be
   * substituted from the 'xyz' key from $variables
   *
   * All placeholders are substituted in a single pass over the template, so
   * text which looks like a placeholder inside a substituted value (for
   * example highlighted source code containing "{code}") is left alone.
   *
   * @param $template The template string
   * @param $variables An associative (keyed) array of values to be substituted
   * @param $strip_whitespace_from_template If @c TRUE, the template's whitespace is removed.
   * This allows templates to be written to be easier to read, without having to worry about
   * the pre element inheriting any unintended whitespace
   * @return The template with its placeholders substituted
   */
  public static function format($template, $variables, $strip_whitespace_from_template = true) {
    if ($strip_whitespace_from_template) {
      // whitespace is stripped before substitution so that the values'
      // own whitespace survives
      $template = self::_strip_template_whitespace($template);
    }
    $pairs = array();
    foreach($variables as $search => $replace) {
      $pairs['{' . $search . '}'] = (string)$replace;
    }
    if (empty($pairs)) {
      return $template;
    }
    // strtr() never re-scans text it has already replaced, unlike a chain of
    // str_replace() calls.
    return strtr($template, $pairs);
  }
}
/**
 * HTML output formatter. Converts Luminous's tagged output into a
 * <div class="luminous"> element, optionally with a line number gutter and
 * with URIs turned into links.
 */
class LuminousFormatterHTML extends LuminousFormatter {
  // overridden by inline formatter
  protected $inline = false;
  public $height = 0;
  /**
   * strict HTML standards: the target attribute won't be used in links
   * \since 0.5.7
   */
  public $strict_standards = false;

  /**
   * Builds the CSS declaration limiting the widget's height.
   * @return "max-height: ...;" if $height is a positive value (px is assumed
   * if the value does not end in a non-digit), or the empty string otherwise.
   */
  private function height_css() {
    $height = trim('' . $this->height);
    $css = '';
    if (!empty($height) && (int)$height > 0) {
      // look for units, use px if there are none
      if (!preg_match('/\D$/', $height)) $height .= 'px';
      $css = "max-height: {$height};";
    }
    return $css;
  }

  // NOTE: template_cb() and template() are not called from anywhere in this
  // class; templating is done by LuminousHTMLTemplates::format().
  private static function template_cb($matches) {
  }

  // strips out unnecessary whitespace from a template
  private static function template($t, $vars=array()) {
    $t = preg_replace_callback('/\s+|<[^>]++>/',
      array(__CLASS__, 'template_cb'),
      $t);
    array_unshift($vars, $t);
    $code = call_user_func_array('sprintf', $vars);
    return $code;
  }

  /**
   * Wraps every line of $src to $wrap_length.
   * @param $src the (possibly multi-line) source string
   * @return the wrapped lines, joined by newlines
   */
  private function lines_numberless($src) {
    $lines = array();
    foreach(explode("\n", $src) as $line) {
      $this->wrap_line($line, $this->wrap_length);
      // wrap_line always appends a newline; strip it as we're going to join
      // the lines ourselves. Seems the easiest way to
      // fix http://code.google.com/p/luminous/issues/detail?id=10
      $lines[] = substr($line, 0, -1);
    }
    return implode("\n", $lines);
  }

  /// Formats $src using the template without a line number gutter
  private function format_numberless($src) {
    return LuminousHTMLTemplates::format(
      LuminousHTMLTemplates::numberless_template,
      array(
        'height_css' => $this->height_css(),
        'code' => $this->lines_numberless($src)
      )
    );
  }

  /**
   * Converts Luminous output to HTML.
   *
   * Optionally links URIs, lays the code out with or without line numbers,
   * rewrites the upper-case Luminous tags to span elements and finally
   * places the result in the (inline or normal) container template.
   *
   * @param $src the string output by Luminous
   * @return the HTML string
   */
  public function format($src) {
    if ($this->link) $src = $this->linkify($src);

    if ($this->line_numbers) {
      $code_block = $this->format_numbered($src);
    }
    else {
      $code_block = $this->format_numberless($src);
    }

    // convert </ABC> to </span>
    $code_block = preg_replace('/(?<=<\/)[A-Z_0-9]+(?=>)/S', 'span',
      $code_block);
    // convert <ABC> to <span class=ABC>
    // (a closure rather than create_function(), which no longer exists in
    // PHP 8)
    $cb = function ($matches) {
      return "<span class='" . strtolower($matches[1]) . "'>";
    };
    $code_block = preg_replace_callback('/<([A-Z_0-9]+)>/', $cb, $code_block);

    $format_data = array(
      'language' => ($this->language === null)? '' : htmlentities($this->language),
      'subelement' => $code_block,
      'height_css' => $this->height_css()
    );
    return LuminousHTMLTemplates::format(
      $this->inline? LuminousHTMLTemplates::inline_template :
        LuminousHTMLTemplates::container_template,
      $format_data
    );
  }

  /**
   * Detects and links URLs - callback
   *
   * @param $matches the match array from linkify()'s regex; group 1 is the
   * scheme part, if the URI had one.
   * @return the match wrapped in a link, or the unchanged match if linking
   * it would produce malformed HTML.
   */
  protected function linkify_cb($matches) {
    $uri = (isset($matches[1]) && strlen(trim($matches[1])))? $matches[0]
      : "http://" . $matches[0];

    // we dont want to link if it would cause malformed HTML
    $open_tags = array();
    $close_tags = array();
    preg_match_all("/<(?!\/)([^\s>]*).*?>/", $matches[0], $open_tags,
      PREG_SET_ORDER);
    preg_match_all("/<\/([^\s>]*).*?>/", $matches[0], $close_tags,
      PREG_SET_ORDER);

    if (count($open_tags) != count($close_tags))
      return $matches[0];
    if (isset($open_tags[0])
      && trim($open_tags[0][1]) !== trim($close_tags[0][1])
    )
      return $matches[0];

    $uri = strip_tags($uri);

    $target = ($this->strict_standards)? '' : ' target="_blank"';
    return "<a href='{$uri}' class='link'{$target}>{$matches[0]}</a>";
  }

  /**
   * Detects and links URLs
   *
   * @param $src the string to search for URIs
   * @return $src with URIs wrapped in links, or $src unchanged if it
   * contains neither "http" nor "www" or if the regex engine reports an
   * error.
   */
  protected function linkify($src) {
    if (stripos($src, "http") === false && stripos($src, "www") === false)
      return $src;

    $chars = "0-9a-zA-Z\$\-_\.+!\*,%";
    $src_ = $src;
    // everyone stand back, I know regular expressions
    $src = preg_replace_callback(
      "@(?<![\w])
(?:(https?://(?:www[0-9]*\.)?) | (?:www\d*\.) )
# domain and tld
(?:[$chars]+)+\.[$chars]{2,}
# we don't include tags at the EOL because these are likely to be
# line-enclosing tags.
(?:[/$chars/?=\#;]+|&amp;|<[^>]+>(?!$))*
@xm",
      array($this, 'linkify_cb'), $src);
    // this can hit a backtracking limit, in which case it nulls our string
    // FIXME: see if we can make the above regex more resilient wrt
    // backtracking
    if (preg_last_error() !== PREG_NO_ERROR) {
      $src = $src_;
    }
    return $src;
  }

  /**
   * Formats $src using the line numbered template. Each line is placed in its
   * own span and a parallel column of line numbers, counting from
   * $start_line, is generated.
   */
  private function format_numbered($src) {
    $lines = '<span>' .
      str_replace("\n", "\n</span><span>", $src, $num_replacements) .
      "\n</span>";
    $num_lines = $num_replacements + 1;
    $last_line = $this->start_line + $num_lines - 1;

    $line_numbers = '<span>' . implode('</span><span>',
      range($this->start_line, $last_line, 1)
    ) . '</span>';

    $format_data = array(
      // max number of digits in a line number - this is used by the CSS.
      // The cast has to apply to the sum, not to start_line alone.
      'line_number_digits' => strlen((string)$last_line),
      'start_line' => $this->start_line,
      'height_css' => $this->height_css(),
      'highlight_lines' => implode(',', $this->highlight_lines),
      'code' => $lines,
      'line_numbers' => $line_numbers
    );

    return LuminousHTMLTemplates::format(
      LuminousHTMLTemplates::numbered_template,
      $format_data
    );
  }
}
/**
 * HTML formatter for inline snippets: same output as LuminousFormatterHTML,
 * but placed in the inline container, without line numbers and without a
 * height limit.
 */
class LuminousFormatterHTMLInline extends LuminousFormatterHTML {
  /**
   * Forces the inline settings onto this formatter, then defers to the
   * regular HTML formatter.
   * @param $src the string output by Luminous
   * @return the HTML string
   */
  public function format($src)
  {
    // these are forced on every call, whatever the caller configured
    $this->inline = true;
    $this->height = 0;
    $this->line_numbers = false;

    return parent::format($src);
  }
}
/**
 * HTML formatter which emits a complete, standalone HTML document with the
 * layout stylesheet and the theme embedded in a style element.
 */
class LuminousFormatterHTMLFullPage extends LuminousFormatterHTML {
  /// The theme CSS, as handed to set_theme()
  protected $theme_css = null;
  /// Contents of style/luminous.css, loaded by get_layout()
  protected $css = null;

  /**
   * Stores the theme so that it can be embedded into the page.
   * @param $css A CSS string representing the theme
   */
  public function set_theme($css)
  {
    $this->theme_css = $css;
  }

  /**
   * Reads the layout stylesheet from underneath the Luminous root directory
   * into $css.
   */
  protected function get_layout()
  {
    // this path info shouldn't really be here
    $this->css = file_get_contents(luminous::root() . '/style/luminous.css');
  }

  /**
   * Formats $src as a whole HTML page. The height limit is always disabled.
   * @param $src the string output by Luminous
   * @return the HTML document
   */
  public function format($src)
  {
    $this->height = 0;
    $this->get_layout();
    $body = parent::format($src);

    return <<<EOF
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title></title>
<style type='text/css'>
body {
margin: 0;
}
/* luminous.css */
{$this->css}
/* End luminous.css */
/* Theme CSS */
{$this->theme_css}
/* End theme CSS */
</style>
</head>
<body>
<!-- Begin luminous code //-->
{$body}
<!-- End Luminous code //-->
</body>
</html>
EOF;
  }
}
/// @endcond

View File

@ -0,0 +1,14 @@
<?php
/// @cond ALL
/**
 * Identity formatter: hands Luminous's output back exactly as it received
 * it. It only exists so that "no formatting" can be selected through the
 * same interface as every other formatter.
 */
class LuminousIdentityFormatter extends LuminousFormatter {
  /**
   * @param $str the string output by Luminous
   * @return $str, untouched
   */
  public function format($str)
  {
    return $str;
  }
}
/// @endcond

View File

@ -0,0 +1,200 @@
<?php
/// @cond ALL
require_once(dirname(__FILE__) . '/../utils/cssparser.class.php');
/**
 * LaTeX output formatter for Luminous.
 *
 * Produces a complete LaTeX document in which the code sits inside a
 * fancyvrb Verbatim environment and every Luminous tag becomes a
 * \lms<NAME>{...} command generated from the theme.
 *
 * \since 0.5.4
 */
class LuminousFormatterLatex extends LuminousFormatter {
  /// The parsed theme (LuminousCSSParser), or null until set_theme() is called
  private $css = null;

  public function __construct() { }

  /**
   * Parses the theme so that the style commands can be generated from it.
   * @param $theme A CSS string representing the theme
   */
  public function set_theme($theme) {
    $this->css = new LuminousCSSParser();
    $this->css->convert($theme);
  }

  /// Converts a hexadecimal string in the form #ABCDEF to an RGB array
  /// where each element is normalised to the range 0-1
  /// (rounded to two decimal places)
  public static function hex2rgb($hex) {
    $x = hexdec(substr($hex, 1));
    $b = $x % 256;
    $g = ($x >> 8) % 256;
    $r = ($x >> 16) % 256;

    $b /= 255.0;
    $g /= 255.0;
    $r /= 255.0;

    $b = round($b, 2);
    $g = round($g, 2);
    $r = round($r, 2);

    $rgb = array($r, $g, $b);
    return $rgb;
  }

  /**
   * Converts a #ABCDEF colour into the "r, g, b" argument string used by the
   * LaTeX colour commands.
   * @return the string, or null if $col is not a six digit hex colour
   */
  private static function rgb_string($col) {
    if (!preg_match('/^#[a-f0-9]{6}$/i', $col))
      return null;
    $rgb = self::hex2rgb($col);
    return "{$rgb[0]}, {$rgb[1]}, {$rgb[2]}";
  }

  /// Links are not generated for LaTeX output; returns $src unchanged.
  protected function linkify($src) {
    return $src;
  }

  /**
   * Defines all the styling commands, these are obtained from the css parser.
   *
   * One \newcommand{\lms<NAME>} is emitted per rule whose name consists of
   * word characters only; <NAME> is the rule name upper-cased with its
   * underscores removed. If line numbering is on and the 'code' rule has a
   * hex colour, the line number command is redefined to use that colour.
   *
   * @return the LaTeX command definitions, one per line
   * @throw Exception if no theme has been set
   */
  public function define_style_commands() {
    if ($this->css === null)
      throw new Exception('LaTeX formatter has not been set a theme');

    $cmds = array();
    foreach($this->css->rules() as $name=>$properties) {
      if (!preg_match('/^\w+$/', $name))
        continue;
      // the command is built inside-out around its argument
      $cmd = "{#1}" ;
      if ($this->css->value($name, 'bold', false) === true)
        $cmd = "{\\textbf$cmd}";
      if ($this->css->value($name, 'italic', false) === true)
        $cmd = "{\\emph$cmd}";
      if (($col = $this->css->value($name, 'color', null)) !== null) {
        $col_str = self::rgb_string($col);
        if ($col_str !== null)
          $cmd = "{\\textcolor[rgb]{{$col_str}}$cmd}";
      }
      $name = str_replace('_', '', $name);
      $name = strtoupper($name);
      $cmds[] = "\\newcommand{\\lms{$name}}[1]$cmd";
    }

    if ($this->line_numbers &&
      ($col = $this->css->value('code', 'color', null)) !== null) {
      $col_str = self::rgb_string($col);
      if ($col_str !== null) {
        $cmds[] = "\\renewcommand{\\theFancyVerbLine}{%\n"
          . "\\textcolor[rgb]{{$col_str}}{\\arabic{FancyVerbLine}}}";
      }
    }
    return implode("\n", $cmds);
  }

  /**
   * @return a \pagecolor command for the theme's 'code' background colour,
   * or the empty string if no theme is set or the theme has no six digit
   * hex background colour.
   */
  public function get_background_colour() {
    if ($this->css === null)
      return "";
    $col = $this->css->value('code', 'bgcolor', null);
    if ($col === null)
      return "";
    // a colour we can't convert must not produce a \pagecolor command at all
    $col_str = self::rgb_string($col);
    if ($col_str === null)
      return "";
    return "\\pagecolor[rgb]{{$col_str}}";
  }

  /**
   * Converts Luminous output into a LaTeX document.
   * @param $str the string output by Luminous
   * @return the LaTeX source
   * @throw Exception if no theme has been set
   */
  public function format($str) {
    $verbcmd = "\\begin{Verbatim}[commandchars=\\\\\\{\}";
    if ($this->line_numbers)
      $verbcmd .= ",numbers=left,firstnumber=" . (int)$this->start_line
        . ",stepnumber=1";
    $verbcmd .= ']';

    // define the preamble
    $out = <<<EOF
\documentclass{article}
\usepackage{fullpage}
\usepackage{color}
\usepackage{fancyvrb}
\begin{document}
{$this->define_style_commands()}
{$this->get_background_colour()}
$verbcmd
EOF;

    // drop elements which contain nothing but whitespace
    $str = preg_replace('%<([^/>]+)>\s*</\\1>%', '', $str);
    $str = str_replace("\t", ' ', $str);

    if ($this->wrap_length > 0) {
      $lines = explode("\n", $str);
      $str = '';
      foreach($lines as $l) {
        $this->wrap_line($l, $this->wrap_length);
        $str .= $l;
      }
    }

    // opening tag: <ABC_DEF> becomes \lmsABCDEF{
    $open_tag_cb = function ($matches) {
      return "\\lms" . str_replace("_", "", $matches[1]) . "{";
    };
    // text: escape the characters which are special to LaTeX
    $escape_cb = function ($matches) {
      if ($matches[0][0] == "\\")
        return "{\\textbackslash}";
      return "\\" . $matches[0];
    };

    $s = '';
    $str_ = preg_split('/(<[^>]+>)/', $str, -1,
      PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);
    foreach($str_ as $s_) {
      if ($s_[0] === '<') {
        $s_ = preg_replace('%</[^>]+>%', '}', $s_);
        $s_ = preg_replace_callback('%<([^>]+)>%', $open_tag_cb, $s_);
      } else {
        $s_ = str_replace('&gt;', '>', $s_);
        $s_ = str_replace('&lt;', '<', $s_);
        $s_ = str_replace('&amp;', '&', $s_);
        $s_ = preg_replace_callback('/[#{}_$\\\&]|&(?=amp;)/', $escape_cb, $s_);
      }
      $s .= $s_;
    }
    unset($str_);

    $s = "\\lmsCODE{" . $s . '}';

    /* XXX:
     * hack alert: leaving newline literals (\n) inside arguments seems to
     * leave them being totally ignored. This is a problem for wrapping.
     *
     * the current solution is to close all open lms commands before the
     * newline then reopen them afterwards.
     */
    $stack = array();
    $pieces = preg_split('/(\\\lms[^\{]+\{|(?<!\\\)(\\\\\\\\)*[\{\}])/', $s,
      -1, PREG_SPLIT_DELIM_CAPTURE|PREG_SPLIT_NO_EMPTY);

    // pieces are rewritten through their key rather than through a
    // by-reference loop variable, which would be left dangling after the loop
    foreach($pieces as $k=>$p) {
      if (preg_match('/^\\\lms/', $p))
        $stack[] = $p;
      elseif(preg_match('/^(\\\\\\\\)*\}/', $p)) {
        array_pop($stack);
      }
      elseif(preg_match('/^(\\\\\\\\)*{/', $p))
        $stack[] = $p;
      elseif(strpos($p, "\n") !== false) {
        $before = implode('', $stack);
        $after = str_repeat('}', count($stack));
        $pieces[$k] = str_replace("\n", "$after\n$before", $p);
      }
    }

    $out .= implode('', $pieces);
    // NOTE: this must not be a heredoc/double quoted "\end": PHP would read
    // "\e" as the escape character (0x1B).
    $out .= "\\end{Verbatim}\n\\end{document}";
    return $out;
  }
}
/// @endcond