Page MenuHomeIn-Portal Phabricator

pdf_styles.php
No OneTemporary

File Metadata

Created
Sat, Feb 1, 9:09 AM

pdf_styles.php

<?php
/**
* @version $Id: pdf_styles.php 12734 2009-10-20 19:28:11Z alex $
* @package In-Portal
* @copyright Copyright (C) 1997 - 2009 Intechnic. All rights reserved.
* @license GNU/GPL
* In-Portal is Open Source software.
* This means that this software may have been modified pursuant
* the GNU General Public License, and as distributed it includes
* or is derivative of works licensed under the GNU General Public License
* or other free or open source software licenses.
* See http://www.in-portal.org/license for copyright notices and details.
*/
defined('FULL_PATH') or die('restricted access!');
require_once(FULL_PATH.'/core/units/pdf/css_defaults.php');
class kPDFStylesheet {
/** @var mixed Declared but not referenced by any method visible in this class. */
protected $Selectors;
/** @var array Regex macro fragments keyed by macro name; filled by Prepare(). */
protected $Macros;
/** @var array Token buffers indexed by brace nesting level; index 0 collects selector tokens. */
protected $Buffer = array();
/** @var array Kind of opening token recorded for each nesting level. */
protected $Openings = array();
/** @var int Current brace nesting level while ParseTokens() runs. */
protected $Level = 0;
/**
 * Rule lookup keyed by upper-cased element name (or '*'); each entry is
 * array('selector' => ..., 'properties' => ...).
 *
 * This property used to be created implicitly on first write. It is declared
 * explicitly because creating dynamic properties is deprecated as of PHP 8.2.
 * It stays public to keep the visibility an implicitly created property had.
 *
 * @var array
 */
public $Mapping = array();
/** @var array Every parsed rule as array('selectors' => ..., 'properties' => ...). */
public $Rules = array();
/** @var int Running counter assigned to each parsed selector as its 'order'. */
public $SelectorOrder = 0;
/** @var int Order counter used for selectors synthesised from HTML presentational attributes. */
public $HTMLVisualPropsSelectorOrder = 0;
// Style origins; CmpSelectors() sorts by this value first (lower sorts earlier).
const STYLE_ORIGIN_AGENT_NORMAL = 0;
const STYLE_ORIGIN_USER_NORMAL = 1;
const STYLE_ORIGIN_AUTHOR_NORMAL = 2;
const STYLE_ORIGIN_AUTHOR_IMPORTANT = 3;
const STYLE_ORIGIN_USER_IMPORTANT = 4;
/**
 * Builds the regex macro table and, unless disabled, loads the built-in
 * default stylesheet with user-agent origin.
 *
 * @param bool $nodefaults When true, kCSSDefaults::$DEFAULT_STYLE is not parsed.
 */
function __construct($nodefaults = false)
{
    $this->Prepare();

    if ($nodefaults) {
        return;
    }

    $default_tokens = $this->GetTokens(kCSSDefaults::$DEFAULT_STYLE);
    $this->ParseTokens($default_tokens, self::STYLE_ORIGIN_AGENT_NORMAL);

    // Selectors synthesised from HTML attributes start numbering where the
    // defaults ended; the main counter then skips ahead by 1000.
    $this->HTMLVisualPropsSelectorOrder = $this->SelectorOrder;
    $this->SelectorOrder = $this->SelectorOrder + 1000;
}
/**
 * Parses an inline declaration list ("name: value; name: value") into a map.
 *
 * Empty segments (for example the one after a trailing ';') and segments
 * without a colon are skipped. Only the first colon separates name from
 * value, so values such as url(http://...) are kept intact.
 *
 * @param string $style Declaration list.
 * @return array Map of trimmed property name => trimmed value, in source order.
 */
public function ParseStyle($style)
{
    $res = array();

    foreach (explode(';', (string) $style) as $declaration) {
        $declaration = trim($declaration);
        if ($declaration === '' || strpos($declaration, ':') === false) {
            continue;
        }

        // Limit 2: everything after the first colon belongs to the value.
        list($name, $value) = explode(':', $declaration, 2);
        $name = trim($name);
        if ($name === '') {
            continue;
        }

        $res[$name] = trim($value);
    }

    return $res;
}
/*
stylesheet : [ CDO | CDC | S | statement ]*;
statement : ruleset | at-rule;
at-rule : ATKEYWORD S* any* [ block | ';' S* ];
block : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
ruleset : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
selector : any+;
declaration : DELIM? property S* ':' S* value;
property : IDENT;
value : [ any | block | ATKEYWORD S* ]+;
any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
| DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
| DASHMATCH | FUNCTION S* any* ')'
| '(' S* any* ')' | '[' S* any* ']' ] S*;
*/
/**
 * Walks a token list and turns every top-level "selector { declarations }"
 * group into a rule via AppendRule().
 *
 * Tokens seen at nesting level 0 are collected as selector tokens; tokens
 * inside a block are collected in the buffer for that level. A closing
 * brace is only recognised when it arrives as a TEXT token whose data is
 * exactly '}'. Blocks nested deeper than level 1 are buffered but never
 * turned into rules.
 *
 * @param array $tokens Tokens as produced by GetTokens().
 * @param int   $origin One of the STYLE_ORIGIN_* constants, passed on to AppendRule().
 */
function ParseTokens($tokens, $origin=kPDFStylesheet::STYLE_ORIGIN_AUTHOR_NORMAL )
{
    $this->Buffer[0] = array();

    foreach ($tokens as $token) {
        if ($token['name'] == 'LBRACE') {
            $this->Level++;
            $this->Buffer[$this->Level] = array();
            $this->Openings[$this->Level] = 'LBRACE';
            continue;
        }

        if ($token['name'] == 'TEXT' && $token['data'] == '}') {
            if ($this->Level < 1) {
                // Unbalanced '}': ignore it. Letting the level go negative
                // would stop every following rule from reaching level 1.
                continue;
            }

            if ($this->Level == 1 && $this->Openings[$this->Level] == 'LBRACE') {
                $this->AppendRule($this->Buffer[0], $this->Buffer[$this->Level], $origin);
                $this->Buffer[0] = array();
            }

            $this->Level--;
            continue;
        }

        $this->Buffer[$this->Level][] = $token;
    }
}
/**
 * Joins the 'data' field of every token into one string.
 *
 * @param array $tokens List of tokens, each with a 'data' entry.
 * @return string Concatenated data, '' for an empty list.
 */
protected function ConcatTokensData($tokens)
{
    $chunks = array();

    foreach ($tokens as $entry) {
        $chunks[] = $entry['data'];
    }

    return implode('', $chunks);
}
/**
 * Turns the tokens of a declaration block into a property map.
 *
 * A three-state machine is used: wait for an IDENT (property name), wait
 * for a TEXT ':' token, then accumulate token data until a TEXT ';' token.
 * Property names are upper-cased, values trimmed. A final declaration
 * without a terminating ';' is still stored. If input ends while a colon
 * is still expected, an E_USER_WARNING is raised. The collected map is
 * passed through ProcessShortHands() before being returned.
 *
 * @param array $tokens Tokens as produced by GetTokens().
 * @return array Map of upper-cased property name => value, shorthands expanded.
 */
public function ParseDefinitionTokens($tokens)
{
    $state = 'property';
    $declarations = array();
    $pending_value = '';

    foreach ($tokens as $token) {
        $is_text = ($token['name'] == 'TEXT');

        switch ($state) {
            case 'property':
                if ($token['name'] == 'IDENT') {
                    $property = $token['data'];
                    $state = 'colon';
                }
                break;

            case 'colon':
                if ($is_text && $token['data'] == ':') {
                    $state = 'value';
                }
                break;

            case 'value':
                if ($is_text && $token['data'] == ';') {
                    $declarations[strtoupper($property)] = trim($pending_value);
                    $pending_value = '';
                    $state = 'property';
                } else {
                    $pending_value .= $token['data'];
                }
                break;
        }
    }

    if ($state == 'value') {
        // Last declaration was not closed by ';'.
        $declarations[strtoupper($property)] = trim($pending_value);
    } elseif ($state == 'colon') {
        trigger_error('Error parsing CSS definition, no colon and/or value after property '.$property, E_USER_WARNING);
    }

    return $this->ProcessShortHands($declarations);
}
/**
 * Expands shorthand properties into their longhand equivalents.
 *
 * Handled shorthands:
 *  - MARGIN: 1 to 4 whitespace-separated values, each a number with an
 *    optional px/pt/em/ex/% unit or "auto"; any other form is dropped.
 *  - PADDING: 1 to 4 space-separated values (no validation).
 *  - BORDER and BORDER-{TOP,RIGHT,BOTTOM,LEFT}: split by ParseBorderShorthand()
 *    into -STYLE / -WIDTH / -COLOR.
 * Every other property is copied through unchanged.
 *
 * @param array $properties Map of upper-cased property name => value.
 * @return array Map with shorthands replaced by longhand properties.
 */
public function ProcessShortHands($properties)
{
    $sides = array('TOP', 'RIGHT', 'BOTTOM', 'LEFT');
    $border_aspects = array('style' => 'STYLE', 'width' => 'WIDTH', 'color' => 'COLOR');
    $res = array();

    foreach ($properties as $property => $value) {
        switch ($property) {
            case 'MARGIN':
                $parts = preg_split('/\s+/', trim((string) $value), -1, PREG_SPLIT_NO_EMPTY);
                $valid = true;
                foreach ($parts as $part) {
                    if (!preg_match('/^(?:[.0-9]+(?:px|pt|em|ex|%)?|auto)$/i', $part)) {
                        $valid = false;
                        break;
                    }
                }
                $box = $valid ? $this->ExpandBoxShorthand($parts) : null;
                if ($box !== null) {
                    foreach ($sides as $index => $side) {
                        $res['MARGIN-'.$side] = $box[$index];
                    }
                }
                break;

            case 'BORDER-TOP':
            case 'BORDER-RIGHT':
            case 'BORDER-BOTTOM':
            case 'BORDER-LEFT':
                $parts = $this->ParseBorderShorthand($value);
                foreach ($border_aspects as $aspect => $suffix) {
                    if (isset($parts[$aspect])) {
                        $res[$property.'-'.$suffix] = $parts[$aspect];
                    }
                }
                break;

            case 'BORDER':
                $parts = $this->ParseBorderShorthand($value);
                foreach ($border_aspects as $aspect => $suffix) {
                    if (!isset($parts[$aspect])) {
                        continue;
                    }
                    foreach ($sides as $side) {
                        $res['BORDER-'.$side.'-'.$suffix] = $parts[$aspect];
                    }
                }
                break;

            case 'PADDING':
                // Plain single-space split, as before: values are not validated.
                $box = $this->ExpandBoxShorthand(explode(' ', $value));
                if ($box !== null) {
                    foreach ($sides as $index => $side) {
                        $res['PADDING-'.$side] = $box[$index];
                    }
                }
                break;

            default:
                $res[$property] = $value;
        }
    }

    return $res;
}

/**
 * Maps 1 to 4 box-shorthand values onto top/right/bottom/left using the CSS rules.
 *
 * @param array $parts Zero-indexed list of values.
 * @return array|null array(top, right, bottom, left), or null for any other count.
 */
private function ExpandBoxShorthand(array $parts)
{
    switch (count($parts)) {
        case 1:
            return array($parts[0], $parts[0], $parts[0], $parts[0]);
        case 2:
            return array($parts[0], $parts[1], $parts[0], $parts[1]);
        case 3:
            // top, left/right, bottom
            return array($parts[0], $parts[1], $parts[2], $parts[1]);
        case 4:
            return array($parts[0], $parts[1], $parts[2], $parts[3]);
    }

    return null;
}
/**
 * Splits a border shorthand value into its style, width and color parts.
 *
 * Each whitespace-separated part is classified as:
 *  - style: exactly one of the CSS border-style keywords (case-insensitive);
 *  - width: exactly thin/medium/thick, or anything starting with a digit or dot;
 *  - color: everything else.
 * Keywords are matched as whole words so that color names such as
 * "mediumblue" are not mistaken for a width.
 *
 * @param string $definition Shorthand value, e.g. "1px solid red".
 * @return array Subset of the keys 'style', 'width', 'color', in order of appearance.
 */
public function ParseBorderShorthand($definition)
{
    $res = array();
    $parts = preg_split('/\s+/', trim((string) $definition), -1, PREG_SPLIT_NO_EMPTY);

    foreach ($parts as $part) {
        if (preg_match('/^(?:none|hidden|dotted|dashed|solid|double|groove|ridge|inset|outset)$/i', $part)) {
            $res['style'] = $part;
        }
        elseif (preg_match('/^(?:(?:thin|medium|thick)$|[.0-9])/i', $part)) {
            $res['width'] = $part;
        }
        else {
            $res['color'] = $part;
        }
    }

    return $res;
}
/**
 * Splits selector tokens on COMMA tokens and parses each selector.
 *
 * The data of all tokens between commas is concatenated and trimmed. A
 * group closed by a comma is kept even when empty; the trailing group is
 * kept only when it is non-empty. The resulting strings are handed to
 * IdentifySelectors().
 *
 * @param array $tokens Selector tokens as produced by GetTokens().
 * @param int   $origin One of the STYLE_ORIGIN_* constants.
 * @return array Parsed selectors as returned by IdentifySelectors().
 */
public function ParseSelectorTokens($tokens, $origin)
{
    $groups = array('');

    foreach ($tokens as $token) {
        if ($token['name'] == 'COMMA') {
            $groups[] = '';
        } else {
            $groups[count($groups) - 1] .= $token['data'];
        }
    }

    // The last group was not terminated by a comma.
    $tail = trim(array_pop($groups));

    $selectors = array();
    foreach ($groups as $group) {
        $selectors[] = trim($group);
    }
    if ($tail != '') {
        $selectors[] = $tail;
    }

    return $this->IdentifySelectors($selectors, $origin);
}
/*
'h' => '[0-9a-f]',
'nonascii' => '[\\200-\\377]',
'unicode' => '(\\{h}{1,6}(\r\n|[ \t\r\n\f])?)',
'escape' => '(\\[^\r\n\f0-9a-f])',
'nmstart' => '([_a-z]|{nonascii}|{escape})',
'nmchar' => '([_a-z0-9-]|{nonascii}|{escape})',
'string1' => '("([^\n\r\f"]|{nl}|{escape})*")',
'string2' => '(\'([^\n\r\f\']|{nl}|{escape})*\')',
'invalid1' => '("([^\n\r\f"]|{nl}|{escape})*?)',
'invalid2' => '(\'([^\n\r\f\']|{nl}|{escape})*?)',
'ident' => '-?{nmstart}{nmchar}*',
'name' => '{nmchar}+',
'num' => '([0-9]+|[0-9]*\.[0-9]+)',
'string' => '({string1}|{string2})',
'invalid' => '({invalid1}|{invalid2})',
'url' => '([!#$%&*-~]|{nonascii}|{escape})*',
's' => '[ \t\r\n\f]',
'w' => '{s}*',
'nl' => '(\n|\r\n|\r|\f)',
*/
/*
A simple selector is either a type selector or universal selector followed immediately by zero or more attribute selectors, ID selectors,
or pseudo-classes, in any order. The simple selector matches if all of its components match.
A selector is a chain of one or more simple selectors separated by combinators. Combinators are: whitespace, ">", and "+".
Whitespace may appear between a combinator and the simple selectors around it.
A selector's specificity is calculated as follows:
* count 1 if the selector is a 'style' attribute rather than a selector, 0 otherwise (= a)
(In HTML, values of an element's "style" attribute are style sheet rules. These rules have no selectors, so a=1, b=0, c=0, and d=0.)
* count the number of ID attributes in the selector (= b)
* count the number of other attributes and pseudo-classes in the selector (= c)
* count the number of element names and pseudo-elements in the selector (= d)
The specificity is based only on the form of the selector.
In particular, a selector of the form "[id=p33]" is counted as an attribute selector (a=0, b=0, c=1, d=0),
even if the id attribute is defined as an "ID" in the source document's DTD.
Concatenating the four numbers a-b-c-d (in a number system with a large base) gives the specificity.
*/
/**
 * Parses selector strings into nested match structures with specificity.
 *
 * Each selector is split on the combinators ' ', '>' and '+'. The rightmost
 * simple selector becomes the top-level entry; the selectors to its left are
 * nested under 'descendant_of', 'child_of' or 'sibling_of'. Each level may
 * hold 'main' (upper-cased element name or '*'), 'classes', 'ids',
 * 'pseudo_classes', 'pseudo_elements' and 'atts' (with the groups 'set',
 * 'equals', 'space' and 'hypen').
 *
 * The top-level entry also gets:
 *  - 'specifity': the a-b-c-d counts combined in base 100 (each capped at 99);
 *    a is always 0 here because style attributes do not pass through this method;
 *  - 'order': the current SelectorOrder, which is then incremented;
 *  - 'origin': the value passed in.
 *
 * Identifiers are matched case-insensitively so mixed-case class and id
 * names are captured in full. Quotes around attribute values are removed.
 *
 * @param array $selectors List of selector strings.
 * @param int   $origin    One of the STYLE_ORIGIN_* constants.
 * @return array List of parsed selectors, one per input string.
 */
function IdentifySelectors($selectors, $origin)
{
    $processed = array();
    $ident = $this->Macros['ident'];

    foreach ($selectors as $selector) {
        // Limit -1 means "no limit".
        $parts = preg_split('/[ ]*([ >+])[ ]*/', $selector, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        $parsed_selector = array();
        $a = 0;
        $b = 0;
        $c = 0;
        $d = 0;

        foreach ($parts as $simple_selector) {
            $parsed_part = array();

            if (preg_match('/^([ >+])$/', $simple_selector, $regs)) {
                $parsed_part['combinator'] = $regs[1];
                $parsed_selector[] = $parsed_part;
                continue;
            }

            if (preg_match('/^(\*|'.$ident.')/i', $simple_selector, $regs)) {
                $main = $regs[1];
                if ($main != '*') {
                    $d++;
                }
            }
            else {
                $main = '*';
            }
            $parsed_part['main'] = strtoupper($main);

            // Attribute blocks are parsed separately below; remove them so dots,
            // hashes or colons inside attribute values are not misread as classes,
            // ids or pseudo-classes.
            $bare = preg_replace('/\[[^\]]*\]/', '', $simple_selector);

            if (preg_match_all('/\.('.$ident.')/i', $bare, $regs)) {
                $parsed_part['classes'] = $regs[1];
                $c += count($regs[1]);
            }

            if (preg_match_all('/\[([^\]]+)\]/', $simple_selector, $regs)) {
                $c += count($regs[1]);
                $parsed_atts = array();
                foreach ($regs[1] as $attribute) {
                    if (preg_match('/^[^=]+$/', $attribute)) {
                        $parsed_atts['set'][] = strtoupper($attribute);
                    }
                    elseif (preg_match('/(.*)\\|=(.*)/', $attribute, $att_regs)) {
                        $parsed_atts['hypen'][strtoupper(trim($att_regs[1]))] = trim($att_regs[2], " \t\"'");
                    }
                    elseif (preg_match('/(.*)~=(.*)/', $attribute, $att_regs)) {
                        $parsed_atts['space'][strtoupper(trim($att_regs[1]))] = trim($att_regs[2], " \t\"'");
                    }
                    elseif (preg_match('/(.*)=(.*)/', $attribute, $att_regs)) {
                        $parsed_atts['equals'][strtoupper(trim($att_regs[1]))] = trim($att_regs[2], " \t\"'");
                    }
                }
                $parsed_part['atts'] = $parsed_atts;
            }

            if (preg_match_all('/#('.$ident.')/i', $bare, $regs)) {
                $parsed_part['ids'] = $regs[1];
                $b += count($regs[1]);
            }

            if (preg_match_all('/:('.$ident.')/i', $bare, $regs)) {
                $pseudo_classes = array();
                $pseudo_elements = array();
                foreach ($regs[1] as $pseudo) {
                    if (preg_match('/^(first-line|first-letter|before|after)$/i', $pseudo)) {
                        $pseudo_elements[] = $pseudo;
                    }
                    else {
                        $pseudo_classes[] = $pseudo;
                    }
                }
                if ($pseudo_classes) {
                    $parsed_part['pseudo_classes'] = $pseudo_classes;
                }
                if ($pseudo_elements) {
                    $parsed_part['pseudo_elements'] = $pseudo_elements;
                }
                $c += count($pseudo_classes);
                $d += count($pseudo_elements);
            }

            $parsed_selector[] = $parsed_part;
        }

        // Build the chain right-to-left: the subject of the selector is the
        // top level, each combinator opens a nested level for what precedes it.
        $main = array();
        $cur =& $main;
        foreach (array_reverse($parsed_selector) as $part) {
            if (isset($part['combinator'])) {
                switch ($part['combinator']) {
                    case ' ':
                        $cur =& $cur['descendant_of'];
                        break;
                    case '>':
                        $cur =& $cur['child_of'];
                        break;
                    case '+':
                        $cur =& $cur['sibling_of'];
                        break;
                }
                continue;
            }

            $cur['main'] = $part['main'];
            foreach (array('classes', 'ids', 'pseudo_classes', 'pseudo_elements', 'atts') as $key) {
                if (isset($part[$key])) {
                    $cur[$key] = $part[$key];
                }
            }
        }
        // Drop the reference so the next iteration cannot write into this selector.
        unset($cur);

        // Base-100 positional value, i.e. each count as a zero-padded two-digit field.
        $main['specifity'] = ((min($a, 99) * 100 + min($b, 99)) * 100 + min($c, 99)) * 100 + min($d, 99);
        $main['order'] = $this->SelectorOrder++;
        $main['origin'] = $origin;
        $processed[] = $main;
    }

    return $processed;
}
/**
 * Parses one rule and registers it for lookup.
 *
 * The selector tokens and declaration tokens are parsed. The rule is then
 * stored in $this->Rules, and once per selector in $this->Mapping under the
 * upper-cased 'main' of that selector, where GetMatchingSelectors() looks
 * it up by element name.
 *
 * @param array $selector_tokens   Tokens preceding the opening brace.
 * @param array $definition_tokens Tokens between the braces.
 * @param int   $origin            One of the STYLE_ORIGIN_* constants.
 */
public function AppendRule($selector_tokens, $definition_tokens, $origin)
{
    $selectors = $this->ParseSelectorTokens($selector_tokens, $origin);
    $properties = $this->ParseDefinitionTokens($definition_tokens);

    foreach ($selectors as $selector) {
        $this->Mapping[strtoupper($selector['main'])][] = array('selector' => $selector, 'properties' => $properties);
    }

    $this->Rules[] = array('selectors' => $selectors, 'properties' => $properties);
}
/**
 * Splits a CSS string into a flat list of tokens.
 *
 * Every pattern below is expanded with the macros from $this->Macros and run
 * case-insensitively over the whole (comment-stripped, whitespace-collapsed)
 * input. The matches are then merged in position order: at each step the match
 * that starts earliest wins, ties going to the longest match. Input that no
 * pattern covers is emitted as a token named 'TEXT'.
 *
 * @param string $css CSS source.
 * @return array List of array('name' => token name, 'data' => matched text).
 */
public function GetTokens($css)
{
// Pattern => token name. Several patterns share one token name (e.g. LENGTH, URI).
$patterns = array(
'{s}+' =>'S',
'<!--' =>'CDO',
'-->' =>'CDC',
'~=' =>'INCLUDES',
'\\|=' =>'DASHMATCH',
'{w}\\{' =>'LBRACE',
'{w}\\+' =>'PLUS',
'{w}\\>' =>'GREATER',
'{w},' =>'COMMA',
'{string}' =>'STRING',
'{invalid}' =>'INVALID', /* unclosed string */
'{ident}' =>'IDENT',
'#{name}' =>'HASH',
'@import' =>'IMPORT_SYM',
'@page' =>'PAGE_SYM',
'@media' =>'MEDIA_SYM',
'@charset' =>'CHARSET_SYM',
'!{w}important' =>'IMPORTANT_SYM',
/*'{num}{E}{M}' =>'EMS',
'{num}{E}{X}' =>'EXS',
'{num}{P}{X}' =>'LENGTH',
'{num}{C}{M}' =>'LENGTH',
'{num}{M}{M}' =>'LENGTH',
'{num}{I}{N}' =>'LENGTH',
'{num}{P}{T}' =>'LENGTH',
'{num}{P}{C}' =>'LENGTH',
'{num}{D}{E}{G}' =>'ANGLE',
'{num}{R}{A}{D}' =>'ANGLE',
'{num}{G}{R}{A}{D}' =>'ANGLE',
'{num}{M}{S}' =>'TIME',
'{num}{S}' =>'TIME',
'{num}{H}{Z}' =>'FREQ',
'{num}{K}{H}{Z}' =>'FREQ',
'{num}{ident}' =>'DIMENSION',*/
'{num}em' =>'EMS',
'{num}ex' =>'EXS',
'{num}px' =>'LENGTH',
'{num}cm' =>'LENGTH',
'{num}mm' =>'LENGTH',
'{num}in' =>'LENGTH',
'{num}pt' =>'LENGTH',
'{num}pc' =>'LENGTH',
'{num}deg' =>'ANGLE',
'{num}rad' =>'ANGLE',
'{num}grad' =>'ANGLE',
'{num}ms' =>'TIME',
'{num}s' =>'TIME',
'{num}hz' =>'FREQ',
'{num}khz' =>'FREQ',
'{num}{ident}' =>'DIMENSION',
'{num}%' =>'PERCENTAGE',
'{num}' =>'NUMBER',
'url\({w}{string}{w}\)' =>'URI',
'url\({w}{url}{w}\)' =>'URI',
'{ident}\(' =>'FUNCTION',
/*'.' =>'*yytext',*/
);
// Substitute every {macro} placeholder; macros are applied in the order they are stored in $this->Macros.
$final_patterns = array();
foreach ($patterns as $regexp => $token) {
foreach ($this->Macros as $macro => $replacement) {
$regexp = str_replace('{'.$macro.'}', $replacement, $regexp);
}
$final_patterns[$regexp] = $token;
}
// Remove /* ... */ comments, then normalise whitespace.
$css = preg_replace('/\\/\\*[^*]*\\*+([^\\/*][^*]*\\*+)*\\//', '', $css);
// Whitespace followed by a comment becomes a single space.
// NOTE(review): the previous line already removes comments, so this looks like it can no longer match - confirm.
$css = preg_replace('/[ \t\r\n\f]+\\/\\*[^*]*\\*+([^\\/*][^*]*\\*+)*\\//', ' ', $css);
$css = preg_replace('/[ \t\r\n\f]+/', ' ', $css); // remove repeated whitespace
// Collect all matches (text + offset) of every pattern, keyed by token name.
// NOTE(review): the key is the token NAME, so when several patterns that share a name
// (e.g. the LENGTH units) all match, only the matches of the last such pattern are kept.
$matches = array();
$token_indexes = array();
foreach ($final_patterns as $regexp => $token) {
if (preg_match_all('/'.$regexp.'/i', $css, $res, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE)) {
$matches[$token] = $res[0];
$token_indexes[$token] = 0;
}
}
$tokens = array();
// Offset just past the most recently emitted token.
$last_token_pos = 0;
// $i is not used below.
$i = 0;
// Each pass emits at most one token (preceded by a TEXT token for any gap).
do {
$has_more = false;
$max_len = 0;
$min_pos = false;
foreach ($matches as $token => $data)
{
// Skip matches of this token type that start before the current position.
$cur_index = $token_indexes[$token];
do {
$cur_match = isset($data[$cur_index]) ? $data[$cur_index++] : false;
} while ($cur_match && $cur_match[1] < $last_token_pos);
// No match of this type is left at or after the current position.
if ( !$cur_match ) continue;
// Remember the index of the candidate match found above.
$token_indexes[$token] = $cur_index-1;
// Prefer the earliest start; on equal start, the longest match.
if ( $min_pos === false ||
($cur_match[1] < $min_pos
||
( $cur_match[1] == $min_pos && strlen( $cur_match[0] ) > $max_len )
)
) {
$longest = $token;
$max_len = strlen( $cur_match[0] );
$min_pos = $cur_match[1];
}
$has_more = $has_more || isset($data[$token_indexes[$token]]);
}
if ($min_pos !== false) {
$token_data = $matches[$longest][$token_indexes[$longest]];
// Anything between the previous token and this one is emitted as TEXT.
if ($token_data[1] > $last_token_pos) {
$text_data = substr($css, $last_token_pos, $token_data[1] - $last_token_pos);
$tokens[] = array('name' => 'TEXT', 'data' => $text_data);
// echo "found token TEXT: [$text_data]<br>\n";
}
$tokens[] = array('name' => $longest, 'data' => $token_data[0]);
// echo "found token $longest: {$token_data[0]} at {$token_data[1]}<br>\n";
// flush();
$last_token_pos = $token_data[1] + strlen($token_data[0]);
$token_indexes[$longest]++;
}
} while ($has_more);
// Whatever follows the last token becomes a final TEXT token. Because the
// comparison is "<=", this token is also appended when nothing is left.
if ($last_token_pos <= strlen($css)) {
$text_data = substr($css, $last_token_pos);
$tokens[] = array('name' => 'TEXT', 'data' => $text_data);
// echo "found token FINAL TEXT: [$text_data]<br>\n";
}
return $tokens;
}
/**
 * Builds the table of regex macros used by the tokenizer and selector parser
 * and stores it in $this->Macros.
 *
 * Definitions are expanded in declaration order: a "{name}" placeholder is
 * replaced only if that macro was defined earlier in the list. Placeholders
 * that refer to a later macro (e.g. {nl} inside string1) are left as they
 * are at this stage.
 *
 * This is a reduced macro set: 'escape' does not include {unicode}, and
 * there are no per-letter case-insensitive macros.
 *
 * NOTE(review): the 'escape' definition yields the regex text "(\[^...])",
 * in which "\[" is an escaped literal bracket rather than a backslash
 * followed by a character class - confirm whether that is intended.
 */
public function Prepare()
{
    $definitions = array(
        'h' => '[0-9a-f]',
        'nonascii' => '[\\200-\\377]',
        'unicode' => '(\\{h}{1,6}(\r\n|[ \t\r\n\f])?)',
        'escape' => '(\\[^\r\n\f0-9a-f])',
        'nmstart' => '([_a-z]|{nonascii}|{escape})',
        'nmchar' => '([_a-z0-9-]|{nonascii}|{escape})',
        'string1' => '("([^\n\r\f"]|{nl}|{escape})*")',
        'string2' => '(\'([^\n\r\f\']|{nl}|{escape})*\')',
        'invalid1' => '("([^\n\r\f"]|{nl}|{escape})*?)',
        'invalid2' => '(\'([^\n\r\f\']|{nl}|{escape})*?)',
        'ident' => '-?{nmstart}{nmchar}*',
        'name' => '{nmchar}+',
        'num' => '([0-9]+|[0-9]*\.[0-9]+)',
        'string' => '({string1}|{string2})',
        'invalid' => '({invalid1}|{invalid2})',
        'url' => '([!#$%&*-~]|{nonascii}|{escape})*',
        's' => '[ \t\r\n\f]',
        'w' => '{s}*',
        'nl' => '(\n|\r\n|\r|\f)',
    );

    $placeholders = array();
    $expansions = array();
    $expanded = array();

    foreach ($definitions as $name => $pattern) {
        // str_replace() applies the pairs one after another, in the order
        // the macros were defined.
        $expanded[$name] = str_replace($placeholders, $expansions, $pattern);

        $placeholders[] = '{'.$name.'}';
        $expansions[] = $expanded[$name];
    }

    $this->Macros = $expanded;
}
/**
 * Builds a pseudo rule from the presentational HTML attributes of a node.
 *
 * Attributes on the non-visual list are ignored; TYPE is ignored too unless
 * the node is LI, OL or UL. ALIGN and VALIGN are renamed to TEXT-ALIGN and
 * VERTICAL-ALIGN, other attributes keep their name. CELLPADDING is not
 * returned: it registers a "NAME[cellpadding=N] TD" rule in
 * $this->Mapping['TD'] that sets the padding (in px) on descendant cells.
 *
 * @param object $node Node exposing Name and Attributes (attribute names presumably upper-cased - TODO confirm).
 * @return array|false array('selector' => ..., 'properties' => ...) with
 *                     specificity 0, or false if no visual attribute is present.
 */
public function GetHTMLVisualPropsSelector($node)
{
    if (!$node->Attributes) {
        return false;
    }

    $ignored = array(
        'ABBR', 'ACCEPT-CHARSET', 'ACCEPT', 'ACCESSKEY', 'ACTION', 'ALT', 'ARCHIVE', 'AXIS', 'CHARSET', 'CHECKED', 'CITE', 'CLASS', 'CLASSID', 'CODE', 'CODEBASE',
        'CODETYPE', 'COLSPAN', 'COORDS', 'DATA', 'DATETIME', 'DECLARE', 'DEFER', 'DIR', 'DISABLED', 'ENCTYPE', 'FOR', 'HEADERS', 'HREF', 'HREFLANG', 'HTTP-EQUIV',
        'ID', 'ISMAP', 'LABEL', 'LANG', 'LANGUAGE', 'LONGDESC', 'MAXLENGTH', 'MEDIA', 'METHOD', 'MULTIPLE', 'NAME', 'NOHREF', 'OBJECT', 'ONBLUR', 'ONCHANGE',
        'ONCLICK', 'ONDBLCLICK', 'ONFOCUS', 'ONKEYDOWN', 'ONKEYPRESS', 'ONKEYUP', 'ONLOAD', 'ONLOAD', 'ONMOUSEDOWN', 'ONMOUSEMOVE', 'ONMOUSEOUT', 'ONMOUSEOVER',
        'ONMOUSEUP', 'ONRESET', 'ONSELECT', 'ONSUBMIT', 'ONUNLOAD', 'ONUNLOAD', 'PROFILE', 'PROMPT', 'READONLY', 'REL', 'REV', 'ROWSPAN', 'SCHEME', 'SCOPE',
        'SELECTED', 'SHAPE', 'SPAN', 'SRC', 'STANDBY', 'START', 'STYLE', 'SUMMARY', 'TITLE', 'USEMAP',
        'VALUE', 'VALUETYPE', 'VERSION',
    );

    // TYPE only counts as a visual attribute on list elements.
    $is_list_element = ($node->Name == 'LI' || $node->Name == 'OL' || $node->Name == 'UL');
    if (!$is_list_element) {
        $ignored[] = 'TYPE';
    }

    // Only the keys matter for array_diff_key().
    $visual = array_diff_key($node->Attributes, array_flip($ignored));
    if (!$visual) {
        return false;
    }

    $renames = array(
        'ALIGN' => 'TEXT-ALIGN',
        'VALIGN' => 'VERTICAL-ALIGN',
        'CELLPADDING' => 'PADDING',
    );

    $declared = array();
    foreach ($visual as $attribute => $setting) {
        if ($attribute == 'CELLPADDING') {
            $cell_selectors = $this->IdentifySelectors( array( $node->Name.'[cellpadding='.$setting.'] TD' ), kPDFStylesheet::STYLE_ORIGIN_AUTHOR_NORMAL );
            $cell_selectors[0]['order'] = $this->HTMLVisualPropsSelectorOrder++;
            $cell_selectors[0]['specifity'] = 0;
            $this->Mapping['TD'][] = array(
                'selector' => $cell_selectors[0],
                'properties' => $this->ProcessShortHands(array(
                    'PADDING' => $setting.'px',
                )),
            );
            continue;
        }

        $target = isset($renames[$attribute]) ? $renames[$attribute] : $attribute;
        $declared[$target] = $setting;
    }

    return array(
        'selector' => array('main' => $node->Name, 'specifity' => 0, 'order' => $this->HTMLVisualPropsSelectorOrder, 'origin' => kPDFStylesheet::STYLE_ORIGIN_AUTHOR_NORMAL ),
        'properties' => $declared,
    );
}
/**
 * Collects every rule that applies to a node, in the order they should be applied.
 *
 * Candidates are the rules registered under the node's element name plus
 * those registered under '*'. Matching rules and the pseudo rule derived
 * from HTML attributes are sorted with CmpSelectors(). A rule built from
 * the node's STYLE attribute, if present, is appended after sorting, so it
 * always comes last.
 *
 * @param object $node Node exposing Name and Attributes.
 * @return array List of array('selector' => ..., 'properties' => ...).
 */
public function GetMatchingSelectors($node)
{
    $candidates = array();
    if (isset($this->Mapping[$node->Name])) {
        $candidates = $this->Mapping[$node->Name];
    }
    if (isset($this->Mapping['*'])) {
        $candidates = array_merge($candidates, $this->Mapping['*']);
    }

    $matching = array();
    foreach ($candidates as $rule) {
        if (!$this->SelectorMatches($rule['selector'], $node)) {
            continue;
        }
        $matching[] = $rule;
    }

    $from_attributes = $this->GetHTMLVisualPropsSelector($node);
    if ($from_attributes) {
        $matching[] = $from_attributes;
    }

    usort($matching, array($this, 'CmpSelectors'));

    if (isset($node->Attributes['STYLE'])) {
        // Inline style is added after sorting and therefore ends up last.
        $inline_tokens = $this->GetTokens( $node->Attributes['STYLE'] );
        $matching[] = array(
            'selector' => array('main' => '_STYLE_'),
            'properties' => $this->ParseDefinitionTokens ( $inline_tokens ),
        );
    }

    return $matching;
}
/**
 * usort() comparator for rules: orders by origin, then specificity, then
 * source order, each ascending.
 *
 * Returns 0 when all three keys are equal, so the comparison is consistent
 * in both directions.
 *
 * @param array $a Rule with $a['selector'] holding 'origin', 'specifity' and 'order'.
 * @param array $b Rule of the same shape.
 * @return int -1, 0 or 1.
 */
public function CmpSelectors($a, $b)
{
    foreach (array('origin', 'specifity', 'order') as $key) {
        $left = $a['selector'][$key];
        $right = $b['selector'][$key];

        if ($left == $right) {
            // Tie on this key: fall through to the next, less significant one.
            continue;
        }

        return $left < $right ? -1 : 1;
    }

    return 0;
}
/**
 * Tests whether a parsed selector (as built by IdentifySelectors()) matches a node.
 *
 * Checks, in order: element name, classes, ids, attribute conditions, then
 * the combinators 'child_of' (parent), 'sibling_of' (previous sibling) and
 * 'descendant_of' (any ancestor). Selectors that use pseudo-classes or
 * pseudo-elements never match, because they cannot be evaluated here.
 *
 * A missing node (for example the parent of the root or the previous
 * sibling of a first child) never matches.
 *
 * @param array       $selector_data Parsed selector level.
 * @param object|null $node          Node exposing Name, Attributes, Parent and PrevSibling().
 * @return bool True when every condition of the selector holds for the node.
 */
public function SelectorMatches($selector_data, $node)
{
    // Combinators recurse into the parent or sibling, which may not exist.
    if (!is_object($node)) {
        return false;
    }

    if ($selector_data['main'] != '*' && $node->Name != $selector_data['main']) {
        return false;
    }

    // Every listed class must appear as a whole word in the CLASS attribute.
    if (isset($selector_data['classes'])) {
        foreach ($selector_data['classes'] as $class) {
            if (!isset($node->Attributes['CLASS']) || !preg_match('/(\A| )+'.preg_quote($class, '/').'( |\Z)+/i', $node->Attributes['CLASS'])) {
                return false;
            }
        }
    }

    // The node id must be one of the ids named in the selector.
    if (isset($selector_data['ids'])) {
        if (!isset($node->Attributes['ID']) || !in_array((string) $node->Attributes['ID'], $selector_data['ids'], true)) {
            return false;
        }
    }

    if (isset($selector_data['atts'])) {
        $atts = $selector_data['atts'];

        // [att] - the attribute only has to be present
        if (isset($atts['set'])) {
            foreach ($atts['set'] as $att) {
                if (!isset($node->Attributes[$att])) {
                    return false;
                }
            }
        }

        // [att=value] - compared case-insensitively
        if (isset($atts['equals'])) {
            foreach ($atts['equals'] as $att => $value) {
                if (!isset($node->Attributes[$att]) || strtoupper($node->Attributes[$att]) != strtoupper($value)) {
                    return false;
                }
            }
        }

        // [att~=value] - value is one of the space-separated words
        if (isset($atts['space'])) {
            foreach ($atts['space'] as $att => $value) {
                if (!isset($node->Attributes[$att]) || !preg_match('/(\A| )+'.preg_quote($value, '/').'( |\Z)+/i', $node->Attributes[$att])) {
                    return false;
                }
            }
        }

        // [att|=value] - attribute equals value or starts with "value-"
        if (isset($atts['hypen'])) {
            foreach ($atts['hypen'] as $att => $value) {
                if (!isset($node->Attributes[$att]) || !preg_match('/^'.preg_quote($value, '/').'(-|\Z)+/i', $node->Attributes[$att])) {
                    return false;
                }
            }
        }
    }

    // Pseudo-elements and pseudo-classes cannot be evaluated here, so such
    // selectors never match.
    if (isset($selector_data['pseudo_elements']) || isset($selector_data['pseudo_classes'])) {
        return false;
    }

    if (isset($selector_data['child_of'])) {
        if (!$this->SelectorMatches($selector_data['child_of'], $node->Parent)) {
            return false;
        }
    }

    if (isset($selector_data['sibling_of'])) {
        if (!$this->SelectorMatches($selector_data['sibling_of'], $node->PrevSibling())) {
            return false;
        }
    }

    if (isset($selector_data['descendant_of'])) {
        // Walk up until an ancestor matches or the ancestors run out.
        $found = false;
        $ancestor = $node->Parent;
        while ($ancestor) {
            if ($this->SelectorMatches($selector_data['descendant_of'], $ancestor)) {
                $found = true;
                break;
            }
            $ancestor = $ancestor->Parent;
        }
        if (!$found) {
            return false;
        }
    }

    // Nothing ruled the node out, so the selector matches.
    return true;
}
/**
 * Computes the final property map for a node.
 *
 * The property sets of all matching rules are merged in the order returned
 * by GetMatchingSelectors(), so a later rule overrides an earlier one for
 * the same property name. No special handling of !important takes place here.
 *
 * @param object $node Node to resolve styles for.
 * @return array Map of property name => value.
 */
public function GetAllProperties($node)
{
    $merged = array();

    foreach ($this->GetMatchingSelectors($node) as $match) {
        $merged = array_merge($merged, $match['properties']);
    }

    return $merged;
}
}

Event Timeline