Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
/**
 * Parser that turns prepared XHTML text into the box tree used for rendering.
 *
 * The document is parsed into a DOM tree, its BASE element (if any) is applied
 * to the pipeline's base URL stack, CSS is collected from the tree, the page
 * media is configured and finally the box tree is built from the DOM.
 */
class ParserXHTML extends Parser {
  /**
   * Parse the XHTML text and build the box tree for it.
   *
   * If the document cannot be parsed, the 'cannot_parse.html' template is sent
   * to the output, the failure is logged and the script terminates.
   *
   * @param string $html     XHTML text to parse
   * @param object $pipeline pipeline object; supplies the base URL stack, the
   *                         current CSS object and the output driver
   * @param object $media    media description, or null to skip media setup
   * @return mixed reference to the value returned by create_pdf_box()
   */
  function &process($html, &$pipeline, &$media) {
    // Run the XML parser on the XHTML we've prepared
    $dom_tree = TreeBuilder::build($html);

    // Check if the parser returned a valid document
    if (is_null($dom_tree)) {
      readfile(HTML2PS_DIR.'templates/cannot_parse.html');
      error_log(sprintf("Cannot parse document: %s", $pipeline->get_base_url()));
      die("HTML2PS Error");
    }

    /**
     * Detect the base URI for this document.
     *
     * According to HTML 4.01, section 12.4.1, user agents must calculate the
     * base URI according to the following precedences (highest priority to
     * lowest):
     *
     * 1. The base URI is set by the BASE element.
     * 2. The base URI is given by meta data discovered during a protocol
     *    interaction, such as an HTTP header (see [RFC2616]).
     * 3. By default, the base URI is that of the current document. Not all
     *    HTML documents have a base URI (e.g., a valid HTML document may
     *    appear in an email and may not be designated by a URI). Such HTML
     *    documents are considered erroneous if they contain relative URIs and
     *    rely on a default base URI.
     */

    // Check if a BASE element is present; use its first occurrence
    $this->_scan_base($dom_tree, $pipeline);

    /**
     * @todo fall back to the protocol metadata
     */

    /**
     * Parse STYLE / LINK nodes containing CSS references and definitions.
     * This should be done here, as the document body may include a STYLE node
     * (this violates the HTML standard, but appears rather often on the Web).
     */
    $css =& $pipeline->get_current_css();
    $css->scan_styles($dom_tree, $pipeline);

    if (!is_null($media)) {
      // Set up media size and margins
      $pipeline->get_page_media(1, $media);
      $pipeline->output_driver->update_media($media);
      $pipeline->_setupScales($media);
    }

    $body =& traverse_dom_tree_pdf($dom_tree);
    $box =& create_pdf_box($body, $pipeline);

    return $box;
  }

  /**
   * Depth-first search for the first BASE element under $root.
   *
   * When a BASE element is found, the URL on top of the pipeline's base URL
   * stack is replaced by the URL derived from the element's 'href' attribute,
   * and the search stops: the result is propagated to every caller up the
   * recursion so that later BASE elements are ignored.
   *
   * @param object $root     DOM node to scan (document or element node)
   * @param object $pipeline pipeline object whose base URL stack is updated
   * @return bool true if a BASE element was found in this subtree, false otherwise
   */
  function _scan_base(&$root, &$pipeline) {
    switch ($root->node_type()) {
    case XML_ELEMENT_NODE:
      if ($root->tagname() === 'base') {
        /**
         * See HTML 4.01, section 12.4:
         * href - this attribute specifies an absolute URI that acts as the
         * base URI for resolving relative URIs.
         *
         * At this moment the pipeline object has the current document URI on
         * the top of the stack; we replace it with the value of the 'href'
         * attribute of the BASE tag.
         *
         * To handle (possibly) incorrect values, we use the 'guess_url'
         * function; in this case, if the 'href' attribute contains an
         * absolute value (as it SHOULD), it will be used; if it is missing or
         * is relative, we'll get a more or less usable value based on the
         * current document URI.
         */
        $new_url = $pipeline->guess_url($root->get_attribute('href'));
        $pipeline->pop_base_url();
        $pipeline->push_base_url($new_url);

        return true;
      }

      // Not a BASE element: deliberately fall through and scan its children
    case XML_DOCUMENT_NODE:
      $child = $root->first_child();
      while ($child) {
        // Report the hit as true (not a bare "return", which yields NULL),
        // otherwise ancestors would keep scanning their remaining siblings
        // and a later BASE element would override the first one.
        if ($this->_scan_base($child, $pipeline)) {
          return true;
        }
        $child = $child->next_sibling();
      }

      return false;
    }

    return false;
  }
}
92
?>