Subversion-Projekte lars-tiefland.php_share

Revision

Details | Letzte Änderung | Log anzeigen | RSS feed

Revision Autor Zeilennr. Zeile
1 lars 1
<?php
2
 
3
require_once(HTML2PS_DIR.'fetcher._interface.class.php');
4
 
5
/**
 * Fetcher implementation that retrieves a document over HTTP using the
 * CURL extension.
 *
 * The response headers and body are returned to the caller wrapped in a
 * FetchedDataURL object.
 */
class FetcherUrlCurl extends Fetcher {
  /**
   * @var String URL most recently passed to get_data()
   * @access private
   */
  var $url;

  /**
   * @var String Proxy handed to CURLOPT_PROXY; null means "do not set a proxy"
   * @access private
   */
  var $_proxy;

  /**
   * Initializes the fetcher with an empty URL and no proxy.
   *
   * Declared before the legacy constructor below so that PHP 5 and later
   * pick this method as the constructor; PHP 8 no longer recognizes
   * class-named methods as constructors at all.
   */
  function __construct() {
    $this->url = "";
    $this->set_proxy(null);
  }

  /**
   * Legacy (PHP 4 style) constructor, kept so that existing code calling
   * it explicitly keeps working. Delegates to __construct().
   */
  function FetcherUrlCurl() {
    $this->__construct();
  }

  /**
   * Normalizes an URL before it is handed to CURL.
   *
   * If only the host name was specified, a trailing slash is added
   * (e.g. http://www.google.com becomes http://www.google.com/).
   *
   * @param String $url URL to normalize
   * @return String normalized URL
   * @access private
   */
  function _fix_url($url) {
    if (preg_match('#^.*://[^/]+$#', $url)) {
      $url .= '/';
    }

    return $url;
  }

  /**
   * Extracts the header lines of the final response from the raw header
   * data returned by CURL.
   *
   * When redirects are followed (or the server sends an interim response),
   * CURL returns several header blocks one after another, separated by
   * empty lines; only the last block describes the document actually
   * returned. The very first line of a block contains the HTTP status
   * (e.g. HTTP/1.1 200 OK) and is dropped.
   *
   * @param String $raw_headers all header data preceding the response body
   * @return Array header lines of the last header block, without the status line
   * @access private
   */
  function _parse_headers($raw_headers) {
    // Accept bare "\n" line ends too, as not every server sends "\r\n"
    $blocks     = preg_split('/\r?\n\r?\n/', trim($raw_headers));
    $last_block = $blocks[count($blocks) - 1];

    return array_slice(preg_split('/\r?\n/', $last_block), 1);
  }

  /**
   * @return String URL most recently passed to get_data()
   */
  function get_base_url() {
    return $this->url;
  }

  /**
   * Fetches the given URL using CURL.
   *
   * Failures are reported through error_log().
   *
   * @param String $url URL to be fetched
   * @return FetchedDataURL fetched content and headers, or null if
   *         the CURL session could not be created or the transfer failed
   */
  function get_data($url) {
    $this->url = $url;

    $curl = curl_init();
    if ($curl === FALSE) {
      error_log(sprintf('Cannot open %s, CURL session could not be initialized',
                        $url));
      return null;
    }

    curl_setopt($curl, CURLOPT_URL, $this->_fix_url($url));
    curl_setopt($curl, CURLOPT_USERAGENT, DEFAULT_USER_AGENT);

    // Deliberately best-effort: the fetch continues without redirect
    // support when this option is refused
    if (!@curl_setopt($curl, CURLOPT_FOLLOWLOCATION, 1)) {
      error_log('CURLOPT_FOLLOWLOCATION will not work in safe_mode; pages with redirects may be rendered incorrectly');
    }

    // Return headers and body together as the result of curl_exec
    curl_setopt($curl, CURLOPT_HEADER, 1);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

    $proxy = $this->get_proxy();
    if (!is_null($proxy)) {
      curl_setopt($curl, CURLOPT_PROXY, $proxy);
    }

    /**
     * Fetch headers and page content to the $response variable
     */
    $response = curl_exec($curl);

    if ($response === FALSE) {
      error_log(sprintf('Cannot open %s, CURL error is: %s',
                        $url,
                        curl_error($curl)));
      curl_close($curl);
      return null;
    }

    /**
     * The header size must be queried before the session is closed.
     * It covers ALL header blocks received (one per redirect hop), so
     * splitting at this offset keeps headers of intermediate responses
     * out of the body - which searching for the first empty line
     * does not.
     */
    $header_size = curl_getinfo($curl, CURLINFO_HEADER_SIZE);
    curl_close($curl);

    // substr may return FALSE instead of an empty string on older
    // PHP versions; the casts normalize that
    $raw_headers = (string)substr($response, 0, $header_size);
    $content     = (string)substr($response, $header_size);

    $headers = $this->_parse_headers($raw_headers);

    return new FetchedDataURL($content, $headers, $this->url);
  }

  /**
   * @return String proxy currently configured, or null if none is set
   */
  function get_proxy() {
    return $this->_proxy;
  }

  /**
   * Sets the proxy used by subsequent get_data() calls.
   *
   * @param String $proxy value for CURLOPT_PROXY; null disables the proxy
   */
  function set_proxy($proxy) {
    $this->_proxy = $proxy;
  }
}
105
?>