Blame | Letzte Änderung | Log anzeigen | RSS feed
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/**
 * Abstract byte-oriented storage used to read and write Lucene index data.
 *
 * Concrete subclasses supply the raw primitives (_fread(), _fwrite(), seek(),
 * tell()); this class layers the typed accessors on top of them: bytes,
 * big-endian 32-bit and 64-bit integers, variable-length integers and
 * length-prefixed strings in the Java "modified UTF-8" representation.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Class constructor. Open the file.
     *
     * @param string $filename
     * @param string $mode
     */
    abstract public function __construct($filename, $mode='r');

    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);

    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * The result of the underlying _fwrite() call is passed through
     * unchanged.
     *
     * @param integer $byte
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }

    /**
     * Reads an integer from the current position in the file
     * and advances the file pointer.
     *
     * The four bytes are combined most significant byte first. No explicit
     * sign extension is performed.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return (ord($str[0]) << 24) |
               (ord($str[1]) << 16) |
               (ord($str[2]) << 8)  |
                ord($str[3]);
    }

    /**
     * Writes an integer to the end of file.
     *
     * The low 32 bits of the value are written most significant byte first.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');

        $this->_fwrite(
            chr(($value >> 24) & 0xFF) .
            chr(($value >> 16) & 0xFF) .
            chr(($value >> 8)  & 0xFF) .
            chr($value         & 0xFF),
            4
        );
    }

    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * PHP uses long as largest integer. fseek() uses long for offset.
         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
         * conversion to float.
         * So, largest index segment file is 2Gb
         */
        return (ord($str[4]) << 24) |
               (ord($str[5]) << 16) |
               (ord($str[6]) << 8)  |
                ord($str[7]);
    }

    /**
     * Writes long integer to the end of file
     *
     * @param integer $value
     */
    public function writeLong($value)
    {
        /**
         * PHP uses long as largest integer. fseek() uses long for offset.
         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
         * conversion to float.
         * So, largest index segment file is 2Gb
         */
        settype($value, 'integer');

        // The upper four bytes are always written as zero.
        $this->_fwrite(
            "\x00\x00\x00\x00" .
            chr(($value >> 24) & 0xFF) .
            chr(($value >> 16) & 0xFF) .
            chr(($value >> 8)  & 0xFF) .
            chr($value         & 0xFF),
            8
        );
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte contributes its low seven bits, least significant group
     * first; a set high bit means another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift = 7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }

        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * Seven bits are emitted per byte, least significant group first, with
     * the high bit set on every byte except the last.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');

        while ($value > 0x7F) {
            $this->_fwrite(chr(($value & 0x7F) | 0x80));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }

    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix counts characters, not bytes, so the extra
     * bytes of every multi-byte character are fetched on demand while the
     * data is scanned. The two-byte sequence \xC0\x80 (Java 2 encoding of
     * the null character) is converted back to a single \x00 byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */
        $str_val = $this->_fread($strlen);

        for ($count = 0; $count < $strlen; $count++) {
            $lead = ord($str_val[$count]);
            if (($lead & 0xC0) != 0xC0) {
                // Single-byte character (or stray continuation byte).
                continue;
            }

            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }

            // Fetch the continuation bytes that the character count in the
            // length prefix did not account for.
            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if ($lead == 0xC0 && ord($str_val[$count + 1]) == 0x80) {
                // Collapse the pair into one real NUL byte. The string is
                // now one byte shorter, and the next character starts right
                // after $count, so neither bound nor index skips ahead.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count + 2);
                $strlen--;
            } else {
                // Step over the continuation bytes of this character.
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is written as a VInt character count followed by the
     * bytes of the string, with every \x00 byte replaced by \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        /**
         * String is already in Java 2 representation.
         * We should only calculate actual string length and replace
         * \x00 by \xC0\x80
         */
        for ($count = 0; $count < $strlen; $count++) {
            $lead = ord($str[$count]);
            if (($lead & 0xC0) != 0xC0) {
                continue;
            }

            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                // and we don't support non-BMP characters
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }

            // Continuation bytes do not count as characters.
            $chars -= $addBytes;
            $count += $addBytes;
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // A NUL byte counts as one character but is stored in two bytes.
        // str_replace() returns the input unchanged when there is nothing
        // to replace, so no separate "contains NUL" pass is needed.
        $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
    }

    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed by its byte length stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}