Blame | Letzte Änderung | Log anzeigen | RSS feed
<?php/*** Zend Framework** LICENSE** This source file is subject to version 1.0 of the Zend Framework* license, that is bundled with this package in the file LICENSE, and* is available through the world-wide-web at the following URL:* http://www.zend.com/license/framework/1_0.txt. If you did not receive* a copy of the Zend Framework license and are unable to obtain it* through the world-wide-web, please send a note to license@zend.com* so we can mail you a copy immediately.** @package Zend_Search_Lucene* @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)* @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0*//** Zend_Search_Lucene_Exception */require_once 'Zend/Search/Lucene/Exception.php';/** Zend_Search_Lucene_Document */require_once 'Zend/Search/Lucene/Document.php';/** Zend_Search_Lucene_Storage_Directory */require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';/** Zend_Search_Lucene_Index_Term */require_once 'Zend/Search/Lucene/Index/Term.php';/** Zend_Search_Lucene_Index_TermInfo */require_once 'Zend/Search/Lucene/Index/TermInfo.php';/** Zend_Search_Lucene_Index_SegmentInfo */require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';/** Zend_Search_Lucene_Index_FieldInfo */require_once 'Zend/Search/Lucene/Index/FieldInfo.php';/** Zend_Search_Lucene_Index_Writer */require_once 'Zend/Search/Lucene/Index/Writer.php';/** Zend_Search_Lucene_Search_QueryParser */require_once 'Zend/Search/Lucene/Search/QueryParser.php';/** Zend_Search_Lucene_Search_QueryHit */require_once 'Zend/Search/Lucene/Search/QueryHit.php';/** Zend_Search_Lucene_Search_Similarity */require_once 'Zend/Search/Lucene/Search/Similarity.php';/*** @package Zend_Search_Lucene* @copyright Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)* @license http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0*/class Zend_Search_Lucene{/*** File system adapter.** @var Zend_Search_Lucene_Storage_Directory*/private $_directory = null;/*** File system adapter closing option** @var boolean*/private $_closeDirOnExit = true;/*** Writer for this index, not instantiated unless required.** @var Zend_Search_Lucene_Index_Writer*/private $_writer = null;/*** Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.** @var array Zend_Search_Lucene_Index_SegmentInfo*/private $_segmentInfos = array();/*** Number of documents in this index.** @var integer*/private $_docCount = 0;/*** Opens the index.** IndexReader constructor needs Directory as a parameter. It should be* a string with a path to the index folder or a Directory object.** @param mixed $directory* @throws Zend_Search_Lucene_Exception*/public function __construct($directory = null, $create = false){if ($directory === null) {throw new Zend_Search_Exception('No index directory specified');}if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {$this->_directory = $directory;$this->_closeDirOnExit = false;} else {$this->_directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);$this->_closeDirOnExit = true;}if ($create) {$this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true);} else {$this->_writer = null;}$this->_segmentInfos = array();$segmentsFile = $this->_directory->getFileObject('segments');$format = $segmentsFile->readInt();if ($format != (int)0xFFFFFFFF) {throw new Zend_Search_Lucene_Exception('Wrong segments file format');}// read version$segmentsFile->readLong();// read counter$segmentsFile->readInt();$segments = $segmentsFile->readInt();$this->_docCount = 0;// read segmentInfosfor ($count = 0; $count < $segments; $count++) {$segName = $segmentsFile->readString();$segSize = $segmentsFile->readInt();$this->_docCount += $segSize;$this->_segmentInfos[$count] =new Zend_Search_Lucene_Index_SegmentInfo($segName,$segSize,$this->_directory);}}/*** Object destructor*/public function __destruct(){$this->commit();if ($this->_closeDirOnExit) {$this->_directory->close();}}/*** Returns an instance of Zend_Search_Lucene_Index_Writer for the index** @return Zend_Search_Lucene_Index_Writer*/public function getIndexWriter(){if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {$this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);}return $this->_writer;}/*** Returns the Zend_Search_Lucene_Storage_Directory instance for this index.** @return Zend_Search_Lucene_Storage_Directory*/public function getDirectory(){return $this->_directory;}/*** Returns the total number of documents in this index.** @return integer*/public function count(){return $this->_docCount;}/*** Performs a query against the index and returns an array* of Zend_Search_Lucene_Search_QueryHit objects.* Input is a string or Zend_Search_Lucene_Search_Query.** @param mixed $query* @return array ZSearchHit*/public function find($query){if (is_string($query)) {$query = Zend_Search_Lucene_Search_QueryParser::parse($query);}if (!$query instanceof Zend_Search_Lucene_Search_Query) {throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');}$this->commit();$hits = array();$scores = array();$docNum = $this->count();for( $count=0; $count < $docNum; $count++ ) {$docScore = $query->score( $count, $this);if( $docScore != 0 ) {$hit = new Zend_Search_Lucene_Search_QueryHit($this);$hit->id = $count;$hit->score = $docScore;$hits[] = $hit;$scores[] = $docScore;}}array_multisort($scores, SORT_DESC, SORT_REGULAR, $hits);return $hits;}/*** Returns a list of all unique field names that exist in this index.** @param boolean $indexed* @return array*/public function getFieldNames($indexed = false){$result = array();foreach( $this->_segmentInfos as $segmentInfo ) {$result = array_merge($result, $segmentInfo->getFields($indexed));}return $result;}/*** Returns a Zend_Search_Lucene_Document object for the document* number $id in this index.** @param integer|Zend_Search_Lucene_Search_QueryHit $id* @return Zend_Search_Lucene_Document*/public function getDocument($id){if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {/* @var $id Zend_Search_Lucene_Search_QueryHit */$id = $id->id;}if ($id >= $this->_docCount) {/*** @todo exception here?*/return null;}$segCount = 0;$nextSegmentStartId = $this->_segmentInfos[ 0 ]->count();while( $nextSegmentStartId <= $id ) {$segCount++;$nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count();}$segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count();$fdxFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdx');$fdxFile->seek( ($id-$segmentStartId)*8, SEEK_CUR );$fieldValuesPosition = $fdxFile->readLong();$fdtFile = $this->_segmentInfos[ $segCount ]->openCompoundFile('.fdt');$fdtFile->seek( $fieldValuesPosition, SEEK_CUR );$fieldCount = $fdtFile->readVInt();$doc = new Zend_Search_Lucene_Document();for( $count = 0; $count < $fieldCount; $count++ ) {$fieldNum = $fdtFile->readVInt();$bits = $fdtFile->readByte();$fieldInfo = $this->_segmentInfos[ $segCount ]->getField($fieldNum);if( !($bits & 2) ) { // Text data$field = new Zend_Search_Lucene_Field($fieldInfo->name,$fdtFile->readString(),true,$fieldInfo->isIndexed,$bits & 1 );} else {$field = new Zend_Search_Lucene_Field($fieldInfo->name,$fdtFile->readBinary(),true,$fieldInfo->isIndexed,$bits & 1 );}$doc->addField($field);}return $doc;}/*** Returns an array of all the documents which contain term.** @param Zend_Search_Lucene_Index_Term $term* @return array*/public function termDocs(Zend_Search_Lucene_Index_Term $term){$result = array();$segmentStartDocId = 0;foreach ($this->_segmentInfos as $segInfo) {$termInfo = $segInfo->getTermInfo($term);if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {$segmentStartDocId += $segInfo->count();continue;}$frqFile = $segInfo->openCompoundFile('.frq');$frqFile->seek($termInfo->freqPointer,SEEK_CUR);$docId = 0;for( $count=0; $count < $termInfo->docFreq; $count++ ) {$docDelta = $frqFile->readVInt();if( $docDelta % 2 == 1 ) {$docId += ($docDelta-1)/2;} else {$docId += $docDelta/2;// read freq$frqFile->readVInt();}$result[] = $segmentStartDocId + $docId;}$segmentStartDocId += $segInfo->count();}return $result;}/*** Returns an array of all term positions in the documents.* Return array structure: array( docId => array( pos1, pos2, ...), ...)** @param Zend_Search_Lucene_Index_Term $term* @return array*/public function termPositions(Zend_Search_Lucene_Index_Term $term){$result = array();$segmentStartDocId = 0;foreach( $this->_segmentInfos as $segInfo ) {$termInfo = $segInfo->getTermInfo($term);if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {$segmentStartDocId += $segInfo->count();continue;}$frqFile = $segInfo->openCompoundFile('.frq');$frqFile->seek($termInfo->freqPointer,SEEK_CUR);$freqs = array();$docId = 0;for( $count = 0; $count < $termInfo->docFreq; $count++ ) {$docDelta = $frqFile->readVInt();if( $docDelta % 2 == 1 ) {$docId += ($docDelta-1)/2;$freqs[ $docId ] = 1;} else {$docId += $docDelta/2;$freqs[ $docId ] = $frqFile->readVInt();}}$prxFile = $segInfo->openCompoundFile('.prx');$prxFile->seek($termInfo->proxPointer,SEEK_CUR);foreach ($freqs as $docId => $freq) {$termPosition = 0;$positions = array();for ($count = 0; $count < $freq; $count++ ) {$termPosition += $prxFile->readVInt();$positions[] = $termPosition;}$result[ $segmentStartDocId + $docId ] = $positions;}$segmentStartDocId += $segInfo->count();}return $result;}/*** Returns the number of documents in this index containing the $term.** @param Zend_Search_Lucene_Index_Term $term* @return integer*/public function docFreq(Zend_Search_Lucene_Index_Term $term){$result = 0;foreach ($this->_segmentInfos as $segInfo) {$termInfo = $segInfo->getTermInfo($term);if ($termInfo !== null) {$result += $termInfo->docFreq;}}return $result;}/*** Retrive similarity used by index reader** @return Zend_Search_Lucene_Search_Similarity*/public function getSimilarity(){return Zend_Search_Lucene_Search_Similarity::getDefault();}/*** Returns a normalization factor for "field, document" pair.** @param integer $id* @param string $fieldName* @return Zend_Search_Lucene_Document*/public function norm( $id, $fieldName ){if( $id >= $this->_docCount )return null;$segCount = 0;$nextSegmentStartId = $this->_segmentInfos[ 0 ]->count();while( $nextSegmentStartId <= $id ) {$segCount++;$nextSegmentStartId += $this->_segmentInfos[ $segCount ]->count();}$segmentStartId = $nextSegmentStartId - $this->_segmentInfos[ $segCount ]->count();return $this->_segmentInfos[ $segCount ]->norm($id - $segmentStartId, $fieldName);}/*** Adds a document to this index.** @param Zend_Search_Lucene_Document $document*/public function addDocument(Zend_Search_Lucene_Document $document){if (!$this->_writer instanceof Zend_Search_Lucene_Index_Writer) {$this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);}$this->_writer->addDocument($document);}/*** Commit changes resulting from delete() or undeleteAll() operations.** @todo delete() and undeleteAll processing.*/public function commit(){if ($this->_writer !== null) {foreach ($this->_writer->commit() as $segmentName => $segmentInfo) {if ($segmentInfo !== null) {$this->_segmentInfos[] = $segmentInfo;$this->_docCount += $segmentInfo->count();} else {foreach ($this->_segmentInfos as $segId => $segInfo) {if ($segInfo->getName() == $segmentName) {unset($this->_segmentInfos[$segId]);}}}}}}/*************************************************************************@todo UNIMPLEMENTED*************************************************************************//*** Returns an array of all terms in this index.** @todo Implementation* @return array*/public function terms(){return array();}/*** Returns true if any documents have been deleted from this index.** @todo Implementation* @return boolean*/public function hasDeletions(){return false;}/*** Deletes a document from the index. $doc may contain a Zend_Search_Lucene_Document* or the number of the document to delete.** @todo Implementation* @param mixed $item_to_del*/public function delete($doc){}/*** Undeletes all documents currently marked as deleted in this index.** @todo Implementation*/public function undeleteAll(){}}