/**
 * Close terms stream
 *
 * Should be used for resources clean up if stream is not read up to the end.
 *
 * Every terms stream still held by the queue is closed, then the queue and
 * the last-term marker are dropped. Calling this method when the queue has
 * already been released (for example a second close) is a no-op instead of
 * a fatal "call to a member function on null" error.
 */
public function closeTermsStream()
{
    if ($this->_termsStreamQueue !== null) {
        // Drain the queue so every remaining stream releases its own resources
        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            $termStream->closeTermsStream();
        }
    }

    $this->_termsStreamQueue = null;
    $this->_lastTerm         = null;
}
/**
 * Walks the terms of five fixture segments through a TermsPriorityQueue and
 * checks that the distinct terms come out in the expected order.
 *
 * A term is recorded only when the segment now at the head of the queue is
 * positioned on a different term (or the queue is empty), so a term present
 * in several segments is collected once.
 */
public function testQueue()
{
    $directory    = new Directory\Filesystem(__DIR__ . '/_source/_files');
    $segmentsList = array('_0', '_1', '_2', '_3', '_4');

    $segmentInfoQueue = new Index\TermsPriorityQueue();

    foreach ($segmentsList as $segmentName) {
        $segmentInfo = new Index\SegmentInfo($directory, $segmentName, 2);
        $segmentInfo->resetTermsStream();

        // Skip "empty" segments
        if ($segmentInfo->currentTerm() !== null) {
            $segmentInfoQueue->put($segmentInfo);
        }
    }

    $result = array();
    while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
        $nextSegment = $segmentInfoQueue->top();
        if ($nextSegment === null
            || $nextSegment->currentTerm()->key() !== $segmentInfo->currentTerm()->key()
        ) {
            // We got new term
            $result[] = $segmentInfo->currentTerm();
        }

        $segmentInfo->nextTerm();
        // check, if segment dictionary is finished
        if ($segmentInfo->currentTerm() !== null) {
            // Put segment back into the priority queue
            $segmentInfoQueue->put($segmentInfo);
        }
    }

    // Expected term texts grouped by field, in the exact order the queue must produce them
    $expectedTexts = array(
        'contents' => array(
            'a', 'about', 'above', 'absolutely', 'accept', 'accesskey', 'account', 'accounts', 'adding',
            'additional', 'adresses', 'advised', 'after', 'alan', 'alberton', 'alexander', 'align', 'alink',
            'all', 'alphabetic', 'already', 'also', 'always', 'amazon', 'an', 'and', 'annoying',
            'another', 'any', 'apart', 'api', 'appreciate', 'are', 'arising', 'as', 'asked', 'at', 'attach',
            'attachments', 'authors', 'available',
            'b', 'back', 'backwards', 'bar', 'base', 'be', 'beaver', 'because', 'been', 'before', 'bgcolor',
            'binary', 'blockquote', 'body', 'book', 'border', 'bottom', 'break', 'brought', 'browser', 'bug',
            'bugs', 'business', 'but', 'buy', 'buying', 'by',
            'c', 'can', 'care', 'carefully', 'case', 'caused', 'cc', 'cd', 'cellpadding', 'cellspacing',
            'center', 'certain',
            'chances', 'change', 'changed', 'changes', 'changing', 'chapter', 'charset', 'check', 'class',
            'classes', 'clay', 'clear', 'click', 'co', 'code', 'coding', 'colspan', 'com', 'commands',
            'comment', 'community', 'company', 'compatibility', 'compatible', 'components', 'conditions',
            'consequential', 'considered', 'consists', 'contact', 'contains', 'content', 'contents',
            'contract', 'contribute', 'contributed', 'contributing', 'contributors', 'convissor',
            'copyright', 'core', 'costa', 'could', 'cox', 'create', 'creating', 'cvs', 'cvsread',
            'd', 'damage', 'damages',
            'daniel', 'data', 'date', 'david', 'depends', 'derivative', 'derived', 'describes', 'details',
            'detection', 'dev', 'developer', 'developers', 'diff', 'different', 'direct', 'disclaimed',
            'disclaimer', 'distributed', 'distribution', 'div', 'dl', 'do', 'doc', 'docblock', 'docbook',
            'docs', 'document', 'documentation', 'does', 'doesn', 'don', 'double', 'dt', 'dumbill',
            'each', 'easiest', 'easy', 'edd', 'edit', 'eeeeee', 'email', 'endorse', 'english', 'equal',
            'equiv', 'essential', 'even', 'event', 'everyone', 'example', 'exemplary',
            'existing', 'expand', 'explanation', 'explanations', 'explicit', 'express', 'extension',
            'fairly', 'faq', 'feature', 'feedback', 'feel', 'ff', 'ffffff', 'file', 'fill', 'financial',
            'find', 'fine', 'finished', 'first', 'fitness', 'fix', 'fixed', 'following', 'foo', 'footer',
            'for', 'force', 'forget', 'form', 'forms', 'forth', 'found', 'free', 'frequently', 'from',
            'front', 'fully', 'functionality',
            'generally', 'generator', 'giving', 'go', 'good', 'goods', 'gregory', 'groundbraking', 'group',
            'guide',
            'h', 'happy', 'has',
            'hasn', 'have', 'head', 'header', 'help', 'helping', 'her', 'hesitate', 'his', 'holder',
            'holders', 'home', 'homepage', 'how', 'however', 'hr', 'href', 'html', 'http',
            'i', 'if', 'immoral', 'implied', 'important', 'improvement', 'improving', 'in', 'incidental',
            'include', 'including', 'independent', 'index', 'indirect', 'information', 'install',
            'installation', 'installer', 'interruption', 'into', 'introduce', 'introduction', 'is', 'iso',
            'it', 'its',
            'jansen', 'jon',
            'keep', 'knowles',
            'lack', 'languages', 'later',
            'latest', 'left', 'legalnotice', 'li', 'liability', 'liable', 'license', 'licenses', 'like',
            'limited', 'link', 'list', 'listed', 'lists', 'literal', 'll', 'login', 'look', 'lorenzo',
            'loss', 'loveless',
            'mail', 'mailinglist', 'mailto', 'maintain', 'maintainer', 'maintainers', 'make', 'makes',
            'manual', 'martin', 'material', 'materials', 'may', 'mean', 'means', 'meet', 'merchantability',
            'merz', 'met', 'meta', 'michael', 'migrating', 'mika', 'mime', 'modification', 'modified',
            'modular', 'more', 'must',
            'n', 'name', 'names',
            'navfooter', 'navheader', 'navigation', 'nbsp', 'need', 'negligence', 'neither', 'net',
            'neufeind', 'new', 'newpackage', 'next', 'no', 'nor', 'not', 'note', 'notice', 'now',
            'obtain', 'obtained', 'of', 'on', 'one', 'only', 'open', 'opencontent', 'openpub', 'or', 'order',
            'org', 'originally', 'other', 'otherwise', 'our', 'out', 'outlook',
            'p', 'package', 'packages', 'page', 'paper', 'parise', 'part', 'particular', 'password', 'patch',
            'patches', 'pear', 'people', 'perform', 'permission', 'permitted',
            'persists', 'php', 'phpfi', 'plain', 'please', 'possibility', 'post', 'postinstall', 'ppm',
            'pre', 'preface', 'prefixed', 'presently', 'prev', 'previous', 'prior', 'process',
            'procurement', 'products', 'profits', 'prohibited', 'project', 'promote', 'proper', 'provided',
            'provides', 'pserver', 'public', 'publication', 'published', 'purpose',
            'questions', 'quote',
            'read', 'redistribution', 'redistributions', 'regents', 'rejected', 'rel', 'relevant', 'report',
            'reported', 'reporting', 'repository', 'representatives', 'reproduce', 'requests',
            'requirements', 'reserved', 'restrictions',
            'resulting', 'retain', 'revision', 'right', 'rights', 'rpc', 'running',
            's', 'sample', 'schlitt', 'schmidt', 'screen', 'script', 'sect', 'section', 'see', 'send',
            'seriously', 'service', 'services', 'set', 'several', 'shall', 'she', 'should', 'similar', 'so',
            'software', 'some', 'something', 'source', 'sources', 'span', 'special', 'specific', 'speed',
            'standard', 'standards', 'stefan', 'step', 'stephan', 'still', 'strict', 'structure',
            'stylesheet', 'subject', 'submit', 'submitting', 'substantively', 'substitute', 'such',
            'summary',
            'support', 'sure', 'system',
            't', 'table', 'take', 'target', 'task', 'td', 'terms', 'test', 'tested', 'text', 'th', 'that',
            'the', 'their', 'them', 'theory', 'there', 'think', 'this', 'thomas', 'those', 'thus', 'time',
            'tips', 'title', 'titlepage', 'to', 'tobias', 'toc', 'top', 'tort', 'tr', 'tracking',
            'translated', 'translating', 'translation', 'translations', 'translators', 'try', 'tt',
            'tuupola', 'txt', 'type',
            'u', 'ul', 'unable', 'under', 'understand', 'unified', 'unless', 'up',
            'us', 'use', 'used', 'useful', 'usefulinc', 'userinput', 'users', 'using', 'usually',
            'v', 'valign', 'verbose', 'version', 'versions', 'vlink',
            'wallner', 'warranties', 'way', 'ways', 'we', 'webservices', 'welcome', 'what', 'when',
            'whether', 'which', 'width', 'will', 'wish', 'wishlist', 'wishlists', 'with', 'without', 'won',
            'work', 'would', 'write', 'writing', 'written', 'www',
            'xml', 'xmlrpc',
            'you', 'your',
        ),
        'modified' => array(
            '1178009946',
        ),
        'path' => array(
            'about', 'authors', 'bugs', 'contributing', 'copyright', 'core', 'documentation', 'html',
            'indexsource', 'newpackage', 'patches', 'pear', 'wishlist',
        ),
    );

    $expected = array();
    foreach ($expectedTexts as $field => $texts) {
        foreach ($texts as $text) {
            $expected[] = new Index\Term($text, $field);
        }
    }

    // assertEquals reports which term differs instead of a bare "false is not true"
    $this->assertEquals($expected, $result);
}
/**
 * Merge the term dictionaries of all source segments.
 *
 * Each segment's terms stream is reset in SM_MERGE_INFO mode and every
 * non-empty segment is placed into a TermsPriorityQueue. Segments are then
 * popped one at a time: the positions of the current term are accumulated,
 * and once the segment at the head of the queue is positioned on a different
 * term (or the queue is empty) the accumulated data is handed to the writer
 * as a single term entry.
 */
private function _mergeTerms()
{
    $segmentInfoQueue = new TermsPriorityQueue();

    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        // The value returned by one reset is passed as the start id of the next segment
        $segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, SegmentInfo::SM_MERGE_INFO);

        // Skip "empty" segments
        if ($segmentInfo->currentTerm() !== null) {
            $segmentInfoQueue->put($segmentInfo);
        }
    }

    $this->_writer->initializeDictionaryFiles();

    $termDocs = array();
    while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
        // Merge positions array
        $termDocs += $segmentInfo->currentTermPositions();

        // Strict comparison: keys are presumably strings, and loose "!=" would
        // apply numeric-string juggling and could treat distinct keys as equal
        $nextSegment = $segmentInfoQueue->top();
        if ($nextSegment === null
            || $nextSegment->currentTerm()->key() !== $segmentInfo->currentTerm()->key()
        ) {
            // We got new term
            ksort($termDocs, SORT_NUMERIC);

            // Add term if it's contained in any document
            if (count($termDocs) > 0) {
                $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
            }
            $termDocs = array();
        }

        $segmentInfo->nextTerm();
        // check, if segment dictionary is finished
        if ($segmentInfo->currentTerm() !== null) {
            // Put segment back into the priority queue
            $segmentInfoQueue->put($segmentInfo);
        }
    }

    $this->_writer->closeDictionaryFiles();
}
/**
 * Returns an array of all terms in this index.
 *
 * The terms streams of all segments are reset and the non-empty segments are
 * walked through a TermsPriorityQueue. A term is appended to the result only
 * when the segment at the head of the queue is positioned on a different
 * term (or the queue is empty), so a term present in several segments is
 * returned once.
 *
 * @return array
 */
public function terms()
{
    $result = array();

    $segmentInfoQueue = new Index\TermsPriorityQueue();

    foreach ($this->_segmentInfos as $segmentInfo) {
        $segmentInfo->resetTermsStream();

        // Skip "empty" segments
        if ($segmentInfo->currentTerm() !== null) {
            $segmentInfoQueue->put($segmentInfo);
        }
    }

    while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
        // Strict comparison: keys are presumably strings, and loose "!=" would
        // apply numeric-string juggling and could treat distinct keys as equal
        $nextSegment = $segmentInfoQueue->top();
        if ($nextSegment === null
            || $nextSegment->currentTerm()->key() !== $segmentInfo->currentTerm()->key()
        ) {
            // We got new term
            $result[] = $segmentInfo->currentTerm();
        }

        if ($segmentInfo->nextTerm() !== null) {
            // Put segment back into the priority queue
            $segmentInfoQueue->put($segmentInfo);
        }
    }

    return $result;
}