/**
 * Verify the basic state of the test document.
 *
 * Checks that no page is selected initially, that a page ID is available
 * after calling setPage(null), that getId() returns TEST_DOCUMENT_ID, and
 * that the document ID and page ID survive a round trip through
 * Scripto_Document::encodeBaseTitle() / decodeBaseTitle().
 */
public function testDocumentIsValid()
{
    // Assert a page has not been set yet.
    $this->assertNull(
        $this->_testDocument->getPageId(),
        'The document page ID was prematurely set'
    );

    // Assert a page can be set (in this case, the first page).
    $this->_testDocument->setPage(null);
    $this->assertNotNull(
        $this->_testDocument->getPageId(),
        'The document page ID was not set'
    );

    // Assert accessor methods return expected values.
    $this->assertIdentical(TEST_DOCUMENT_ID, $this->_testDocument->getId());

    // Assert that a base title encoded from the document ID and page ID
    // decodes back to the same pair: index 0 is the document ID, index 1
    // is the page ID.
    $baseTitle = Scripto_Document::encodeBaseTitle(
        $this->_testDocument->getId(),
        $this->_testDocument->getPageId()
    );
    $decodedBaseTitle = Scripto_Document::decodeBaseTitle($baseTitle);
    $this->assertEqual(
        $decodedBaseTitle[0],
        TEST_DOCUMENT_ID,
        'Something went wrong during base title encoding/decoding. Document ID does not match'
    );
    $this->assertEqual(
        $decodedBaseTitle[1],
        $this->_testDocument->getPageId(),
        'Something went wrong during base title encoding/decoding. Page ID does not match'
    );
}
/**
 * Get all documents from MediaWiki that have at least one page with text.
 *
 * Pages are requested in batches (aplimit 500, apminsize 1) restricted to
 * titles beginning with Scripto_Document::BASE_TITLE_PREFIX. Requests are
 * repeated while the response carries a "query-continue" apfrom value.
 * Pages whose decoded document ID is rejected by the adapter's
 * documentExists() are omitted from the result.
 *
 * @uses Scripto_Service_MediaWiki::getAllPages()
 * @return array An array following this format:
 * <code>
 * array(
 *     {document ID} => array(
 *         ['mediawiki_titles'] => array(
 *             {page ID} => {mediawiki title},
 *             {...}
 *         ),
 *         ['document_title'] => {document title}
 *     ),
 *     {...}
 * )
 * </code>
 */
public function getAllDocuments()
{
    $from = null;
    $allDocuments = array();

    // Document IDs the adapter has already rejected during this call, kept
    // so documentExists() is asked only once per document instead of once
    // per page of that document.
    $invalidDocumentIds = array();

    do {
        $response = $this->_mediawiki->getAllPages(array(
            'aplimit'   => 500,
            'apminsize' => 1,
            'apprefix'  => Scripto_Document::BASE_TITLE_PREFIX,
            'apfrom'    => $from,
        ));

        // Tolerate a response without a page list instead of iterating
        // over an undefined index.
        $pages = array();
        if (isset($response['query']['allpages'])
            && is_array($response['query']['allpages'])
        ) {
            $pages = $response['query']['allpages'];
        }

        foreach ($pages as $page) {
            // Decode the document ID (index 0) and page ID (index 1).
            $documentIds = Scripto_Document::decodeBaseTitle($page['title']);
            $documentId = $documentIds[0];
            $pageId = $documentIds[1];

            // The document was already set: only add the page.
            if (array_key_exists($documentId, $allDocuments)) {
                $allDocuments[$documentId]['mediawiki_titles'][$pageId] = $page['title'];
                continue;
            }

            // Filter out pages that are not valid documents before
            // getting the title.
            if (isset($invalidDocumentIds[$documentId])) {
                continue;
            }
            if (!$this->_adapter->documentExists($documentId)) {
                $invalidDocumentIds[$documentId] = true;
                continue;
            }

            // Set the document along with its first encountered page.
            $allDocuments[$documentId] = array(
                'mediawiki_titles' => array($pageId => $page['title']),
                'document_title'   => $this->_adapter->getDocumentTitle($documentId),
            );
        }

        // Set the query continue, if any. A missing apfrom ends the loop.
        if (isset($response['query-continue']['allpages']['apfrom'])) {
            $from = $response['query-continue']['allpages']['apfrom'];
        } else {
            $from = null;
        }
    } while ($from);

    return $allDocuments;
}