Ejemplo n.º 1
0
 /**
  * Verify the test document's page handling, its ID accessor, and that a
  * base title survives an encode/decode round trip.
  */
 public function testDocumentIsValid()
 {
     $document = $this->_testDocument;

     // Before setPage() is called the document must not report a page ID.
     $this->assertNull($document->getPageId(), 'The document page ID was prematurely set');

     // Passing null to setPage() is expected to select the first page, so
     // a page ID must be available afterwards.
     $document->setPage(null);
     $this->assertNotNull($document->getPageId(), 'The document page ID was not set');

     // The ID accessor must hand back exactly the ID under test.
     $this->assertIdentical(TEST_DOCUMENT_ID, $document->getId());

     // Encode the document/page ID pair into a base title, decode it again,
     // and check that both parts come back unchanged.
     $encodedTitle = Scripto_Document::encodeBaseTitle($document->getId(), $document->getPageId());
     $decodedParts = Scripto_Document::decodeBaseTitle($encodedTitle);
     $this->assertEqual($decodedParts[0], TEST_DOCUMENT_ID, 'Something wen wrong during base title encoding/decoding. Document ID does not match');
     $this->assertEqual($decodedParts[1], $document->getPageId(), 'Something wen wrong during base title encoding/decoding. Page ID does not match');
 }
Ejemplo n.º 2
0
 /**
  * Build the encoded base title for a document page.
  *
  * The base title is the base MediaWiki page title corresponding to a
  * document page. It lets a document ID / page ID pair travel safely between
  * the external system, Scripto, and MediaWiki: both IDs may contain any
  * Unicode character, including ones that URL syntax or MediaWiki naming
  * conventions forbid, so each is Base64 encoded in a URL-safe form. Because
  * Base64 is reversible, the title can later be decoded back into its IDs.
  *
  * The resulting string is composed of, in order:
  * <ol>
  *     <li>a title prefix, which keeps MediaWiki from capitalizing the
  *     first character</li>
  *     <li>the URL-safe Base64 encoded document ID</li>
  *     <li>a delimiter separating the two encoded IDs</li>
  *     <li>the URL-safe Base64 encoded page ID</li>
  * </ol>
  *
  * @link http://en.wikipedia.org/wiki/Base64#URL_applications
  * @link http://en.wikipedia.org/wiki/Wikipedia:Naming_conventions_%28technical_restrictions%29
  * @param string|int $documentId The document ID
  * @param string|int $pageId The page ID
  * @return string The encoded base title
  */
 public static function encodeBaseTitle($documentId, $pageId)
 {
     // Encode the document ID first, then the page ID, as two separate parts.
     $encodedDocumentId = Scripto_Document::base64UrlEncode($documentId);
     $encodedPageId = Scripto_Document::base64UrlEncode($pageId);

     return self::BASE_TITLE_PREFIX
          . $encodedDocumentId
          . self::BASE_TITLE_DELIMITER
          . $encodedPageId;
 }
Ejemplo n.º 3
0
 /**
  * Collect every document that has at least one non-empty page in MediaWiki.
  *
  * Pages are listed in batches through the MediaWiki "allpages" query,
  * restricted to titles carrying the Scripto base title prefix and to pages
  * of at least one byte. Each title is decoded into a document ID and a page
  * ID; pages whose document the adapter does not recognize are skipped.
  *
  * @uses Scripto_Service_MediaWiki::getAllPages()
  * @return array An array following this format:
  * <code>
  * array(
  *     {document ID} => array(
  *         ['mediawiki_titles'] => array(
  *             {page ID} => {mediawiki title},
  *             {...}
  *         ),
  *         ['document_title'] => {document title}
  *     ),
  *     {...}
  * )
  * </code>
  */
 public function getAllDocuments()
 {
     $documents = array();
     // Document IDs already accepted, keyed by ID, so the adapter is asked
     // about each valid document only once.
     $knownTitles = array();
     $continueFrom = null;

     do {
         $parameters = array(
             'aplimit'   => 500,
             'apminsize' => 1,
             'apprefix'  => Scripto_Document::BASE_TITLE_PREFIX,
             'apfrom'    => $continueFrom,
         );
         $response = $this->_mediawiki->getAllPages($parameters);

         foreach ($response['query']['allpages'] as $page) {
             // Index 0 is the document ID, index 1 the page ID.
             $decoded = Scripto_Document::decodeBaseTitle($page['title']);
             $documentId = $decoded[0];

             // Known document: just attach this page to its entry.
             if (array_key_exists($documentId, $knownTitles)) {
                 $documents[$documentId]['mediawiki_titles'][$decoded[1]] = $page['title'];
                 continue;
             }

             // Unknown document: ignore pages that do not belong to a
             // document the adapter considers valid.
             if (!$this->_adapter->documentExists($documentId)) {
                 continue;
             }

             // First page seen for this document: create its entry.
             $title = $this->_adapter->getDocumentTitle($documentId);
             $knownTitles[$documentId] = $title;
             $documents[$documentId] = array(
                 'mediawiki_titles' => array($decoded[1] => $page['title']),
                 'document_title'   => $title,
             );
         }

         // Follow the continuation marker when MediaWiki reports more pages.
         $continueFrom = isset($response['query-continue'])
             ? $response['query-continue']['allpages']['apfrom']
             : null;
     } while ($continueFrom);

     return $documents;
 }