Esempio n. 1
0
 /**
  * Process a text and turn the project references it contains into links.
  *
  * The steps run in a fixed order: optional escaping, optional URL
  * auto-linking, then issue, review, commit and source references
  * (each one only when the matching entry of the request rights is
  * set), optional wrapping at 69 characters and optional nl2br().
  *
  * @param string Text to process.
  * @param object Request; its project and rights are read.
  * @param bool Echo the result instead of returning it. (true)
  * @param bool Wrap the text with Pluf_Text::wrapHtml(). (true)
  * @param bool Escape the text with Pluf_esc(). (true)
  * @param bool Convert scheme://... URLs into anchors. (true)
  * @param bool Run nl2br() on the result. (false)
  * @return string Processed text, only when $echo is false.
  */
 function start($text, $request, $echo = true, $wordwrap = true, $esc = true, $autolink = true, $nl2br = false)
 {
     $project = $request->project;
     $this->project = $project;
     $this->request = $request;
     $this->scm = IDF_Scm::get($project);

     // Escaping comes first so that the markup added below is kept as is.
     $text = $esc ? Pluf_esc($text) : $text;

     if ($autolink) {
         $url_pattern = '#([a-z]+://[^\\s\\(\\)]+)#i';
         $text = preg_replace($url_pattern, '<a href="\\1">\\1</a>', $text);
     }

     $rights = $request->rights;

     if ($rights['hasIssuesAccess']) {
         $issues_pattern = '#((?:issue|bug|ticket)(s)?\\s+|\\s+\\#)(\\d+)(\\#ic\\d+)?(?(2)((?:[, \\w]+(?:\\s+\\#)?)?\\d+(?:\\#ic\\d+)?){0,})#im';
         $text = preg_replace_callback($issues_pattern, array($this, 'callbackIssues'), $text);
     }

     if ($rights['hasReviewAccess']) {
         $reviews_pattern = '#(reviews?\\s+)(\\d+(?:(?:\\s+and|\\s+or|,)\\s+\\d+)*)\\b#i';
         $text = preg_replace_callback($reviews_pattern, array($this, 'callbackReviews'), $text);
     }

     if ($rights['hasSourceAccess']) {
         // Commit references are handled before "src:" path references.
         $commits_pattern = '#(commits?\\s+)([0-9a-f]{1,40}(?:(?:\\s+and|\\s+or|,)\\s+[0-9a-f]{1,40})*)\\b#i';
         $text = preg_replace_callback($commits_pattern, array($this, 'callbackCommits'), $text);

         $source_pattern = '#(src:)([^\\s\\(\\)\\\\]+(?:(\\\\)\\s+[^\\s\\(\\)\\\\]+){0,})+#im';
         $text = preg_replace_callback($source_pattern, array($this, 'callbackSource'), $text);
     }

     if ($wordwrap) {
         $text = Pluf_Text::wrapHtml($text, 69, "\n");
     }

     if ($nl2br) {
         $text = nl2br($text);
     }

     if (!$echo) {
         return $text;
     }
     echo $text;
 }
Esempio n. 2
0
 /**
  * Build the string used to index this object.
  *
  * The summary is repeated four times (presumably to give it more
  * weight in the index) and is followed by the indexable text of
  * every comment returned by get_comments_list().
  *
  * @return string Cleaned string, HTML entities decoded.
  */
 function _toIndex()
 {
     $comments = array();
     foreach ($this->get_comments_list() as $comment) {
         $comments[] = $comment->_toIndex();
     }
     $repeated_summary = str_repeat($this->summary . ' ', 4);
     $raw = $repeated_summary . ' ' . implode(' ', $comments);
     $decoded = html_entity_decode($raw, ENT_QUOTES, 'UTF-8');
     return Pluf_Text::cleanString($decoded);
 }
Esempio n. 3
0
 /**
  * Process a text and turn the project references it contains into links.
  *
  * The steps run in a fixed order: optional escaping, optional URL
  * auto-linking, then issue, review, commit and source references
  * (each one only when the matching entry of the request rights is
  * set), optional wrapping at 69 characters and optional nl2br().
  *
  * Commit references may be introduced by "<verb> in" or by a noun
  * such as "commit" or "revision", with an optional "r" before the
  * identifier.
  *
  * @param string Text to process.
  * @param object Request; its project and rights are read.
  * @param bool Echo the result instead of returning it. (true)
  * @param bool Wrap the text with Pluf_Text::wrapHtml(). (true)
  * @param bool Escape the text with Pluf_esc(). (true)
  * @param bool Convert scheme://... URLs into anchors. (true)
  * @param bool Run nl2br() on the result. (false)
  * @return string Processed text, only when $echo is false.
  */
 function start($text, $request, $echo = true, $wordwrap = true, $esc = true, $autolink = true, $nl2br = false)
 {
     $project = $request->project;
     $this->project = $project;
     $this->request = $request;
     $this->scm = IDF_Scm::get($project);

     // Escaping comes first so that the markup added below is kept as is.
     $text = $esc ? Pluf_esc($text) : $text;

     if ($autolink) {
         $url_pattern = '#([a-z]+://[^\\s\\(\\)]+)#i';
         $text = preg_replace($url_pattern, '<a href="\\1">\\1</a>', $text);
     }

     $rights = $request->rights;

     if ($rights['hasIssuesAccess']) {
         $issues_pattern = '#((?:issue|bug|ticket)(s)?\\s+|\\s+\\#)(\\d+)(\\#ic\\d+)?(?(2)((?:[, \\w]+(?:\\s+\\#)?)?\\d+(?:\\#ic\\d+)?){0,})#im';
         $text = preg_replace_callback($issues_pattern, array($this, 'callbackIssues'), $text);
     }

     if ($rights['hasReviewAccess']) {
         $reviews_pattern = '#(reviews?\\s+)(\\d+(?:(?:\\s+and|\\s+or|,)\\s+\\d+)*)\\b#i';
         $text = preg_replace_callback($reviews_pattern, array($this, 'callbackReviews'), $text);
     }

     if ($rights['hasSourceAccess']) {
         // Alternation of every accepted lead-in: "added in|fixed in|...",
         // followed by the plain nouns.
         $lead_ins = array();
         foreach (array('added', 'fixed', 'reverted', 'changed', 'removed') as $verb) {
             $lead_ins[] = $verb . ' in';
         }
         foreach (array('commit', 'commits', 'revision', 'revisions', 'rev', 'revs') as $noun) {
             $lead_ins[] = $noun;
         }
         $prefix = implode('|', $lead_ins);

         $commits_pattern = '#((?:' . $prefix . ')(?:\\s+r?))([0-9a-f]{1,40}((?:\\s+and|\\s+or|,)\\s+r?[0-9a-f]{1,40})*)\\b#i';
         $text = preg_replace_callback($commits_pattern, array($this, 'callbackCommits'), $text);

         $source_pattern = '=(src:)([^\\s@#,\\(\\)\\\\]+(?:(\\\\)[\\s@#][^\\s@#,\\(\\)\\\\]+){0,})+(?:\\@([^\\s#,]+))(?:#(\\d+))?=im';
         $text = preg_replace_callback($source_pattern, array($this, 'callbackSource'), $text);
     }

     if ($wordwrap) {
         $text = Pluf_Text::wrapHtml($text, 69, "\n");
     }

     if ($nl2br) {
         $text = nl2br($text);
     }

     if (!$echo) {
         return $text;
     }
     echo $text;
 }
Esempio n. 4
0
 /**
  * Build the string used to index this object.
  *
  * The summary is repeated four times (presumably to give it more
  * weight in the index) and is followed by the full message.
  *
  * @return string Cleaned string, HTML entities decoded.
  */
 function _toIndex()
 {
     $repeated_summary = str_repeat($this->summary . ' ', 4);
     $raw = $repeated_summary . ' ' . $this->fullmessage;
     $decoded = html_entity_decode($raw, ENT_QUOTES, 'UTF-8');
     return Pluf_Text::cleanString($decoded);
 }
Esempio n. 5
0
 /**
  * Index a document.
  *
  * The previous occurrences stored for the document are deleted, one
  * occurrence row is created per distinct word id (tagged with the
  * project of the document) and the indexation counter of the
  * document is created or incremented.
  *
  * See Pluf_Search for the disclaimer and information.
  *
  * @param Pluf_Model Document to index.
  * @param Stemmer used. ('Pluf_Text_Stemmer_Porter')
  * @return array Statistics ('total', 'new' and 'unique' keys).
  */
 public static function index($doc, $stemmer = 'Pluf_Text_Stemmer_Porter')
 {
     $words = Pluf_Text::tokenize($doc->_toIndex());
     if ($stemmer != null) {
         $words = self::stem($words, $stemmer);
     }

     // Flat list of the words and total number of occurrences.
     $words_flat = array();
     $total = 0.0;
     foreach ($words as $token => $count) {
         $words_flat[] = $token;
         $total += (double) $count;
     }

     // Drop the last indexation.
     $occ_model = new IDF_Search_Occ();
     $delete = new Pluf_SQL(
         'DELETE FROM ' . $occ_model->getSqlTable() . ' WHERE model_class=%s AND model_id=%s',
         array($doc->_model, $doc->id)
     );
     Pluf::db()->execute($delete->gen());

     // Get the ids for each word; a null id means the word is not known yet.
     $ids = self::getWordIds($words_flat);
     $n = count($ids);
     $new_words = 0;
     $done = array();

     for ($i = 0; $i < $n; $i++) {
         if ($ids[$i] === null) {
             $word = new Pluf_Search_Word();
             $word->word = $words_flat[$i];
             try {
                 $word->create();
                 $new_words++;
                 $ids[$i] = $word->id;
             } catch (Exception $e) {
                 // The word has most likely been created by another
                 // process in the meantime, so read its id again.
                 $r_ids = self::getWordIds(array($word->word));
                 if (!$r_ids[0]) {
                     // give up for this word
                     continue;
                 }
                 $ids[$i] = $r_ids[0];
             }
         }

         // Only one occurrence row per word id.
         if (isset($done[$ids[$i]])) {
             continue;
         }
         $done[$ids[$i]] = true;

         $occ = new IDF_Search_Occ();
         $occ->word = new Pluf_Search_Word($ids[$i]);
         $occ->model_class = $doc->_model;
         $occ->model_id = $doc->id;
         $occ->project = $doc->get_project();
         $count = $words[$words_flat[$i]];
         $occ->occ = $count;
         $occ->pondocc = $count / $total;
         $occ->create();
     }

     // update the stats
     $filter = new Pluf_SQL('model_class=%s AND model_id=%s', array($doc->_model, $doc->id));
     $last_index = Pluf::factory('Pluf_Search_Stats')->getList(array('filter' => $filter->gen()));
     if ($last_index->count() != 0) {
         $last_index[0]->indexations += 1;
         $last_index[0]->update();
     } else {
         $stats = new Pluf_Search_Stats();
         $stats->model_class = $doc->_model;
         $stats->model_id = $doc->id;
         $stats->indexations = 1;
         $stats->create();
     }

     return array('total' => $total, 'new' => $new_words, 'unique' => $n);
 }
Esempio n. 6
0
 /**
  * Index a document.
  *
  * The document must provide a method _toIndex() returning the
  * document as a string for indexation. The string must be clean
  * and will simply be tokenized by Pluf_Text::tokenize().
  *
  * So a recommended way to clean it at the end is to remove all
  * the HTML tags and then run the following on it:
  *
  *  return Pluf_Text::cleanString(html_entity_decode($string,
  *                                ENT_QUOTES, 'UTF-8'));
  *
  * Indexing is resource intensive so it is recommended to run the
  * indexing in an asynchronous way. When you save a resource to be
  * indexed, just write a log "need to index resource x" and then
  * you can index the resources every few minutes. Nobody cares if
  * your index is not perfectly fresh, but your end users care if
  * it takes 0.6s to get back the page instead of 0.1s.
  *
  * Take 500 average documents, index them while counting the total
  * time it takes to index. Divide by 500 and if the result is more
  * than 0.1s, use a log/queue.
  *
  * FIXME: Concurrency problem if you index the same doc at the same time.
  *
  * @param Pluf_Model Document to index.
  * @param Stemmer used. ('Pluf_Text_Stemmer_Porter')
  * @return array Statistics: 'total' is the number of word
  *               occurrences, 'new' the number of words created by
  *               this call and 'unique' the number of distinct words.
  */
 public static function index($doc, $stemmer = 'Pluf_Text_Stemmer_Porter')
 {
     $words = Pluf_Text::tokenize($doc->_toIndex());
     if ($stemmer != null) {
         $words = self::stem($words, $stemmer);
     }
     // Get the total number of words.
     $total = 0.0;
     $words_flat = array();
     foreach ($words as $word => $occ) {
         $total += (double) $occ;
         $words_flat[] = $word;
     }
     // Drop the last indexation.
     $gocc = new Pluf_Search_Occ();
     $sql = new Pluf_SQL('DELETE FROM ' . $gocc->getSqlTable() . ' WHERE model_class=%s AND model_id=%s', array($doc->_model, $doc->id));
     $db =& Pluf::db();
     $db->execute($sql->gen());
     // Get the ids for each word.
     $ids = self::getWordIds($words_flat);
     // Insert a new word for the missing words and add the occ.
     $n = count($ids);
     $new_words = 0;
     $done = array();
     for ($i = 0; $i < $n; $i++) {
         if ($ids[$i] === null) {
             $word = new Pluf_Search_Word();
             $word->word = $words_flat[$i];
             try {
                 $word->create();
                 // Count the word only when this call really created it.
                 $new_words++;
                 $ids[$i] = $word->id;
             } catch (Exception $e) {
                 // most likely concurrent addition of a word, try
                 // to read it.
                 $_ids = self::getWordIds(array($words_flat[$i]));
                 if ($_ids[0] === null) {
                     // Still unknown: skip the word instead of
                     // recording an occurrence for a null word id.
                     continue;
                 }
                 $ids[$i] = $_ids[0];
             }
         }
         // Only one occurrence row per word id.
         if (isset($done[$ids[$i]])) {
             continue;
         }
         $done[$ids[$i]] = true;
         $occ = new Pluf_Search_Occ();
         $occ->word = new Pluf_Search_Word($ids[$i]);
         $occ->model_class = $doc->_model;
         $occ->model_id = $doc->id;
         $occ->occ = $words[$words_flat[$i]];
         $occ->pondocc = $words[$words_flat[$i]] / $total;
         $occ->create();
     }
     // update the stats
     $sql = new Pluf_SQL('model_class=%s AND model_id=%s', array($doc->_model, $doc->id));
     $last_index = Pluf::factory('Pluf_Search_Stats')->getList(array('filter' => $sql->gen()));
     if ($last_index->count() == 0) {
         $stats = new Pluf_Search_Stats();
         $stats->model_class = $doc->_model;
         $stats->model_id = $doc->id;
         $stats->indexations = 1;
         $stats->create();
     } else {
         $last_index[0]->indexations += 1;
         $last_index[0]->update();
     }
     return array('total' => $total, 'new' => $new_words, 'unique' => $n);
 }
Esempio n. 7
0
 /**
  * Returns the n-grams of rank n of the word.
  *
  * The word is surrounded by a '_' boundary marker on each side
  * before being cut, so 'abc' with rank 3 gives '_ab', 'abc' and
  * 'bc_'. The n-gram made only of the two markers ('__') is never
  * returned.
  *
  * @param string Word.
  * @param int Rank of the n-grams. (3)
  * @return array N-grams
  */
 public static function makeNgrams($word, $n = 3)
 {
     // A rank below 1 cannot produce any meaningful n-gram.
     if ($n < 1) {
         return array();
     }
     // array_merge() is required here: the "+" union operator keeps
     // key 0 of its left operand, so with a zero-indexed list of
     // characters the first character of the word would be replaced
     // by the leading marker.
     $chars = array_merge(
         array('_'),
         array_values(Pluf_Text::stringToChars($word)),
         array('_')
     );
     $last_start = count($chars) - $n;
     $out = array();
     // Slide a window of $n characters over the padded word.
     for ($i = 0; $i <= $last_start; $i++) {
         $ngram = implode('', array_slice($chars, $i, $n));
         if ($ngram !== '__') {
             $out[] = $ngram;
         }
     }
     return $out;
 }
Esempio n. 8
0
 /**
  * Build the string used to index this object.
  *
  * The title and the summary are repeated four times (presumably to
  * give them more weight in the index) and are followed by the
  * indexable text of the current revision.
  *
  * @return string Cleaned string, HTML entities decoded.
  */
 function _toIndex()
 {
     $revision_text = $this->get_current_revision()->_toIndex();
     $heading = $this->title . ' ' . $this->summary . ' ';
     $raw = str_repeat($heading, 4) . ' ' . $revision_text;
     $decoded = html_entity_decode($raw, ENT_QUOTES, 'UTF-8');
     return Pluf_Text::cleanString($decoded);
 }