Пример #1
0
/**
 * Build a short description string for an HTML document.
 *
 * The content of the document's <meta name="description" ...> tag is
 * preferred. The tag is matched case-insensitively and may be closed with
 * either ">" or "/>" (with or without whitespace before it), so both HTML
 * and XHTML style markup are recognised. The captured value has any tags
 * stripped, is trimmed, and has runs of CR/LF/tab/space collapsed to a
 * single space.
 *
 * When no such meta tag is found, the first 255 bytes of
 * docreader_get_body($doc) are returned with '...' appended.
 *
 * @param string $doc Raw HTML source of the document.
 *
 * @return string The description text.
 */
function docreader_get_description($doc)
{
    // '|' is the pattern delimiter, so '/' needs no escaping. [^"]* cannot run
    // past the closing quote, which keeps the capture limited to the attribute.
    $pattern = '|<meta\s+name="description"\s+content="([^"]*)"\s*/?>|is';

    if (preg_match($pattern, $doc, $regs)) {
        $content = trim(strip_tags($regs[1]));

        // Inside single quotes "\\r" reaches PCRE as "\r", i.e. a carriage return.
        return preg_replace('|[\\r\\n\\t ]+|s', ' ', $content);
    }

    // NOTE(review): substr() counts bytes, so a multi-byte character can be cut
    // in half here, and the ellipsis is appended even when the body is shorter
    // than 255 bytes. Both are kept as-is because callers may rely on them.
    return substr(docreader_get_body($doc), 0, 255) . '...';
}
Пример #2
0
 // Decide how to read the file from its extension (i.e. a PDF must not be
 // parsed as an HTML file).
 $finfo = pathinfo($f);
 // pathinfo() leaves out the 'extension' key when the name has no extension,
 // so guard the lookup instead of reading an undefined index.
 $fext = isset($finfo['extension']) ? strtolower($finfo['extension']) : '';
 if (in_array($fext, array('html', 'htm'), true)) {
     // HTML file: load it and pull the metadata out of the markup.
     $doc = docreader_get_data($f);
     if (!$doc) {
         // Nothing usable was returned for this file; skip to the next
         // iteration of the enclosing loop (loop header is outside this excerpt).
         continue;
     }
     $title = docreader_get_title($doc);
     if (!$title) {
         $title = 'Untitled';
     }
     $description = docreader_get_description($doc);
     $keywords = docreader_get_keywords($doc);
     $body = extractor_run(docreader_get_body($doc), 'HTML');
     // Drop the raw document now that every field has been extracted from it.
     unset($doc);
 } else {
     // Any other file type: hand the path itself to extractor_run() and use
     // the file name as the title; there is no description or keywords.
     $body = extractor_run($f);
     if (!$body) {
         $body = '';
     }
     $description = '';
     $keywords = '';
     $title = basename($f);
 }
 // Record passed to the search index. $url, $ctype, $mtime and $domain are
 // set earlier, outside this excerpt.
 $data = array(
     'title' => $title,
     'url' => $url,
     'description' => $description,
     'keywords' => $keywords,
     'body' => $body,
     'access' => 'public',
     'status' => 'approved',
     'team' => 'none',
     'ctype' => $ctype,
     'mtime' => (string) $mtime,
     'domain' => $domain
 );
 // add file to index
 // NOTE(review): the counter is bumped before addDocument() is called, so it
 // also counts documents whose insertion fails -- confirm that is intended.
 $counts[$ctype]++;
 $res = $search->addDocument($data);
 if (!$res) {