Example #1
0
 /**
  * Run every selector in $this->element_arr against the DOM parsed from
  * $this->str and collect the inner text of each matched node.
  *
  * Side effects:
  *  - $this->dom_obj receives whatever get_htmldom_obj() returned.
  *  - $this->dom_info_arr[<key>] receives the match's outer text, tag name
  *    and attributes, plus the same three details for its parent node.
  *  - $this->tag_arr[<key>] receives the match's tag name.
  * <key> is "<selector key>_<match index>".
  *
  * Note: array_filter() without a callback drops every falsy value, so a
  * node whose trimmed text is exactly "0" is removed from the result too.
  *
  * @return array Trimmed, non-empty inner texts keyed by "<selector key>_<match index>";
  *               an empty array when parsing failed or nothing matched.
  */
 function extract_dom()
 {
     // Initialise up front so the no-match / parse-failure paths never hand
     // an undefined variable to array_map().
     $dom_arr = array();
     $this->dom_obj = get_htmldom_obj($this->str);
     if (!$this->dom_obj) {
         return $dom_arr;
     }
     foreach ($this->element_arr as $k => $v) {
         foreach ($this->dom_obj->find($v) as $k2 => $v2) {
             $key = $k . '_' . $k2;
             // Look the parent up once instead of three times per match.
             $parent = $v2->parent();
             $dom_arr[$key] = $v2->innertext;
             $this->dom_info_arr[$key] = array(
                 'outertext' => $v2->outertext,
                 'parent' => array(
                     'attr' => $parent->attr,
                     'outertext' => $parent->outertext,
                     'tag_name' => $parent->tag,
                 ),
                 'tag_name' => $v2->tag,
                 'attr' => $v2->attr,
             );
             $this->tag_arr[$key] = $v2->tag;
         }
     }
     return array_filter(array_map('trim', $dom_arr));
 }
Example #2
0
/**
 * Extract the optional "from", "author" and "article_dateline" fields from
 * a page using the rules supplied in $args.
 *
 * Recognised $args keys (all optional):
 *  - from_get_rules / from_get_type
 *  - author_get_rules / author_get_type
 *  - dateline_get_rules / dateline_get_type
 * A field is extracted only when its *_get_rules value is truthy. When the
 * matching *_get_type loosely equals 1 the rule is passed to dom_get_str()
 * together with the parsed DOM; otherwise it is passed to str_get_str()
 * together with the raw content. Every extracted value is then run through
 * format_html().
 *
 * @param string $content Raw page content.
 * @param array  $args    Extraction options, see above.
 * @return array|false Map of field name => formatted value, or false when
 *                     $content is empty, no rule is configured, or
 *                     get_htmldom_obj() returns a falsy value.
 */
function get_other_info($content, $args)
{
    if (!$content) {
        return false;
    }

    // Result key => option-name prefix. The options are read explicitly
    // rather than via extract(), so an unexpected key in $args (for example
    // "content" or "html") can no longer overwrite this function's locals,
    // and absent keys no longer raise undefined-variable notices.
    $field_prefixes = array(
        'from' => 'from',
        'author' => 'author',
        'article_dateline' => 'dateline',
    );

    $active = array();
    foreach ($field_prefixes as $field => $prefix) {
        $rules_key = $prefix . '_get_rules';
        $type_key = $prefix . '_get_type';
        $rules = isset($args[$rules_key]) ? $args[$rules_key] : null;
        if (!$rules) {
            continue;
        }
        $active[$field] = array(
            'rules' => $rules,
            'type' => isset($args[$type_key]) ? $args[$type_key] : null,
        );
    }
    if (!$active) {
        return false;
    }

    $html = get_htmldom_obj($content);
    if (!$html) {
        return false;
    }

    $re = array();
    foreach ($active as $field => $spec) {
        // Loose comparison on purpose: the type may arrive as 1 or "1".
        if ($spec['type'] == 1) {
            $re[$field] = dom_get_str($html, $spec['rules']);
        } else {
            $re[$field] = str_get_str($content, $spec['rules'], 'data');
        }
    }
    // Format only after all extraction is done, as the original did.
    foreach ($re as $field => $value) {
        $re[$field] = format_html($value);
    }

    // Release the DOM object before returning.
    $html->clear();
    unset($html);
    return $re;
}
Example #3
0
 /**
  * Collect the pagination links of a content page.
  *
  * When $this->p_arr['content_page_get_type'] loosely equals 1, the rule in
  * $this->p_arr['content_page_rules'] is used as a DOM selector: each
  * matched element's href is passed through $this->format_url(), skipped
  * if the result is empty, "#", or the element's inner text equals
  * milu_lang('up_page'), and otherwise expanded against $this->base_url.
  * In any other mode the work is delegated to string_page_link().
  *
  * @param string $content Raw page content.
  * @return array|false|mixed In DOM mode: the result of sarray_unique() on
  *                           the collected links, an empty array when no
  *                           link qualified, or false when
  *                           get_htmldom_obj() returns a falsy value.
  *                           Otherwise whatever string_page_link() returns.
  */
 function get_content_page($content)
 {
     if ($this->p_arr['content_page_get_type'] != 1) {
         // String-rule mode.
         return string_page_link($content, $this->p_arr['content_page_rules'], $this->now_url);
     }

     $html = get_htmldom_obj($content);
     if (!$html) {
         return false;
     }

     // Initialised so that "no qualifying link" yields a defined value.
     $item = array();
     foreach ($html->find($this->p_arr['content_page_rules']) as $v) {
         // Matched elements are not guaranteed to carry an href attribute.
         $attrs = $v->attr;
         $href = isset($attrs['href']) ? $attrs['href'] : null;
         $a_url = $this->format_url($href);
         if (!$a_url || $a_url == '#' || $v->innertext == milu_lang('up_page')) {
             continue;
         }
         $item[] = _expandlinks($a_url, $this->base_url);
     }
     $html->clear();
     unset($html);

     // De-duplicate once over the full list instead of on every iteration.
     // NOTE(review): assumes sarray_unique() is a pure function of its
     // argument - confirm against its definition.
     return $item ? sarray_unique($item) : array();
 }