Esempio n. 1
1
/**
 * Search an XML document for a tag and return the text of the first match.
 *
 * Elements are matched on their local name, so a namespace prefix on the
 * element does not matter.
 *
 * @param  string      $tagname   The local name of the tag to search for
 * @param  object      $xpath     XPath evaluator exposing query() (e.g. a DOMXPath) bound to the XML to search
 * @param  string|null $attribute If given and non-empty, only match elements whose "name" attribute equals this value
 * @return string|null The nodeValue of the first matching element, or null when nothing matches
 * @since Moodle 3.1
 */
function get_tag($tagname, $xpath, $attribute = null)
{
    // XPath 1.0 string literals have no escape sequence: pick a delimiter that does
    // not occur in the value, or glue the pieces together with concat() when both
    // quote characters occur.
    $literal = function ($value) {
        $value = (string) $value;
        if (strpos($value, "'") === false) {
            return "'" . $value . "'";
        }
        if (strpos($value, '"') === false) {
            return '"' . $value . '"';
        }
        return "concat('" . str_replace("'", "',\"'\",'", $value) . "')";
    };

    $expression = '//*[local-name() = ' . $literal($tagname) . ']';
    // Compare as a string so that a legitimate attribute value of "0" is not skipped.
    if ((string) $attribute !== '') {
        $expression .= '[@name=' . $literal($attribute) . ']';
    }

    $result = $xpath->query($expression);
    // query() yields false for an expression it cannot evaluate; treat that as "not found".
    if ($result && $result->length > 0) {
        return $result->item(0)->nodeValue;
    }
    return null;
}
Esempio n. 2
0
/**
 * Scrape one IMDb listing page, append its rows to imdb.txt and follow the "Next" link.
 *
 * Every row is appended to imdb.txt as "imageTitle*imageSource*linkTitle*linkHref*"
 * followed by four newlines. Following pages are scraped recursively for their
 * file output only; their rows are not merged into the returned array.
 *
 * @param  string $url Address of the listing page to scrape
 * @return array  Rows found on the page at $url, each an array with the keys
 *                imageTitle, imageSource, linkTitle and linkHref
 */
function scrapeImdb($url)
{
    $baseurl = "http://www.imdb.com";
    $array = array();
    $xpath = new XPath($url);
    $imgsrcQuery = $xpath->query("//td[@class='image']//img/@src");
    $imgtitleQuery = $xpath->query("//td[@class='image']//img/@title");
    $linktitleQuery = $xpath->query("//td[@class='title']/a/text()");
    $linkhrefQuery = $xpath->query("//td[@class='title']/a/@href");

    // The four node lists are not guaranteed to have the same length (e.g. a row
    // without an image), so a missing node is read as an empty string.
    $textAt = function ($list, $index) {
        $node = $list->item($index);
        return $node ? $node->nodeValue : '';
    };

    // "a" is the documented append mode; the former "a++" is not a documented mode string.
    // When the file cannot be opened fopen() has already raised a warning, and
    // scraping continues without the log file.
    $fh = fopen("imdb.txt", "a");
    for ($x = 0; $x < $linkhrefQuery->length; $x++) {
        // The array holds the bare field values; the "*" separator belongs to
        // the file format only (it used to leak into every value, leaving
        // linkHref with a trailing "*").
        $row = array(
            'imageTitle' => $textAt($imgtitleQuery, $x),
            'imageSource' => $textAt($imgsrcQuery, $x),
            'linkTitle' => $textAt($linktitleQuery, $x),
            'linkHref' => $baseurl . $textAt($linkhrefQuery, $x),
        );
        $array[$x] = $row;
        if ($fh) {
            fwrite($fh, implode("*", $row) . "*" . "\n\n\n\n");
        }
    }
    if ($fh) {
        fclose($fh);
    }

    // check for the "Next" link
    $nextPageQuery = $xpath->query("(//span[@class='pagination']/a[contains(., 'Next')])[1]/@href");
    if ($nextPageQuery->length) {
        $nextUrl = $baseurl . $nextPageQuery->item(0)->nodeValue;
        // The handle is closed before recursing, so only one handle is open at a time.
        scrapeImdb($nextUrl);
    }
    return $array;
}
Esempio n. 3
0
 /**
  * Download a comic's landing page from bengou.cm and extract its details.
  *
  * @param  string $bookid Identifier of the form "<name>_<id>"; the segment before the first "_" selects the page
  * @return array|bool False when the downloaded page is empty, otherwise an array with the keys
  *                    icon, author, status, catalog, tags, region, datetime, summary and section
  */
 function GetBook($bookid)
 {
     list($bookName, $unusedId) = explode("_", $bookid);
     $pageUrl = sprintf('http://bengou.cm/cartoon/%s/', $bookName);
     $page = $this->http->get($pageUrl, "email.gif");
     // Declare the charset inside the markup before it is handed to the parser.
     $page = str_replace("<head>", '<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />', $page);
     if (strlen($page) < 1) {
         return false;
     }

     $xpath = new XPath($page);
     $sectionList = array();
     foreach ($xpath->query("//div[@class='section-list mark']") as $sectionNode) {
         $chapterList = array();
         foreach ($xpath->query("span/a", $sectionNode) as $link) {
             $target = $link->getattribute("href");
             $title = $link->nodeValue;
             // e.g. http://bengou.cm/cartoon/xiudougaoxiao/7278_134945.html -> chapter id "134945"
             list($unusedId, $chapterId) = explode("_", basename($target, ".html"));
             $chapterList[] = array("name" => $title, "id" => $chapterId);
         }
         $sectionList[] = array(
             "name" => $xpath->get_value("h6", $sectionNode),
             "chapters" => $chapterList,
         );
     }

     // The sixth intro paragraph carries the update timestamp.
     $updated = $xpath->get_value("//div[@class='cartoon-intro']/div/p[6]");
     list($year, $month, $day, $hour, $minute, $second) = sscanf($updated, "更新时间:%d/%d/%d %d:%d:%d");

     return array(
         "icon" => $xpath->get_attribute("//div[@class='cartoon-intro']/a/img", "src"),
         "author" => $xpath->get_value("//div[@class='cartoon-intro']/div/p[1]/a"),
         "status" => $xpath->get_value("//div[@class='cartoon-intro']/div/p[2]"),
         "catalog" => $xpath->get_value("//div[@class='cartoon-intro']/div/p[3]"),
         "tags" => $xpath->get_value("//div[@class='cartoon-intro']/div/p[4]"),
         "region" => $xpath->get_value("//div[@class='cartoon-intro']/div/p[5]/a"),
         "datetime" => date("Y-m-d H:i:s", mktime($hour, $minute, $second, $month, $day, $year)),
         "summary" => $xpath->get_value("//p[@id='cartoon_digest2']"),
         "section" => $sectionList,
     );
 }
 /**
  * Read XML from an input source and map it onto the given class
  *
  * When the class carries an xmlmapping "factory" annotation, the method it
  * names is invoked to obtain the class that is actually populated. Its
  * arguments are the contents of the "pass" annotation's XPath queries
  * (evaluated against the root element) or, without a "pass" annotation,
  * the root element's node name.
  *
  * @param   xml.parser.InputSource source
  * @param   string classname
  * @param   [:var] inject
  * @return  lang.Object
  * @throws  lang.ClassNotFoundException
  * @throws  xml.XMLFormatException
  * @throws  lang.reflect.TargetInvocationException
  * @throws  lang.IllegalArgumentException
  */
 public function unmarshalFrom(InputSource $input, $classname, $inject = array())
 {
     libxml_clear_errors();
     $dom = new DOMDocument();
     $loaded = $dom->load(Streams::readableUri($input->getStream()));
     if (!$loaded) {
         $error = libxml_get_last_error();
         throw new XMLFormatException(
             trim($error->message),
             $error->code,
             $input->getSource(),
             $error->line,
             $error->column
         );
     }

     $xpath = new XPath($dom);
     $root = $dom->documentElement;
     $target = XPClass::forName($classname);

     if ($target->hasAnnotation('xmlmapping', 'factory')) {
         if (!$target->hasAnnotation('xmlmapping', 'pass')) {
             $arguments = array($root->nodeName);
         } else {
             $arguments = array();
             foreach ($target->getAnnotation('xmlmapping', 'pass') as $expression) {
                 $arguments[] = self::contentOf($xpath->query($expression, $root));
             }
         }
         // The factory is invoked without an instance and its result replaces the class.
         $factory = $target->getMethod($target->getAnnotation('xmlmapping', 'factory'));
         $target = $factory->invoke(null, $arguments);
     }

     return self::recurse($xpath, $root, $target, $inject);
 }
Esempio n. 5
0
 /**
  * Collect the books linked from a listing page.
  *
  * @param  string $html Markup of the listing page
  * @return array  Link titles keyed by "<last directory of the href>-<href basename without
  *                ".html", minus its first 8 bytes>"
  */
 private function __ParseBooks($html)
 {
     // Relabel the declared charset before the markup is parsed.
     $html = str_replace("text/html; charset=gb2312", "text/html; charset=gb18030", $html);
     $xpath = new XPath($html);
     $found = array();
     foreach ($xpath->query("//div[@class='border']/div/ul/li/a") as $anchor) {
         $link = $anchor->getattribute('href');
         $title = $anchor->getattribute('title');
         // Anchors lacking either attribute are skipped.
         if (strlen($link) < 1 || strlen($title) < 1) {
             continue;
         }
         $key = basename(dirname($link)) . '-' . substr(basename($link, ".html"), 8);
         $found[$key] = $title;
     }
     return $found;
 }
Esempio n. 6
0
/**
 * Extract the chapter links from a chapter-list page.
 *
 * @param  string $html Markup of the chapter-list page
 * @return array  One array("name" => link text, "uri" => second "_"-separated segment of the
 *                href's basename without ".html") per usable link; empty for empty markup
 */
function __ParseChapter($html)
{
    // Relabel the declared charset before the markup is parsed.
    $html = str_replace("text/html; charset=gb2312", "text/html; charset=gb18030", $html);
    $found = array();
    if (strlen($html) >= 1) {
        $xpath = new XPath($html);
        foreach ($xpath->query("//li[@class='a1']/a") as $anchor) {
            $link = $anchor->getattribute('href');
            $label = $anchor->nodeValue;
            // Links without a target or without text are skipped.
            if (strlen($link) < 1 || strlen($label) < 1) {
                continue;
            }
            list(, $id) = explode("_", basename($link, ".html"));
            $found[] = array("name" => $label, "uri" => $id);
        }
    }
    return $found;
}
Esempio n. 7
0
 /**
  * A string() query on markup built from UTF-8 bytes gives back the original text
  */
 public function queryTreeWithDefaultEncoding()
 {
     $expected = new String('value öäü', 'utf-8');
     $markup = sprintf('<document><node>%s</node></document>', $expected->getBytes('utf-8'));
     $xpath = new XPath($markup);
     $actual = new String($xpath->query('string(/document/node)'), 'utf-8');
     $this->assertEquals($expected, $actual);
 }
Esempio n. 8
0
 /**
  * Download a catalog page and list the pages offered by its "select" drop-down.
  *
  * @param  string $uri Address of the catalog page
  * @return array  One URL per non-empty option value, built as dirname($uri) . '/' . value
  */
 function GetCatalogUrls($uri)
 {
     $page = $this->http->get($uri, "Ysjs/bot.js");
     // Relabel the declared charset before the markup is parsed.
     $page = str_replace("text/html; charset=gb2312", "text/html; charset=gb18030", $page);
     $xpath = new XPath($page);
     $base = dirname($uri) . '/';
     $urls = array();
     foreach ($xpath->query("//select[@name='select']/option") as $option) {
         $value = $option->getattribute('value');
         if (strlen($value) >= 1) {
             $urls[] = $base . $value;
         }
     }
     return $urls;
 }
Esempio n. 9
0
 /**
  * Build the catalog of xxbh.net: two fixed entries plus the links of the "ul4" list.
  *
  * @return array Link targets keyed by link text
  */
 function GetCatalog()
 {
     $page = http_proxy_get('http://www.xxbh.net/', "template/xxbh", 10);
     // Relabel the declared charset before the markup is parsed.
     $page = str_replace("text/html; charset=gb2312", "text/html; charset=gb18030", $page);

     // Fixed entries: "recently updated" and "rankings".
     // NOTE(review): the second path has no leading slash, unlike the first — confirm this is intended.
     $catalog = array(
         "最近更新" => '/comicone/page_a.html',
         "排行榜" => 'comicone/page_b.html',
     );

     $xpath = new XPath($page);
     foreach ($xpath->query("//ul[@class='ul4']/li/a") as $anchor) {
         $target = $anchor->getattribute('href');
         $label = $anchor->nodeValue;
         // Skip links whose target is at most one byte long or whose text is empty.
         if (strlen($target) <= 1 || strlen($label) < 1) {
             continue;
         }
         $catalog[$label] = $target;
     }
     return $catalog;
 }
Esempio n. 10
-1
 /**
  * Resolve the picture URLs of one chapter on imanhua.com.
  *
  * The first script in the chapter page's head is executed with V8Js and its
  * cInfo variable is read back; the file entries it lists are turned into
  * absolute URLs.
  *
  * @param  string $bookid    Identifier of the form "<name>_<id>"; an empty name selects the "list_" URL layout
  * @param  string $chapterid Chapter identifier used in the page URL
  * @return array|bool False when the downloaded page is empty, otherwise the list of picture URLs
  *                    (empty when the page has no head script or cInfo lacks cid/files)
  */
 function GetChapter($bookid, $chapterid)
 {
     // Tolerate an identifier without "_" instead of reading an undefined offset.
     $parts = explode("_", $bookid);
     $bname = $parts[0];
     $bid = isset($parts[1]) ? $parts[1] : '';
     if (strlen($bname) > 0) {
         $uri = "http://www.imanhua.com/comic/{$bid}/{$bname}{$chapterid}.shtml";
     } else {
         $uri = "http://www.imanhua.com/comic/{$bid}/list_{$chapterid}.html";
     }
     $html = $this->http->get($uri, "foot_chapter.js");
     $html = str_replace("charset=gb2312", "charset=gb18030", $html);
     if (strlen($html) < 1) {
         return false;
     }

     $xpath = new XPath($html);
     $scripts = $xpath->query("/html/head/script");
     if ($scripts->length < 1) {
         return array();
     }

     // Run the page's own script, then read the cInfo variable it defines.
     $js = new V8Js();
     $js->executeString($scripts->item(0)->nodeValue, "imanhua", V8Js::FLAG_FORCE_ARRAY);
     $cInfo = $js->executeString("cInfo;", "imanhua", V8Js::FLAG_FORCE_ARRAY);
     if (!is_array($cInfo) || !isset($cInfo["cid"], $cInfo["files"]) || !is_array($cInfo["files"])) {
         return array();
     }

     if ($cInfo["cid"] > 7910) {
         // Page http://www.imanhua.com/comic/76/list_61224.html
         //   -> http://c4.mangafiles.com/Files/Images/76/61224/imanhua_001.png
         $prefix = "http://c4.mangafiles.com" . "/Files/Images/" . $cInfo["bid"] . "/" . $cInfo["cid"] . "/";
     } else {
         // Page http://www.imanhua.com/comic/135/list_7198.html lists entries
         // such as "/pictures/135/7198/trdh01.jpg", so only the host is prepended.
         $prefix = "http://t4.mangafiles.com";
     }

     // Both branches return the prefixed URLs; the second one used to build
     // them and then return the raw file entries instead.
     $pictures = array();
     foreach ($cInfo["files"] as $file) {
         $pictures[] = $prefix . $file;
     }
     return $pictures;
 }