/**
 * Fetch a web page and return the text of its <title> element.
 *
 * Only the first 16384 bytes of the response are downloaded. The title is
 * looked up with a regular expression first; when that finds nothing the
 * markup is handed to the HTML DOM parser as a fallback.
 *
 * @param string $url Address of the page to inspect.
 *
 * @return string The entity-decoded, trimmed title cut to at most 128 bytes, or
 *                one of the status messages "not url", "url not found",
 *                "title parsing error", "sorry, we couldn't get a title",
 *                "no title found".
 */
public static function titleFromUrl($url)
{
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
        return "not url";
    }

    // FILTER_VALIDATE_URL also accepts schemes such as file://, which would let
    // file_get_contents() read local files. Only remote web pages make sense here.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return "not url";
    }

    try {
        $str = file_get_contents($url, false, null, 0, 16384);
    } catch (Exception $e) {
        // Only reached when an error handler converts warnings into exceptions.
        return "url not found";
    }

    // file_get_contents() reports failure by returning false, not by throwing.
    if ($str === false) {
        return "url not found";
    }
    if ($str === '') {
        return "no title found";
    }

    // Decode entities, trim, and cap the length at 128 bytes. mb_strcut() keeps
    // the byte limit but never splits a multi-byte UTF-8 character in half.
    $clean = function ($raw) {
        $text = trim(html_entity_decode($raw, ENT_QUOTES, 'UTF-8'));

        return function_exists('mb_strcut')
            ? mb_strcut($text, 0, 128, 'UTF-8')
            : substr($text, 0, 128);
    };

    // Case-insensitive, non-greedy, spanning newlines, tolerating attributes on the tag.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title\s*>/is', $str, $title) === 1) {
        return $clean($title[1]);
    }

    try {
        $html = Sunra\PhpSimple\HtmlDomParser::str_get_html($str);
    } catch (ErrorException $e) {
        return "title parsing error";
    }
    if (!$html) {
        return "sorry, we couldn't get a title";
    }

    // Only the first <title> element is of interest.
    foreach ($html->find("title") as $node) {
        return $clean($node->plaintext);
    }

    return "no title found";
}
/**
 * Parse the fetched results page into a list of tracks.
 *
 * Every `li.track` element that contains all expected child nodes yields one
 * entry. Elements with missing pieces are skipped, and an empty list is
 * returned when the markup cannot be parsed at all.
 *
 * @return array List of entries shaped as
 *               ['artist' => ['name' => string], 'title' => string,
 *                'duration' => string, 'url' => mixed (href of the download link)].
 */
public function search()
{
    $audios = [];

    $page = $this->getSource();
    $doc = Sunra\PhpSimple\HtmlDomParser::str_get_html($page->getBody());
    if (!$doc) {
        // The parser returns false for input it refuses to parse (e.g. an empty body).
        return $audios;
    }

    foreach ($doc->find('li.track') as $item) {
        $artistNode = $item->find('b', 0);
        $titleNode = $item->find('i', 0);
        $durationNode = $item->find('em', 1); // the second <em> holds the duration
        $linkNode = $item->find('a.playlist-down', 0);

        // find() with an index yields null when nothing matches; skip broken items
        // instead of dying on a null member call.
        if ($artistNode === null || $titleNode === null || $durationNode === null || $linkNode === null) {
            continue;
        }

        $audios[] = [
            'artist' => [
                'name' => ucwords($this->clearstr(html_entity_decode(trim($artistNode->innertext())))),
            ],
            'title' => ucwords($this->clearstr(html_entity_decode(trim($titleNode->innertext())))),
            'duration' => $durationNode->innertext(),
            'url' => $linkNode->href,
        ];
    }

    return $audios;
}
/**
 * Return the most recent queries scraped from mp3skull's "latest" page.
 *
 * The scraped list is obtained through the cache handler under the key 'now'.
 * NOTE(review): the second argument (5) is presumably a cache lifetime and the
 * callback presumably runs on a cache miss - confirm against Memcache\Handler.
 *
 * @param int $limit Maximum number of entries to return.
 *
 * @return array Inner text of the links found under `#content`, cut to $limit entries.
 */
function getLastQueries($limit = 10)
{
    // Scrapes the link texts from the remote page.
    $scrapeLatest = function () {
        $dom = Sunra\PhpSimple\HtmlDomParser::file_get_html('http://mp3skull.com/latest.html');

        $texts = [];
        foreach ($dom->find('#content a') as $anchor) {
            $texts[] = $anchor->innertext;
        }

        return $texts;
    };

    $queries = Memcache\Handler::factory()->cache('now', 5, $scrapeLatest);

    // Hand back the list untouched when it already fits within the limit.
    return count($queries) > $limit
        ? array_slice($queries, 0, $limit)
        : $queries;
}
/**
 * Parse the fetched results page into a list of tracks.
 *
 * Nothing is parsed when the source reports errors. Every `li.track` element
 * whose first <em> contains a duration-like token (digits and colons) and that
 * has all other expected child nodes yields one entry; anything else is skipped.
 *
 * @return array List of entries shaped as
 *               ['artist' => ['name' => string], 'title' => string,
 *                'duration' => string, 'url' => mixed (href of the download link)].
 */
public function search()
{
    $audios = [];

    $page = $this->getSource();
    if (count($page->getErrors()) != 0) {
        return $audios;
    }

    $doc = Sunra\PhpSimple\HtmlDomParser::str_get_html($page->getBody());
    if (!$doc) {
        // The parser returns false for input it refuses to parse (e.g. an empty body).
        return $audios;
    }

    foreach ($doc->find('li.track') as $item) {
        $artistNode = $item->find('b', 0);
        $titleNode = $item->find('span', 0);
        $durationNode = $item->find('em', 0);
        $linkNode = $item->find('a.playlist-down', 0);

        // find() with an index yields null when nothing matches; skip broken items
        // instead of dying on a null member call.
        if ($artistNode === null || $titleNode === null || $durationNode === null || $linkNode === null) {
            continue;
        }

        // Keep only the first run of digits/colons from the duration cell.
        $found = preg_match('/[0-9\\:]+/iu', strip_tags($durationNode->innertext()), $match);
        if (!$found || !isset($match[0])) {
            continue;
        }

        $audios[] = [
            'artist' => [
                'name' => ucwords($this->clearstr(html_entity_decode(trim($artistNode->innertext())))),
            ],
            'title' => ucwords($this->clearstr(html_entity_decode(trim($titleNode->innertext())))),
            'duration' => $match[0],
            'url' => $linkNode->href,
        ];
    }

    return $audios;
}
} } //*/ return $app['twig']->render('equipe.twig', array('equipe' => $equipe, 'scores' => $scores)); })->bind("equipe"); /** * Live import from TDF website */ $app->get("/import", function () use($app) { $classement = array(); // Load HTML source filefile $client = new GuzzleHttp\Client(); $res = $client->get($app['import_url']); if ($res->getStatusCode() == "200") { // Parse result HTML $html = Sunra\PhpSimple\HtmlDomParser::str_get_html($res->getBody()); foreach ($html->find('tbody tr') as $k => $element) { // Sale mais d'un sale... if ($element->children(2)->tag == "td" && (int) $element->children(2)->innertext > 0) { $coureur = array(); $coureur['dossard'] = $element->children(2)->innertext; # Le numéro de dossard $coureur['nom'] = $element->children(1)->find("a")->innertext; //$coureur['tag'] = $element->children(2)->tag; // Ajout du coureur $classement[] = $coureur; } } echo "<pre>"; var_dump($classement); echo "</pre>";
/**
 * Load a news page and pick out its headline and body nodes.
 *
 * The CSS selectors used depend on $id; each known id (1-6) has its own pair
 * of selectors. The page is downloaded even when $id is not one of them.
 *
 * @param int    $id  Identifier choosing which selector pair applies.
 * @param string $url Address of the news page to download.
 *
 * @return array ['title' => node|null, 'content' => node|null] as returned by
 *               the parser's find(), or an empty array for an unknown $id.
 */
private function findNewsData($id, $url)
{
    $parser = new \Sunra\PhpSimple\HtmlDomParser();
    $dom = $parser->file_get_html($url);

    // Ordered lookup table: the first row whose id list contains $id wins.
    $selectorTable = [
        [
            'ids' => [1],
            'title' => 'h1',
            'content' => 'div.main_news_detail div.description div.text',
        ],
        [
            'ids' => [2],
            'title' => 'div.lside h1',
            'content' => 'div.lside div.fnblk div.fntxt',
        ],
        [
            'ids' => [3],
            'title' => 'div#content-middle h1',
            'content' => 'div#content-middle div[class="simple-content mt15"] div[class="simple-content mt25"]',
        ],
        [
            'ids' => [4, 5],
            'title' => 'div.reader_article h3.reader_article_headline',
            'content' => 'div.reader_article div.reader_article_body',
        ],
        [
            'ids' => [6],
            'title' => 'h2.content__title',
            'content' => 'div.article__text',
        ],
    ];

    $newsData = [];
    foreach ($selectorTable as $row) {
        // Loose comparison on purpose, matching how ids have always been compared here.
        if (in_array($id, $row['ids'])) {
            $newsData['title'] = $dom->find($row['title'], 0);
            $newsData['content'] = $dom->find($row['content'], 0);
            break;
        }
    }

    return $newsData;
}
/**
 * Get results from vkontakte.
 *
 * Posts the search query to the audio endpoint and turns every `.audio`
 * element of the response into a track entry. An element is skipped when:
 *  - any expected child node is missing,
 *  - its duration is not of the form "minutes:seconds",
 *  - it is shorter than one minute or its minutes part exceeds 7,
 *  - its artist or title is empty or longer than 7 space-separated words.
 *
 * @return array List of entries shaped as
 *               ['artist' => ['name' => string], 'title' => string,
 *                'duration' => 'MM:SS', 'url' => mixed (value of the element's input)].
 */
private function getResults()
{
    $url = 'http://vk.com/audio';

    // NOTE(review): the query is urlencode()d before being handed to post();
    // if the client encodes form fields itself this double-encodes - confirm.
    $page = $this->getClient($url, [CURLOPT_REFERER => $url])
        ->setCookie($this->cookieFile)
        ->post([
            'act' => 'search',
            'al' => 1,
            'autocomplete' => 1,
            'offset' => $this->_params['offset'],
            'q' => urlencode($this->_params['query']),
        ]);

    $audios = [];
    if (count($page->getErrors()) != 0) {
        return $audios;
    }

    $doc = Sunra\PhpSimple\HtmlDomParser::str_get_html($page->getBody());
    if (!$doc) {
        // The parser returns false for input it refuses to parse (e.g. an empty body).
        return $audios;
    }

    foreach ($doc->find('.audio') as $element) {
        $artistNode = $element->find('.title_wrap a', 0);
        $titleNode = $element->find('span.title', 0);
        $durationNode = $element->find('div.duration', 0);
        $inputNode = $element->find('input', 0);

        // find() with an index yields null when nothing matches; skip broken items
        // instead of dying on a null member call.
        if ($artistNode === null || $titleNode === null || $durationNode === null || $inputNode === null) {
            continue;
        }

        $artist = $this->wordFilter(strip_tags($artistNode->innertext()));
        $title = $this->wordFilter(strip_tags($titleNode->innertext()));

        // Require exactly "minutes:seconds"; this also guarantees $duration[1] exists.
        $duration = explode(':', $durationNode->innertext());
        if (count($duration) !== 2) {
            continue;
        }

        // Compare as an integer so the check behaves the same on every PHP version.
        $minutes = (int) $duration[0];
        if ($minutes === 0 || $minutes > 7) {
            continue;
        }

        $trackUrl = $inputNode->attr['value'];

        $artist = $this->clearstr($artist);
        $title = $this->clearstr($title);

        // explode() always returns at least one element, so only the upper bound matters.
        $titleWords = count(explode(' ', $title));
        $artistWords = count(explode(' ', $artist));
        if ($titleWords > 7 || $artistWords > 7 || empty($title) || empty($artist)) {
            continue;
        }

        $audios[] = [
            'artist' => ['name' => html_entity_decode($artist)],
            'title' => html_entity_decode($title),
            // Left-pad single-digit minutes to two characters.
            'duration' => str_pad($duration[0], 2, '0', STR_PAD_LEFT) . ':' . $duration[1],
            'url' => $trackUrl,
        ];
    }

    return $audios;
}