/**
 * Fetch a page and return the text of its <title> element.
 *
 * Only the first 16384 bytes of the response are read. The title is
 * HTML-entity-decoded (UTF-8), trimmed and limited to 128 bytes.
 *
 * NOTE(review): FILTER_VALIDATE_URL does not restrict the URL scheme, so the
 * value is handed to file_get_contents() as-is; confirm that callers only
 * pass trusted or http(s) addresses.
 *
 * @param string $url Address of the page to inspect.
 *
 * @return string The title, or one of the fixed status messages
 *                "not url", "url not found", "title parsing error",
 *                "sorry, we couldn't get a title", "no title found".
 */
public static function titleFromUrl($url)
{
    if (!filter_var($url, FILTER_VALIDATE_URL)) {
        return "not url";
    }

    try {
        $str = file_get_contents($url, false, null, 0, 16384);
    } catch (Exception $e) {
        // Only reachable when an error handler converts warnings to exceptions.
        return "url not found";
    }

    // file_get_contents() reports failure by returning false, not by throwing.
    if ($str === false) {
        return "url not found";
    }
    if ($str === '') {
        return "no title found";
    }

    // Non-greedy, case-insensitive, dot-matches-newline: picks the first
    // <title> even when it has attributes, spans lines or is upper-case.
    if (preg_match('/<title\b[^>]*>(.*?)<\/title>/is', $str, $title)) {
        $raw = $title[1];
    } else {
        // Fall back to the tolerant DOM parser (e.g. for an unclosed <title>).
        try {
            $html = Sunra\PhpSimple\HtmlDomParser::str_get_html($str);
        } catch (ErrorException $e) {
            return "title parsing error";
        }
        if (!$html) {
            return "sorry, we couldn't get a title";
        }
        $node = $html->find("title", 0);
        if (!$node) {
            return "no title found";
        }
        $raw = $node->plaintext;
    }

    $text = trim(html_entity_decode($raw, ENT_QUOTES, 'UTF-8'));

    // Keep the 128-byte limit, but never cut a multi-byte character in half
    // when the mbstring extension is available.
    if (function_exists('mb_strcut')) {
        return mb_strcut($text, 0, 128, 'UTF-8');
    }

    return substr($text, 0, 128);
}
/**
 * Scrape the source page and collect the audio entries listed in it.
 *
 * Every `li.track` element yields one entry built from its first <b>
 * (artist), first <i> (title), second <em> (duration) and first
 * `a.playlist-down` link (url). Entries missing any of these nodes are
 * skipped instead of aborting the whole search.
 *
 * @return array List of ['artist' => ['name' => string], 'title' => string,
 *               'duration' => string, 'url' => string]; empty when the page
 *               body cannot be parsed.
 */
public function search()
{
    $page = $this->getSource();
    $audios = [];

    // str_get_html() returns false for an empty or oversized body.
    $doc = Sunra\PhpSimple\HtmlDomParser::str_get_html($page->getBody());
    if (!$doc) {
        return $audios;
    }

    foreach ($doc->find('li.track') as $item) {
        $artistNode = $item->find('b', 0);
        $titleNode = $item->find('i', 0);
        $durationNode = $item->find('em', 1);
        $linkNode = $item->find('a.playlist-down', 0);

        // find() with an index yields null when the node does not exist.
        if (!$artistNode || !$titleNode || !$durationNode || !$linkNode) {
            continue;
        }

        $artist = ucwords($this->clearstr(html_entity_decode(trim($artistNode->innertext()))));
        $title = ucwords($this->clearstr(html_entity_decode(trim($titleNode->innertext()))));

        $audios[] = [
            'artist' => ['name' => $artist],
            'title' => $title,
            'duration' => $durationNode->innertext(),
            'url' => $linkNode->href,
        ];
    }

    return $audios;
}
/**
 * Scrape the source page and collect the audio entries listed in it.
 *
 * Nothing is parsed when the fetched page reports errors. Every `li.track`
 * element yields one entry built from its first <b> (artist), first <span>
 * (title), the first run of digits/colons inside its first <em> (duration)
 * and its first `a.playlist-down` link (url). Entries missing any of these
 * nodes, or without a recognisable duration, are skipped.
 *
 * @return array List of ['artist' => ['name' => string], 'title' => string,
 *               'duration' => string, 'url' => string]; empty on errors or
 *               when the page body cannot be parsed.
 */
public function search()
{
    $page = $this->getSource();
    $audios = [];

    if (count($page->getErrors()) != 0) {
        return $audios;
    }

    // str_get_html() returns false for an empty or oversized body.
    $doc = Sunra\PhpSimple\HtmlDomParser::str_get_html($page->getBody());
    if (!$doc) {
        return $audios;
    }

    foreach ($doc->find('li.track') as $item) {
        $artistNode = $item->find('b', 0);
        $titleNode = $item->find('span', 0);
        $durationNode = $item->find('em', 0);
        $linkNode = $item->find('a.playlist-down', 0);

        // find() with an index yields null when the node does not exist.
        if (!$artistNode || !$titleNode || !$durationNode || !$linkNode) {
            continue;
        }

        // The <em> may wrap the time in extra markup; keep only the mm:ss part.
        if (!preg_match('/[0-9\\:]+/iu', strip_tags($durationNode->innertext()), $match)) {
            continue;
        }

        $artist = ucwords($this->clearstr(html_entity_decode(trim($artistNode->innertext()))));
        $title = ucwords($this->clearstr(html_entity_decode(trim($titleNode->innertext()))));

        $audios[] = [
            'artist' => ['name' => $artist],
            'title' => $title,
            'duration' => $match[0],
            'url' => $linkNode->href,
        ];
    }

    return $audios;
}
} } //*/ return $app['twig']->render('equipe.twig', array('equipe' => $equipe, 'scores' => $scores)); })->bind("equipe"); /** * Live import from TDF website */ $app->get("/import", function () use($app) { $classement = array(); // Load HTML source filefile $client = new GuzzleHttp\Client(); $res = $client->get($app['import_url']); if ($res->getStatusCode() == "200") { // Parse result HTML $html = Sunra\PhpSimple\HtmlDomParser::str_get_html($res->getBody()); foreach ($html->find('tbody tr') as $k => $element) { // Sale mais d'un sale... if ($element->children(2)->tag == "td" && (int) $element->children(2)->innertext > 0) { $coureur = array(); $coureur['dossard'] = $element->children(2)->innertext; # Le numéro de dossard $coureur['nom'] = $element->children(1)->find("a")->innertext; //$coureur['tag'] = $element->children(2)->tag; // Ajout du coureur $classement[] = $coureur; } } echo "<pre>"; var_dump($classement); echo "</pre>";
/**
 * Import one not-yet-stored article from a news source into the `news` table.
 *
 * Steps: load the source record, fetch its listing page, pick an article
 * link that is not stored yet, fetch the article, rewrite relative image and
 * link addresses to absolute ones, and insert the row.
 *
 * @param int $id Identifier of the news source.
 *
 * @return bool True when an article was inserted; false when the source is
 *              unknown, a page cannot be fetched or parsed, the article has
 *              no title/content, or its slug is already stored.
 *
 * @throws Kohana_Exception
 */
public function downloadNews($id)
{
    /** @var $adminModel Model_Admin */
    $adminModel = Model::factory('Admin');

    // Arr::get() yields null when there is no first row; count(null) would fail.
    $source = Arr::get($this->findNewsSources($id), 0);
    if (empty($source)) {
        return false;
    }

    $sourceLink = Arr::get($source, 'link');

    // Derive "scheme://host[:port]" from the link itself instead of guessing
    // the TLD length from the position of the first dot.
    $linkParts = parse_url((string) $sourceLink);
    if (empty($linkParts['scheme']) || empty($linkParts['host'])) {
        return false;
    }
    $mainUrl = sprintf('%s://%s', $linkParts['scheme'], $linkParts['host']);
    if (isset($linkParts['port'])) {
        $mainUrl .= ':' . $linkParts['port'];
    }

    $parser = new \Sunra\PhpSimple\HtmlDomParser();
    $html = $parser->file_get_html($sourceLink);
    if (!$html) {
        // The listing page could not be fetched or parsed.
        return false;
    }

    $issetNewsSlug = [];
    foreach ($this->findNews($id, null, null, 'all') as $news) {
        $issetNewsSlug[] = $news['slug'];
    }

    $pageNewsLink = $this->findPagesNewsLink($id, $html, $issetNewsSlug);
    if (null === $pageNewsLink) {
        return false;
    }

    $url = $this->findNewsUrl($mainUrl, $pageNewsLink);
    if (null === $url) {
        return false;
    }

    $newsData = $this->findNewsData($id, $url);
    if (empty(Arr::get($newsData, 'title')) || empty(Arr::get($newsData, 'content'))) {
        return false;
    }

    $slug = $adminModel->slugify($newsData['title']);
    if (in_array($slug, $issetNewsSlug)) {
        return false;
    }

    $content = $parser->str_get_html($newsData['content']);
    if (!$content) {
        return false;
    }

    // Turn a site-relative address into an absolute one. Addresses that
    // already carry a scheme (http:, mailto:, data:, ...), protocol-relative
    // ones (//host/...) and pure fragments (#anchor) are left untouched.
    // Path-relative addresses are resolved against the site root, which is
    // an approximation of resolving them against the article's directory.
    $absolutize = function ($address) use ($mainUrl) {
        if (!is_string($address) || $address === '') {
            return $address;
        }
        if (preg_match('~^(?:[a-z][a-z0-9+.\-]*:|//|#)~i', $address)) {
            return $address;
        }

        return $mainUrl . ($address[0] === '/' ? '' : '/') . $address;
    };

    // Rewrite the attributes on the parsed nodes rather than with a global
    // str_replace(): a text replace re-prefixes addresses that occur more
    // than once and can also hit unrelated text that contains the address.
    foreach ($content->find('img') as $img) {
        $src = $img->src;
        $absolute = $absolutize($src);
        if ($absolute !== $src) {
            $img->src = $absolute;
        }
    }
    foreach ($content->find('a') as $link) {
        $href = $link->href;
        $absolute = $absolutize($href);
        if ($absolute !== $href) {
            $link->href = $absolute;
        }
    }

    // Serialise after the attribute changes so they are part of the output.
    $contentText = $content->innertext;

    DB::insert('news')
        ->columns(['title', 'content', 'source_link', 'source_id', 'slug', 'viewed', 'list_viewed', 'date'])
        ->values([$newsData['title']->innertext, $contentText, $url, $id, $slug, 0, 0, DB::expr('now()')])
        ->execute();

    return true;
}
/**
 * Get results from vkontakte
 *
 * Posts the search query to http://vk.com/audio and parses every `.audio`
 * element of the response. An entry is kept only when:
 *  - all expected nodes (artist link, title span, duration div, input) exist;
 *  - the duration has exactly the form "minutes:seconds" with 1-7 minutes;
 *  - artist and title are non-empty and at most 7 space-separated words.
 *
 * @return array List of ['artist' => ['name' => string], 'title' => string,
 *               'duration' => string, 'url' => string|null]; empty when the
 *               request reports errors or the body cannot be parsed.
 */
private function getResults()
{
    $url = 'http://vk.com/audio';
    $page = $this->getClient($url, array(CURLOPT_REFERER => $url))
        ->setCookie($this->cookieFile)
        ->post([
            'act' => 'search',
            'al' => 1,
            'autocomplete' => 1,
            'offset' => $this->_params['offset'],
            'q' => urlencode($this->_params['query']),
        ]);

    $audios = array();
    if (count($page->getErrors()) != 0) {
        return $audios;
    }

    // str_get_html() returns false for an empty or oversized body.
    $doc = Sunra\PhpSimple\HtmlDomParser::str_get_html($page->getBody());
    if (!$doc) {
        return $audios;
    }

    foreach ($doc->find('.audio') as $element) {
        $artistNode = $element->find('.title_wrap a', 0);
        $titleNode = $element->find('span.title', 0);
        $durationNode = $element->find('div.duration', 0);
        $inputNode = $element->find('input', 0);

        // find() with an index yields null when the node does not exist.
        if (!$artistNode || !$titleNode || !$durationNode || !$inputNode) {
            continue;
        }

        $artist = $this->wordFilter(strip_tags($artistNode->innertext()));
        $title = $this->wordFilter(strip_tags($titleNode->innertext()));

        // Require exactly "minutes:seconds"; compare minutes as an integer so
        // empty or non-numeric values are rejected the same way on every PHP
        // version, and $duration[1] is guaranteed to exist below.
        $duration = explode(':', $durationNode->innertext());
        $minutes = (int) $duration[0];
        if (count($duration) !== 2 || $minutes <= 0 || $minutes > 7) {
            continue;
        }

        $trackUrl = isset($inputNode->attr['value']) ? $inputNode->attr['value'] : null;

        $artist = $this->clearstr($artist);
        $title = $this->clearstr($title);

        // explode() always returns at least one element, so only the upper
        // bound and the emptiness checks are meaningful here.
        $titleWords = count(explode(' ', $title));
        $artistWords = count(explode(' ', $artist));
        if ($titleWords > 7 || $artistWords > 7 || empty($title) || empty($artist)) {
            continue;
        }

        $audios[] = array(
            'artist' => ['name' => html_entity_decode($artist)],
            'title' => html_entity_decode($title),
            'duration' => (strlen($duration[0]) == 1 ? '0' . $duration[0] : $duration[0]) . ':' . $duration[1],
            'url' => $trackUrl,
        );
    }

    return $audios;
}