/**
 * Sets up the provider from a name and a source URL.
 *
 * In order: registers the name through setName(), remembers the URL in
 * $url_provider, loads that URL through the parent's loadContent() and
 * finally calls loadAttributes().
 *
 * @param mixed $name Value forwarded to setName().
 * @param mixed $url  Address stored in $url_provider and passed to parent::loadContent().
 */
function __construct($name, $url)
{
    $this->setName($name);
    $this->url_provider = $url;

    // The parent implementation is called explicitly, bypassing any override
    // of loadContent() in this class.
    parent::loadContent($url);

    $this->loadAttributes();
}
/**
 * Requests the video's FLV URL from the Brightcove library service and
 * returns it after passing it through convertToSecure().
 *
 * The request uses the find_video_by_id command with $this->id_video and
 * $this->token, asking only for the Video::FIELD_FLVURL field.
 *
 * If the response is not valid JSON, does not decode to an array, or does
 * not contain the requested field, null is handed to convertToSecure().
 * That is the same value the unguarded array access used to produce, but
 * without raising a PHP notice/warning on the way.
 *
 * @return mixed Whatever convertToSecure() returns for the fetched value.
 */
function getUrlVideo()
{
    $json = new HTMLProvider();
    $json->loadContent(
        "https://api.brightcove.com/services/library?command=find_video_by_id&video_id=" . $this->id_video
        . "&video_fields=" . Video::FIELD_FLVURL
        . "&media_delivery=http&token=" . $this->token
    );

    $data = json_decode($json->htmlContent, true);

    // json_decode() returns null for malformed input and can return a scalar
    // for non-object JSON, so the field is only read from a real array.
    $flvUrl = (is_array($data) && isset($data[Video::FIELD_FLVURL]))
        ? $data[Video::FIELD_FLVURL]
        : null;

    return $this->convertToSecure($flvUrl);
}
/**
 * Runs the search for the current term and returns the links found in the
 * result page.
 *
 * The "@" placeholder in self::URL_SEARCH is replaced with the URL-encoded
 * term, the page is loaded through the parent's loadContent(), and the value
 * of every quoted href attribute that starts with "http" is collected. The
 * collected list is also stored in $this->lastResult.
 *
 * @return array|null List of link strings, or null when no href matched
 *                    (in which case $this->lastResult is left untouched).
 */
function getResult()
{
    $searchUrl = str_replace("@", urlencode($this->getTerm()), self::URL_SEARCH);
    parent::loadContent($searchUrl);

    $found = preg_match_all('/href=[\\"\']http(.+?)[\\"\']/i', $this->htmlContent, $hrefs, PREG_SET_ORDER);
    if (!$found) {
        return;
    }

    $links = array();
    foreach ($hrefs as $href) {
        // Group 1 is everything after the "http" prefix, so the prefix is restored.
        $links[] = "http" . $href[1];
    }

    $this->lastResult = $links;

    return $links;
}
/**
 * Loads the productions to be queued into the object's $dataRepository
 * property.
 *
 * Does nothing once $this->skip has reached self::MAX_NUM_QUERY. Otherwise
 * the page at self::URL_SOURCE . $this->skip is loaded and, for every table
 * cell matched by the "title" class pattern, the first anchor whose href
 * (prefixed with self::WEB_SITE_SOURCE) passes Util::isUrl() is appended as
 * array(title, link).
 */
public function loadRepository()
{
    if ($this->skip >= self::MAX_NUM_QUERY) {
        return;
    }

    parent::loadContent(self::URL_SOURCE . $this->skip);

    // Split the page into the HTML fragments that hold each production.
    preg_match_all('/<td[^>]*class=["\']title*["\']\\>(.*?)<\\/td>/i', $this->htmlContent, $cells, PREG_SET_ORDER);

    foreach ($cells as $cell) {
        $hasAnchors = preg_match_all(
            '/<a\\s+.*?href=[\\"\']?([^\\"\' >]*)[\\"\']?[^>]*>(.*?)<\\/a>/i',
            $cell[0],
            $anchors,
            PREG_SET_ORDER
        );
        if (!$hasAnchors) {
            continue;
        }

        foreach ($anchors as $anchor) {
            $target = self::WEB_SITE_SOURCE . $anchor[1];
            if (Util::isUrl($target)) {
                // array(title, link); only the first valid anchor of a cell is kept.
                $this->dataRepository[] = array(strip_tags($anchor[0]), $target);
                break;
            }
        }
    }
}
/**
 * Tries to resolve the Latin American Spanish title of a production by
 * scraping Google and Wikipedia; falls back to the given title whenever a
 * step yields nothing.
 *
 * Steps:
 *  1. Google search for the title plus "wikipedia"; the URL of the first
 *     result item is extracted with Util::extractURLFromText().
 *  2. If that URL is not on es.wikipedia, the page is loaded and its
 *     "interwiki-es" interlanguage item supplies the Spanish article link.
 *  3. The Spanish article's infobox row labelled "Título" is located.
 *  4. The title is picked from that row according to the region labels
 *     ("España", "Latinoamérica", "Hispanoamérica") it contains.
 *
 * @param string $title Original title; also the fallback return value.
 *
 * @return string The resolved title, or $title when it cannot be resolved.
 */
private function getLatinTitle($title)
{
    // NOTE(review): the oq=/gs_l= parameters look like leftovers from a
    // captured browser query; they are kept because the request is sent as-is.
    $url = "https://www.google.com.co/search?num=100&site=&source=hp&q=" . Util::convertTextToSearch($title) . "+wikipedia&oq=Straight+Outta+Compton+wi&gs_l=hp.3.0.35i39j0j0i22i30l8.7079.8305.0.9175.5.5.0.0.0.0.306.973.0j4j0j1.5.0....0...1c.1.64.hp..1.4.667.0.OJ2Ztj0KNyk";

    $contentHtml = new HTMLProvider();
    $contentHtml->loadContent($url);

    if (!preg_match_all('/<li[^>]*class=["\']g*["\']\\>(.*?)<\\/li>/i', $contentHtml->htmlContent, $match_result)) {
        return $title;
    }

    $link = Util::extractURLFromText($match_result[0][0]);
    // Without an extracted URL there is nothing to follow; previously this
    // fell through to a request for an empty address.
    if (!isset($link[1])) {
        return $title;
    }
    $link = strip_tags($link[1]);
    if ($link === '') {
        return $title;
    }

    // Wikipedia page of the production: when it is not the Spanish edition,
    // follow its interlanguage link to the Spanish article.
    if (strpos($link, "es.wikipedia") === false) {
        $contentHtml->loadContent($link);

        if (!preg_match_all('/<li[^>]*class=["\']interlanguage-link interwiki-es*["\']\\>(.*?)<\\/li>/i', $contentHtml->htmlContent, $match_result)) {
            return $title;
        }

        $regex = '/\\/\\/[^\\" ]+/i';
        preg_match_all($regex, $match_result[1][0], $link_es);
        // No "//host/..." link inside the item: give up instead of requesting "https:".
        if (!isset($link_es[0][0])) {
            return $title;
        }
        $link = $link_es[0][0];
    }

    // Only protocol-relative links ("//es.wikipedia.org/...") need a scheme.
    // Testing for the substring "https" used to turn "http://..." links into
    // "https:http://...".
    if (strpos($link, "//") === 0) {
        $link = "https:" . $link;
    }
    $contentHtml->loadContent($link);

    if (!preg_match_all('/<table\\s+.*?class=[\\"\']infobox plainlist plainlinks[\\"\']?[^>]*>(.*?)<\\/table>/i', $contentHtml->htmlContent, $match_result)) {
        return $title;
    }

    if (!preg_match_all('/Título<\\/th>(.*?)<\\/td>/i', $match_result[0][0], $match_info, PREG_SET_ORDER)) {
        return $title;
    }

    // Raw HTML of the "Título" infobox row (label plus value cell).
    $info = $match_info[0][0];

    if (strpos($info, "España") !== false) {
        if (strpos($info, "<i>") !== false) {
            // Titles are italicised; choose by the relative position of the
            // region labels in the row.
            preg_match_all('/<i>(.*?)<\\/i>/i', $info, $match_title, PREG_SET_ORDER);

            if (strpos($info, "Latinoamérica") !== false) {
                if (strpos($info, "España") > strpos($info, "Latinoamérica")) {
                    return isset($match_title[0][0]) ? strip_tags($match_title[0][0]) : $title;
                } else {
                    return isset($match_title[1][0]) ? strip_tags($match_title[1][0]) : $title;
                }
            }

            if (strpos($info, "Hispanoamérica") !== false) {
                if (strpos($info, "España") > strpos($info, "Hispanoamérica")) {
                    return isset($match_title[0][0]) ? strip_tags($match_title[0][0]) : $title;
                } else {
                    return isset($match_title[1][0]) ? strip_tags($match_title[1][0]) : $title;
                }
            }

            return isset($match_title[0][0]) ? strip_tags($match_title[0][0]) : $title;
        } else {
            // No italics: work on a single-byte (Latin-1) copy of the row
            // text and cut the region labels out of it.
            $hispanoamerica = 'Hispanoam' . utf8_decode("é") . 'rica';
            $latinoamerica = 'Latinoam' . utf8_decode("é") . 'rica';
            $espana = '(Espa' . utf8_decode("ñ") . 'a)';
            $search_title = str_replace('T' . utf8_decode("í") . 'tulo', "", strip_tags(utf8_decode($info)));

            // NOTE(review): strpos() can return false here; the loose "<"
            // comparison is kept exactly as it was.
            if (strpos($search_title, $espana) < strpos($search_title, $hispanoamerica) || strpos($search_title, $espana) < strpos($search_title, $latinoamerica)) {
                // Drop everything up to and including the "(España)" label.
                $match_title = preg_replace('/.+\\(' . $espana . '\\)/i', "", $search_title);
            } else {
                $match_title = $search_title;
            }

            // Drop the Latin American label and anything that follows it.
            if (strpos($match_title, $hispanoamerica) !== false) {
                $match_title = str_replace("(" . $hispanoamerica . ")", "", preg_replace('/\\(' . $hispanoamerica . '\\)(.+)?/i', "", $match_title));
            }
            if (strpos($match_title, $latinoamerica) !== false) {
                $match_title = str_replace("(" . $latinoamerica . ")", "", preg_replace('/\\(' . $latinoamerica . '\\)(.+)?/i', "", $match_title));
            }

            return utf8_encode($match_title);
        }
    } else {
        // No "España" label in the row.
        if (!preg_match_all('/<i>(.*?)<\\/i>/i', $info, $match_title, PREG_SET_ORDER)) {
            return strip_tags(str_replace("Título</th>", "", $info));
        }

        // The first italic title is passed through Util::traslateText().
        return Util::traslateText(strip_tags($match_title[0][0]));
    }
}