/**
 * Looks for an image or video that can be used as the preview of a news item.
 *
 * Nothing is done when the item is flagged as edited or already has a
 * preview. Otherwise the following sources are tried, in this order:
 *  1. the image of the first active topic matching one of the item keywords,
 *  2. a preview detected from the item URL / text (imgur, image, youtube, vimeo),
 *  3. the og:image meta tags of the linked page,
 *  4. youtube / vimeo links found in the linked page.
 *
 * Progress markers are echoed: 'E' when the item is already edited, 'T' when
 * a topic image is assigned and 'I' when the item ends up with a preview.
 *
 * @param inme_noticia_fuente $noti item to inspect; it is modified and saved in place
 */
private function preview_noticia(&$noti)
{
    if ($noti->editada) {
        /// already edited: leave it untouched
        echo 'E';
        return;
    }

    if (!is_null($noti->preview)) {
        /// it already has a preview
        return;
    }

    /// first try to assign the image of a topic
    $tema0 = new inme_tema();
    foreach ($noti->keywords() as $key) {
        $tema = $tema0->get($key);
        if ($tema and $tema->imagen and $tema->activo) {
            $noti->preview = $tema->imagen;
            $noti->save();
            echo 'T';
            break;
        }
    }

    /// now look for a preview in the item itself
    $preview = new inme_noticia_preview();
    $preview->load($noti->url, $noti->texto);
    if ($preview->type) {
        /**
         * youtube and vimeo previews are always wanted, but a plain image
         * (or an imgur one) is only used when there is nothing else yet.
         */
        if (is_null($noti->preview) and ($preview->type == 'imgur' or $preview->type == 'image')) {
            $noti->preview = $preview->preview();
            $noti->texto .= $this->preview_html_imagen($preview->link, $noti->titulo);
            $noti->editada = TRUE;
            $noti->save();
        } else if (in_array($preview->type, array('youtube', 'vimeo'), TRUE)) {
            $imagen = $preview->preview();
            if ($imagen) {
                $noti->preview = $imagen;
                $noti->texto = $this->preview_html_video($preview->type, $preview->filename) . '<br/>' . $noti->texto;
                $noti->editada = TRUE;
                $noti->save();
            }
        }
    } else if (is_null($noti->preview)) {
        $this->preview_explorar_pagina($noti, $preview);
    }

    if (!is_null($noti->preview)) {
        echo 'I';
    }
}

/**
 * Downloads the page linked by the item and looks for og:image meta tags
 * and, when none of them is usable, for youtube / vimeo links.
 *
 * @param inme_noticia_fuente $noti item to update; saved when something is found
 * @param inme_noticia_preview $preview helper used to download the page and classify URLs
 */
private function preview_explorar_pagina(&$noti, $preview)
{
    $html = (string) $preview->curl_download($noti->url);
    $txt_adicional = FALSE;

    /**
     * An explicit flag is used instead of $preview->type: the type stays set
     * when a candidate is rejected (e.g. a logo), which would wrongly skip
     * the video search below.
     */
    $encontrada = FALSE;

    $urls = array();
    if (preg_match_all('@<meta property="og:image" content="([^"]+)@', $html, $urls)) {
        foreach ($urls[1] as $url) {
            $preview->load($url);
            if ($preview->type and stripos($url, 'logo') === FALSE and $noti->preview != $preview->link) {
                $noti->preview = $preview->preview();
                $noti->save();
                $txt_adicional = $this->preview_html_imagen($preview->link, $noti->titulo);
                $encontrada = TRUE;
                break;
            }
        }
    }

    if (!$encontrada) {
        /// look for youtube or vimeo videos among every URL of the page
        $urls = array();
        if (preg_match_all('@((https?://)?([-\\w]+\\.[-\\w\\.]+)+\\w(:\\d+)?(/([-\\w/_\\.]*(\\?\\S+)?)?)*)@', $html, $urls)) {
            foreach ($urls[0] as $url) {
                $candidata = FALSE;
                foreach (array('youtube', 'youtu.be', 'vimeo') as $domain) {
                    if (strpos($url, $domain) !== FALSE) {
                        $candidata = TRUE;
                        break;
                    }
                }

                if (!$candidata) {
                    continue;
                }

                /// keep scanning when the URL is not really a video
                $preview->load($url);
                if (in_array($preview->type, array('youtube', 'vimeo'), TRUE)) {
                    $noti->preview = $preview->preview();
                    $noti->save();
                    $txt_adicional = $this->preview_html_video($preview->type, $preview->filename);
                    break;
                }
            }
        }
    }

    if ($txt_adicional) {
        $noti->texto .= $txt_adicional;
        $noti->save();
    }
}

/**
 * Builds the thumbnail block appended to the text of an item.
 *
 * Both values end up inside single-quoted attributes, so they are escaped.
 * double_encode is disabled in case the values already contain entities.
 *
 * @param string $link image URL
 * @param string $titulo item title, used as alternative text
 * @return string
 */
private function preview_html_imagen($link, $titulo)
{
    return "\n<div class='thumbnail'>\n<img src='" . htmlspecialchars((string) $link, ENT_QUOTES, 'UTF-8', FALSE)
        . "' alt='" . htmlspecialchars((string) $titulo, ENT_QUOTES, 'UTF-8', FALSE) . "'/>\n</div>";
}

/**
 * Builds the responsive iframe used to embed a youtube or vimeo video.
 *
 * @param string $type 'youtube' or 'vimeo'
 * @param string $filename video identifier, as exposed by the preview helper
 * @return string
 */
private function preview_html_video($type, $filename)
{
    if ($type == 'youtube') {
        $base = '//www.youtube-nocookie.com/embed/';
    } else {
        $base = '//player.vimeo.com/video/';
    }

    return '<div class="embed-responsive embed-responsive-16by9">'
        . '<iframe class="embed-responsive-item" src="' . $base
        . htmlspecialchars((string) $filename, ENT_QUOTES, 'UTF-8', FALSE) . '"></iframe>'
        . '</div>';
}
/**
 * Creates the news item described by a feed entry, or updates its vote
 * count when an item with the same URL is already stored.
 *
 * The URL is taken, in order of preference, from the meneame namespace, the
 * feedburner origLink element or the link elements of the entry. Every
 * outcome is appended to $this->log.
 *
 * @param SimpleXMLElement $item feed entry (used through children(), attributes() and asXML())
 * @param inme_fuente $fuente source the entry belongs to
 * @return int|null 0 when no URL could be found, nothing otherwise
 */
private function nueva_noticia(&$item, &$fuente)
{
    $url = NULL;

    /// try to get the original link (and the votes) from meneame
    $meneos = 0;
    foreach ($item->children('meneame', TRUE) as $element) {
        $nombre = $element->getName();
        if ($nombre == 'url') {
            $url = (string) $element;
        } else if ($nombre == 'votes') {
            $meneos = intval((string) $element);
        }
    }

    if (is_null($url)) {
        /// try to get the original link from feedburner
        foreach ($item->children('feedburner', TRUE) as $element) {
            if ($element->getName() == 'origLink') {
                $url = (string) $element;
                break;
            }
        }

        /// try to read the link element(s); the last usable one wins
        if (is_null($url) and $item->link) {
            foreach ($item->link as $l) {
                if (mb_substr((string) $l, 0, 4) == 'http') {
                    $url = (string) $l;
                } else if ((string) $l->attributes()->type === 'text/html') {
                    $url = (string) $l->attributes()->href;
                }
            }
        }
    }

    /**
     * This check must run before any string function touches $url:
     * str_replace() turns NULL into '', which would hide the missing URL.
     */
    if (is_null($url) or $url === '') {
        $this->log[] = 'No se ha podido encontrar la url en ' . $item->asXML();
        return 0;
    }

    /// undo ampersands that are still entity-encoded in the link
    $url = str_replace(array('&amp;', '&#038;', '&#38;'), '&', $url);

    /// is the item already in the database?
    $nueva = FALSE;
    $noticia = $this->noticia->get_by_url($url);
    if (!$noticia) {
        $nueva = TRUE;

        /// it does not exist, so create it
        $noticia = new inme_noticia_fuente();
        $noticia->url = $url;
        $noticia->codfuente = $fuente->codfuente;

        $fecha = NULL;
        if ($item->pubDate) {
            $fecha = (string) $item->pubDate;
        } else if ($item->published) {
            $fecha = (string) $item->published;
        }

        if (!is_null($fecha)) {
            /// never store a future date; an unparsable date falls back to now
            $timestamp = strtotime($fecha);
            if ($timestamp === FALSE) {
                $timestamp = time();
            }
            $noticia->fecha = date('d-m-Y H:i:s', min($timestamp, time()));
        }

        $noticia->titulo = (string) $item->title;

        if ($item->description) {
            $description = (string) $item->description;
        } else if ($item->content) {
            $description = (string) $item->content;
        } else if ($item->summary) {
            $description = (string) $item->summary;
        } else {
            $description = '';

            /// try the atom namespace
            foreach ($item->children('atom', TRUE) as $element) {
                if ($element->getName() == 'summary') {
                    $description = (string) $element;
                    break;
                }
            }

            /// content:encoded, when present, takes precedence
            foreach ($item->children('content', TRUE) as $element) {
                if ($element->getName() == 'encoded') {
                    $description = (string) $element;
                    break;
                }
            }
        }

        if ($fuente->meneame()) {
            /// drop the boilerplate of meneame items: keep only what precedes the first </p>
            $fin = mb_strpos($description, '</p>');
            if ($fin !== FALSE) {
                $description = mb_substr($description, 0, $fin);
            }
        }

        /// remove the html from the description
        $description = strip_tags(html_entity_decode($description, ENT_QUOTES, 'UTF-8'));
        $noticia->texto = $description;

        /// multibyte-safe cut, so the summary never ends in half a character
        $noticia->resumen = mb_substr($noticia->texto, 0, 300, 'UTF-8');

        /// process the category keywords
        if ($item->category) {
            foreach ($item->category as $cat) {
                $categoria = (string) $cat;
                if (strlen($categoria) > 1) {
                    $tema = $this->tema->get($categoria);
                    if (!$tema) {
                        $tema = new inme_tema();
                        $tema->codtema = $tema->texto = $categoria;
                        $tema->save();
                    }

                    $noticia->set_keyword($categoria);
                }
            }
        }

        /// process the keywords and images of the media namespace
        foreach ($item->children('media', TRUE) as $element) {
            $nombre = $element->getName();
            if ($nombre == 'thumbnail') {
                /// Media RSS carries the image in the url attribute; the text is a fallback
                $miniatura = (string) $element->attributes()->url;
                if ($miniatura === '') {
                    $miniatura = trim((string) $element);
                }

                /// an empty string would count as "has preview" and block later detection
                if ($miniatura !== '') {
                    $noticia->preview = $miniatura;
                }
            } else if ($nombre == 'keywords') {
                foreach (explode(',', (string) $element) as $a) {
                    $keyword = trim($a);
                    if ($keyword !== '') {
                        $noticia->set_keyword($keyword);
                    }
                }
            }
        }
    }

    if ($meneos > 0) {
        $noticia->meneos = $meneos;
    }

    if ($noticia->save()) {
        if ($nueva) {
            /// title and url come from the feed: escape them before building html
            $this->log[] = 'Encontrada noticia: <a href="' . htmlspecialchars((string) $noticia->url, ENT_QUOTES, 'UTF-8', FALSE)
                . '" target="_blank">' . htmlspecialchars((string) $noticia->titulo, ENT_QUOTES, 'UTF-8', FALSE) . '</a>';
        }
    } else {
        $this->log[] = 'Error al procesar la noticia: ' . $noticia->url;
    }
}