コード例 #1
0
 /**
  * Builds link-preview data for the first URL that Regex::$URL_REGEX finds in $text.
  *
  * When Content::isImage() accepts the URL, the URL itself is the only image and no
  * page is fetched. Otherwise the page is fetched through $this->getPage() and the
  * title, description, images and video data are derived from its content.
  *
  * @param string $text          Text to search for a URL.
  * @param mixed  $imageQuantity Forwarded unchanged to Content::getImages().
  * @param mixed  $header        Forwarded to Json::jsonSafe(); replaced by the fetched
  *                              page's "header" entry whenever a page is fetched.
  *
  * @return mixed|null Result of Json::jsonSafe() (it is fed to json_decode(), so
  *                    presumably a JSON string), or null when $text contains no URL.
  */
 function crawl($text, $imageQuantity, $header)
 {
     if (!preg_match(Regex::$URL_REGEX, $text, $match)) {
         return null;
     }

     $title = "";
     $description = "";
     $videoIframe = "";
     $video = false;

     // A match that starts with a space gets that space swapped for an "http://" prefix.
     if (strpos($match[0], " ") === 0) {
         $match[0] = "http://" . substr($match[0], 1);
     }
     $finalUrl = $match[0];
     $pageUrl = $finalUrl;

     if (Content::isImage($pageUrl)) {
         $images = [$pageUrl];
     } else {
         $urlData = $this->getPage($pageUrl);
         if (!$urlData["content"] && strpos($pageUrl, "//www.") === false) {
             // Nothing came back: retry once with a "www." host. Only the first scheme
             // marker is rewritten so URLs embedded in the query string stay intact.
             $pageUrl = preg_replace('#https?://#', '${0}www.', $pageUrl, 1);
             $urlData = $this->getPage($pageUrl);
         }
         $pageUrl = $finalUrl = $urlData["url"];
         $raw = $urlData["content"];
         $header = $urlData["header"];
         $metaTags = Content::getMetaTags($raw);

         // Title: meta tag first, then the TITLE_REGEX match on the newline-flattened markup.
         $tempTitle = Content::extendedTrim($metaTags["title"]);
         if ($tempTitle != "") {
             $title = $tempTitle;
         }
         if ($title == "") {
             if (preg_match(Regex::$TITLE_REGEX, str_replace("\n", " ", $raw), $matching)) {
                 $title = $matching[2];
             }
         }

         // Description: meta tag first, then whatever Content::crawlCode() extracts.
         $tempDescription = Content::extendedTrim($metaTags["description"]);
         if ($tempDescription != "") {
             $description = $tempDescription;
         } else {
             $description = Content::crawlCode($raw);
         }

         // The previous condition also had a long "description not understood" clause,
         // but it required $description to be empty and non-empty at the same time, so
         // only this equality test could ever be true.
         if ($title == $description) {
             $title = $description;
             $description = Content::crawlCode($raw);
         }

         if (Content::isJson($title)) {
             $title = "";
         }
         if (Content::isJson($description)) {
             $description = "";
         }

         $media = $this->getMedia($pageUrl);
         if ($media === null) {
             // The video check below allows for null; normalise it before count().
             $media = array();
         }
         if (count($media) == 0) {
             $images = array(Content::extendedTrim($metaTags["image"]));
         } else {
             $images = array($media[0]);
         }
         // $media may be empty here, so do not index it blindly.
         $videoIframe = isset($media[1]) ? $media[1] : "";
         if ($images[0] === "") {
             $images = Content::getImages($raw, $pageUrl, $imageQuantity);
         }
         if ($media != null && $media[0] != "" && $media[1] != "") {
             $video = true;
         }

         $title = Content::extendedTrim($title);
         $pageUrl = Content::extendedTrim($pageUrl);
         $description = Content::extendedTrim($description);
         $description = preg_replace(Regex::$SCRIPT_REGEX, "", $description);
     }

     // "url" is the final URL cut at its first "&".
     $finalLink = explode("&", $finalUrl);
     $finalLink = $finalLink[0];
     $description = strip_tags($description);
     $videoIframe = $videoIframe == null ? "" : $videoIframe;

     $answer = array(
         "title" => $title,
         "url" => $finalLink,
         "pageUrl" => $finalUrl,
         "canonicalUrl" => Url::canonicalPage($pageUrl),
         "description" => $description,
         "image" => $images[0],
         "images" => $images,
         "video" => $video,
         "videoIframe" => $videoIframe,
     );
     $result_json = Json::jsonSafe($answer, $header);
     $result_json_decoded = json_decode($result_json);

     // If title or description is missing from the decoded result, re-encode that
     // field with utf8_encode() and serialise once more.
     // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 — plan a replacement.
     $flagged = false;
     if (!isset($result_json_decoded->title)) {
         $title = utf8_encode($title);
         $flagged = true;
     }
     if (!isset($result_json_decoded->description)) {
         $description = utf8_encode($description);
         $flagged = true;
     }
     if (!$flagged) {
         return $result_json;
     }

     $answer = array(
         "title" => $title,
         "url" => $finalLink,
         "pageUrl" => $finalUrl,
         "canonicalUrl" => Url::canonicalPage($pageUrl),
         "description" => $description,
         "image" => $images[0],
         "images" => $images,
         "video" => $video,
         "videoIframe" => $videoIframe,
     );
     return Json::jsonSafe($answer, $header);
 }
コード例 #2
0
 /**
  * Builds link-preview data for $text when Regex::$urlRegex matches it.
  *
  * The whole of $text (not just the matched part) is used as the URL. The page is
  * requested with "https://" downgraded to "http://". When Content::isImage()
  * accepts the URL it becomes the only image and no page is fetched.
  *
  * @param string $text          Text expected to be a URL.
  * @param mixed  $imageQuantity Forwarded unchanged to Content::getImages().
  * @param mixed  $header        Forwarded to Json::jsonSafe(); replaced by the fetched
  *                              page's "header" entry whenever a page is fetched.
  *
  * @return mixed|null Result of Json::jsonSafe() (it is fed to json_decode(), so
  *                    presumably a JSON string), or null when the regex does not match.
  */
 function crawl($text, $imageQuantity, $header)
 {
     if (!preg_match(Regex::$urlRegex, $text)) {
         return null;
     }

     $title = "";
     $description = "";
     $videoIframe = "";
     $video = "no";
     $images = [];

     $finalUrl = $text;
     $pageUrl = str_replace("https://", "http://", $finalUrl);

     if (Content::isImage($pageUrl)) {
         $images[] = $pageUrl;
     } else {
         $urlData = $this->getPage($pageUrl);
         if (!$urlData["content"] && strpos($pageUrl, "//www.") === false) {
             // Nothing came back: retry once with a "www." host. Only the first scheme
             // marker is rewritten so URLs embedded in the query string stay intact.
             $pageUrl = preg_replace('#https?://#', '${0}www.', $pageUrl, 1);
             $urlData = $this->getPage($pageUrl);
         }
         $pageUrl = $finalUrl = $urlData["url"];
         $raw = $urlData["content"];
         $header = $urlData["header"];
         $headers = $urlData["headers"];
         $metaTags = Content::getMetaTags($raw);

         // Title: meta tag first, then the titleRegex match on the newline-flattened markup.
         $tempTitle = Content::extendedTrim($metaTags["title"]);
         if ($tempTitle != "") {
             $title = $tempTitle;
         }
         if ($title == "") {
             if (preg_match(Regex::$titleRegex, str_replace("\n", " ", $raw), $matching)) {
                 $title = $matching[2];
             }
         }

         // Description: meta tag first, then whatever Content::crawlCode() extracts.
         $tempDescription = Content::extendedTrim($metaTags["description"]);
         if ($tempDescription != "") {
             $description = $tempDescription;
         } else {
             $description = Content::crawlCode($raw);
         }

         // The previous condition also had a long "description not understood" clause,
         // but it required $description to be empty and non-empty at the same time, so
         // only this equality test could ever be true.
         if ($title == $description) {
             $title = $description;
             $description = Content::crawlCode($raw);
         }

         if (Content::isJson($title)) {
             $title = "";
         }
         if (Content::isJson($description)) {
             $description = "";
         }

         $media = self::getMedia($pageUrl);
         if ($media === null) {
             // The video check below allows for null; normalise it before count().
             $media = array();
         }
         if (count($media) == 0) {
             // No media entry: use the meta-tag images, resolving any value that
             // Regex::$httpRegex does not match against the page URL.
             foreach ($metaTags['images'] as $metaImage) {
                 $images[] = !preg_match(Regex::$httpRegex, $metaImage) ? Url::canonicalLink(Content::extendedTrim($metaImage), $pageUrl) : $metaImage;
             }
         } else {
             $images[] = $media[0];
             // A missing second entry yields null, as before, but without a warning.
             $videoIframe = isset($media[1]) ? $media[1] : null;
         }
         $images = array_merge($images, Content::getImages($raw, $pageUrl, $imageQuantity));
         // Filter out duplicate image URLs, keeping first-seen order.
         $images = array_keys(array_flip($images));

         if ($media != null && $media[0] != "" && $media[1] != "") {
             $video = "yes";
         }

         $title = Content::extendedTrim($title);
         $pageUrl = Content::extendedTrim($pageUrl);
         $description = Content::extendedTrim($description);
         $description = preg_replace(Regex::$scriptRegex, "", $description);
     }

     // "url" is the final URL cut at its first "&".
     $finalLink = explode("&", $finalUrl);
     $finalLink = $finalLink[0];
     $description = strip_tags($description);

     // canBrand is switched off when the fetched page sent "x-frame-options: SAMEORIGIN".
     // $headers is only set when a page was fetched, hence the isset().
     $can_brand = true;
     if (isset($headers["x-frame-options"]) && $headers["x-frame-options"] == "SAMEORIGIN") {
         $can_brand = false;
     }

     $answer = array(
         "title" => $title,
         "url" => $finalLink,
         "pageUrl" => $finalUrl,
         "canonicalUrl" => Url::canonicalPage($pageUrl),
         "description" => $description,
         "images" => $images,
         "video" => $video,
         "videoIframe" => $videoIframe,
         "canBrand" => $can_brand,
     );
     $result_json = Json::jsonSafe($answer, $header);
     $result_json_decoded = json_decode($result_json);

     // If title or description is missing from the decoded result, re-encode that
     // field with utf8_encode() and serialise once more.
     // NOTE(review): utf8_encode() is deprecated as of PHP 8.2 — plan a replacement.
     $flagged = false;
     if (!isset($result_json_decoded->title)) {
         $title = utf8_encode($title);
         $flagged = true;
     }
     if (!isset($result_json_decoded->description)) {
         $description = utf8_encode($description);
         $flagged = true;
     }
     if (!$flagged) {
         return $result_json;
     }

     $answer = array(
         "title" => $title,
         "url" => $finalLink,
         "pageUrl" => $finalUrl,
         "canonicalUrl" => Url::canonicalPage($pageUrl),
         "description" => $description,
         "images" => $images,
         "video" => $video,
         "videoIframe" => $videoIframe,
         "canBrand" => $can_brand,
     );
     return Json::jsonSafe($answer, $header);
 }
コード例 #3
0
<?php

/**
 * Copyright (c) 2014 Leonardo Cardoso (http://leocardz.com)
 * Dual licensed under the MIT (http://www.opensource.org/licenses/mit-license.php)
 * and GPL (http://www.opensource.org/licenses/gpl-license.php) licenses.
 *
 * Version: 1.3.0
 */
include_once "classes/Database.php";
include_once "classes/SetUp.php";
include_once "classes/Json.php";
// Runs SetUp::headers() before any output is echoed — presumably this sends the
// HTTP response headers; confirm in classes/SetUp.php.
SetUp::headers();
// Empty header value handed to Json::jsonSafe() as its second argument.
$header = "";
// Whatever Database::select() returns; its shape is not visible from this script.
$answer = Database::select();
// Print the result of Json::jsonSafe() for the selected data (presumably a JSON
// string — confirm in classes/Json.php).
echo Json::jsonSafe($answer, $header);
コード例 #4
0
ファイル: LinkPreview.php プロジェクト: aapthi/taggerzz-new
 /**
  * Builds link-preview data for the first URL that Regex::$urlRegex finds in $text.
  *
  * The page is requested over "http://" first. If the response body contains
  * "301 Moved Permanently" the request is repeated, using the "https://" form when
  * the matched URL was https. When Content::isImage() accepts the URL no page is
  * fetched and "images" is that URL string.
  *
  * @param string $text          Text to search for a URL.
  * @param mixed  $imageQuantity Forwarded unchanged to Content::getImages().
  * @param mixed  $header        Not used; kept so the signature stays the same.
  *
  * @return array|null Array with the keys title, url, keywords, pageUrl, canonicalUrl,
  *                    description, images, video (0 or 1) and videoIframe, or null
  *                    when $text contains no URL.
  */
 function crawl($text, $imageQuantity, $header)
 {
     if (!preg_match(Regex::$urlRegex, $text, $match)) {
         return null;
     }

     $title = "";
     $description = "";
     $videoIframe = "";
     // Initialised here so the image-only path also has a value for the result array.
     $keywords = "";
     $video = 0;

     // A match that starts with a space gets that space swapped for an "http://" prefix.
     if (strpos($match[0], " ") === 0) {
         $match[0] = "http://" . substr($match[0], 1);
     }
     $finalUrl = $match[0];
     $wasHttps = strpos($finalUrl, "https://") !== false;
     $pageUrl = str_replace("https://", "http://", $finalUrl);

     if (Content::isImage($pageUrl)) {
         $images = $pageUrl;
     } else {
         $urlData = $this->getPage($pageUrl);
         if (strpos($urlData["content"], "301 Moved Permanently") !== false) {
             if ($wasHttps) {
                 $pageUrl = str_replace("http://", "https://", $finalUrl);
             }
             $urlData = $this->getPage($pageUrl);
         }
         if (!$urlData["content"] && strpos($pageUrl, "//www.") === false) {
             // Nothing came back: retry once with a "www." host. Only the first scheme
             // marker is rewritten so URLs embedded in the query string stay intact.
             $pageUrl = preg_replace('#https?://#', '${0}www.', $pageUrl, 1);
             $urlData = $this->getPage($pageUrl);
         }
         $urlData = Content::stripIrrelevantTags($urlData);
         $pageUrl = $finalUrl = $urlData["url"];
         $raw = $urlData["content"];
         $metaTags = Content::getMetaTags($raw);

         if (isset($metaTags["keywords"])) {
             $keywords = $metaTags["keywords"];
         }

         // Title: meta tag first, then the titleRegex match on the newline-flattened markup.
         $tempTitle = Content::extendedTrim($metaTags["title"]);
         if ($tempTitle != "") {
             $title = $tempTitle;
         }
         if ($title == "") {
             if (preg_match(Regex::$titleRegex, str_replace("\n", " ", $raw), $matching)) {
                 $title = $matching[2];
             }
         }

         // Description: meta tag first, then whatever Content::crawlCode() extracts.
         $tempDescription = Content::extendedTrim($metaTags["description"]);
         if ($tempDescription != "") {
             $description = $tempDescription;
         } else {
             $description = Content::crawlCode($raw);
         }

         // The previous condition also had a long "description not understood" clause,
         // but it required $description to be empty and non-empty at the same time, so
         // only this equality test could ever be true.
         if ($title == $description) {
             $title = $description;
             $description = Content::crawlCode($raw);
         }

         $media = $this->getMedia($pageUrl);
         if ($media === null) {
             // The video check below allows for null; normalise it before count().
             $media = array();
         }
         $images = count($media) == 0 ? Content::extendedTrim($metaTags["image"]) : $media[0];
         // $media may be empty; a missing third entry yields null, as before, but
         // without an undefined-offset warning.
         $videoIframe = isset($media[2]) ? $media[2] : null;
         if ($images == "") {
             $images = Content::getImages($raw, $pageUrl, $imageQuantity);
         }
         if ($media != null && $media[0] != "" && $media[1] != "") {
             $video = 1;
         }

         $title = Content::extendedTrim($title);
         $pageUrl = Content::extendedTrim($pageUrl);
         $description = Content::extendedTrim($description);
         $description = preg_replace(Regex::$scriptRegex, "", $description);
     }

     // "url" is the final URL cut at its first "&".
     $finalLink = explode("&", $finalUrl);
     $finalLink = $finalLink[0];
     $description = strip_tags($description);

     // This variant hands back the raw array. The JSON serialisation that used to
     // follow the return statement could never run and has been dropped.
     return array(
         "title" => $title,
         "url" => $finalLink,
         "keywords" => $keywords,
         "pageUrl" => $finalUrl,
         "canonicalUrl" => Url::canonicalPage($pageUrl),
         "description" => $description,
         "images" => $images,
         "video" => $video,
         "videoIframe" => $videoIframe,
     );
 }