Example #1
0
/**
* Extract the site title, description and keywords from HTML tags
*
* Scans the URL supplied in $_REQUEST['url'] with ScanLink(), parses the
* returned HTML with PageParser and echoes a JSON document:
*   - on success: status JSON_SUCCESS plus 'title', 'description', 'keywords'
*   - otherwise:  status JSON_FAILURE
*
* Nothing is returned; the JSON is written directly to the output.
*/
function lxExtractSiteInfo()
{
    global $json;
    require_once "{$GLOBALS['BASE_DIR']}/includes/htmlparser.class.php";

    // Avoid an undefined-index notice when the request carries no URL; the
    // value handed to ScanLink() is still null in that case, as before
    $url = isset($_REQUEST['url']) ? $_REQUEST['url'] : null;
    $link = array('site_url' => $url, 'allow_redirect' => TRUE, 'recip_url' => null);
    $result = ScanLink($link);

    if (!$result['site_url']['working']) {
        echo $json->encode(array('status' => JSON_FAILURE));
        return;
    }

    $parser = new PageParser();
    $parser->parse($result['site_url']['html']);

    // Convert each extracted field to ISO-8859-1
    $converted = array();
    foreach (array('title', 'description', 'keywords') as $field) {
        $raw = (string) $parser->$field;
        $detected = mb_detect_encoding($raw, 'auto');

        // mb_detect_encoding() returns FALSE when no encoding can be determined;
        // passing that on to mb_convert_encoding() is an error, so keep the raw text
        $converted[$field] = ($detected === FALSE) ? $raw : mb_convert_encoding($raw, 'ISO-8859-1', $detected);
    }

    echo $json->encode(array(
        'status' => JSON_SUCCESS,
        'title' => html_entity_decode(trim($converted['title'])),
        'description' => html_entity_decode(trim($converted['description'])),
        'keywords' => trim(FormatKeywords(html_entity_decode($converted['keywords'])))
    ));
}
Example #2
0
/**
 * Download a gallery page and collect scan information about it
 *
 * @param array $gallery      Gallery record; 'gallery_url' is the page that gets downloaded
 * @param array $category     Category record; 'pics_extensions' and 'movies_extensions' are handed to PageParser
 * @param array $whitelisted  Whitelist record; 'allow_redirect' is passed to Http::Get()
 * @param bool  $all_images   When TRUE and the page has no content links, use every image found by the parser as thumbs
 *
 * @return array Scan results: the Http request info merged with 'thumbnails', 'links', 'format',
 *               'has_recip', 'has_2257', 'thumbs', 'server_match', 'page_hash', 'gallery_ip', 'bytes',
 *               'html', 'headers', 'status', 'success', 'errstr', 'end_url' and, when thumbs were
 *               selected, 'preview' (null if the selected set was empty). If the download fails the
 *               array is returned right after the request details are recorded.
 */
function &ScanGallery(&$gallery, &$category, &$whitelisted, $all_images = FALSE)
{
    require_once "{$GLOBALS['BASE_DIR']}/includes/http.class.php";
    require_once "{$GLOBALS['BASE_DIR']}/includes/htmlparser.class.php";

    // Setup default values
    $results = array('thumbnails' => 0, 'links' => 0, 'format' => FMT_PICTURES, 'has_recip' => FALSE, 'has_2257' => FALSE, 'thumbs' => array(), 'server_match' => TRUE);

    // Download the gallery page
    $http = new Http();
    $http_result = $http->Get($gallery['gallery_url'], $whitelisted['allow_redirect']);

    // Record the request results
    $results = array_merge($results, $http->request_info);
    $results['page_hash'] = md5($http->body);
    $results['gallery_ip'] = GetIpFromUrl($http->end_url);
    $results['bytes'] = intval($results['size_download']);
    $results['html'] = $http->body;
    $results['headers'] = trim($http->raw_response_headers);
    $results['status'] = $http->response_headers['status'];
    $results['success'] = $http_result;
    $results['errstr'] = $http->errstr;
    $results['end_url'] = $http->end_url;

    if (!$http_result) {
        return $results;
    }

    // Check if reciprocal link and 2257 code are present
    $results['has_recip'] = CheckReciprocal($http->body);
    $results['has_2257'] = Check2257($http->body);

    // Extract information from the gallery HTML
    $parser = new PageParser($http->end_url, $category['pics_extensions'], $category['movies_extensions']);
    $parser->parse($http->body);
    $results['links'] = $parser->num_links;

    // Decide which set of thumbs represents the gallery
    $have_pool = FALSE;
    $pool = array();

    if ($parser->num_content_links > 0) {
        $have_pool = TRUE;

        if ($parser->num_picture_links > $parser->num_movie_links) {
            $results['format'] = FMT_PICTURES;
            $results['thumbnails'] = $parser->num_picture_links;
            $pool = $parser->thumbs['pictures'];
        } else {
            $results['format'] = FMT_MOVIES;
            $results['thumbnails'] = $parser->num_movie_links;
            $pool = $parser->thumbs['movies'];
        }
    } else if ($all_images) {
        $have_pool = TRUE;
        $results['thumbnails'] = count($parser->images);
        $pool = $parser->images;
    }

    if ($have_pool) {
        // array_rand() fails on an empty array (warning on older PHP, ValueError on PHP 8),
        // so only pick a random preview when there is something to pick from
        $results['preview'] = empty($pool) ? null : $pool[array_rand($pool)]['full'];
        $results['thumbs'] = array_values($pool);
    }

    // Check that gallery content is hosted on same server as the gallery itself
    $parsed_gallery_url = parse_url($results['end_url']);
    $gallery_host = '';
    if (is_array($parsed_gallery_url) && isset($parsed_gallery_url['host'])) {
        $gallery_host = preg_replace('~^www\\.~i', '', $parsed_gallery_url['host']);
    }

    // With no gallery host there is nothing to compare against; server_match keeps its default
    if ($gallery_host !== '') {
        // Anchor the match so only the gallery host itself or one of its subdomains passes;
        // an unanchored pattern would also accept hosts such as "notexample.com.evil.org".
        // The delimiter is passed to preg_quote() so a "~" in the host cannot break the pattern.
        $host_pattern = '~(?:^|\\.)' . preg_quote($gallery_host, '~') . '$~i';

        foreach ($results['thumbs'] as $thumb) {
            $parsed_content_url = parse_url($thumb['content']);
            $content_host = '';
            if (is_array($parsed_content_url) && isset($parsed_content_url['host'])) {
                $content_host = $parsed_content_url['host'];
            }

            // A content URL without a host never matches, same as before
            if (!preg_match($host_pattern, $content_host)) {
                $results['server_match'] = FALSE;
                break;
            }
        }
    }

    $parser->Cleanup();
    unset($parser);
    $http->Cleanup();
    unset($http);

    return $results;
}
Example #3
0
         }
     } else {
         $exception = $exceptions['connect'];
     }
 } else {
     // No reciprocal link found
     if ($link['recip_required'] && !$scan_result['site_url']['has_recip'] && !$scan_result['recip_url']['has_recip']) {
         $exception |= $exceptions['norecip'];
     }
     // Check the blacklist
     if (($blacklisted = CheckBlacklistLink($link)) !== FALSE) {
         $exception |= $exceptions['blacklist'];
         $scan_result['blacklist_item'] = $blacklisted[0]['match'];
     }
     $parser = new PageParser();
     $parser->parse($scan_result['site_url']['html']);
     $extracted_title = html_entity_decode(trim($parser->title));
     $extracted_description = html_entity_decode(trim($parser->description));
     $extracted_keywords = trim(FormatKeywords(html_entity_decode($parser->keywords)));
     if ($configuration['process_get_title'] && IsEmptyString($link['title'])) {
         $updates['placeholders'][] = '#=?';
         $updates['binds'][] = 'title';
         $updates['binds'][] = $extracted_title;
     }
     if ($configuration['process_get_description'] && IsEmptyString($link['description'])) {
         $updates['placeholders'][] = '#=?';
         $updates['binds'][] = 'description';
         $updates['binds'][] = $extracted_description;
     }
     if ($configuration['process_get_keywords'] && IsEmptyString($link['keywords'])) {
         $updates['placeholders'][] = '#=?';