/**
 * Extract the site title, description and keywords from a page's HTML tags.
 *
 * Reads the target address from $_REQUEST['url'], fetches it through
 * ScanLink() (redirects allowed, no reciprocal URL) and, when the site is
 * reported as working, parses the returned HTML with PageParser.
 *
 * The response is echoed as JSON:
 *   - success: 'status' => JSON_SUCCESS plus 'title', 'description', 'keywords'
 *   - failure: 'status' => JSON_FAILURE only
 *
 * Each extracted value is converted to ISO-8859-1 before its HTML entities
 * are decoded.
 *
 * @return void
 */
function lxExtractSiteInfo()
{
    global $json;

    require_once "{$GLOBALS['BASE_DIR']}/includes/htmlparser.class.php";

    $link = array('site_url' => $_REQUEST['url'], 'allow_redirect' => TRUE, 'recip_url' => null);
    $result = ScanLink($link);

    if (!$result['site_url']['working']) {
        echo $json->encode(array('status' => JSON_FAILURE));
        return;
    }

    $parser = new PageParser();
    $parser->parse($result['site_url']['html']);

    $extracted = array(
        'title' => $parser->title,
        'description' => $parser->description,
        'keywords' => $parser->keywords
    );

    foreach ($extracted as $field => $text) {
        // mb_detect_encoding() returns FALSE when it cannot identify the
        // encoding; handing FALSE to mb_convert_encoding() as the source
        // encoding fails (warning + FALSE result on old PHP, ValueError on
        // PHP 8), so in that case the text is kept as-is instead of being lost.
        $detected = mb_detect_encoding($text, 'auto');

        if ($detected !== FALSE) {
            $extracted[$field] = mb_convert_encoding($text, 'ISO-8859-1', $detected);
        }
    }

    echo $json->encode(array(
        'status' => JSON_SUCCESS,
        'title' => html_entity_decode(trim($extracted['title'])),
        'description' => html_entity_decode(trim($extracted['description'])),
        'keywords' => trim(FormatKeywords(html_entity_decode($extracted['keywords'])))
    ));
}
/**
 * Download a gallery page and collect statistics about its content.
 *
 * @param array $gallery      Gallery record; 'gallery_url' is the page that is fetched.
 * @param array $category     Category record; 'pics_extensions' and 'movies_extensions'
 *                            are handed to PageParser.
 * @param array $whitelisted  Whitelist record; 'allow_redirect' is passed to Http::Get().
 * @param bool  $all_images   When TRUE and the page has no content links, every image
 *                            found by the parser is used as the thumbnail set.
 *
 * @return array Scan results. Always contains the defaults ('thumbnails', 'links',
 *               'format', 'has_recip', 'has_2257', 'thumbs', 'server_match'), the
 *               entries of Http::$request_info, and 'page_hash', 'gallery_ip', 'bytes',
 *               'html', 'headers', 'status', 'success', 'errstr', 'end_url'.
 *               'preview' is only set when at least one thumbnail was found.
 *               When the download fails the array is returned right after the
 *               request details are recorded (no HTML analysis is performed).
 */
function &ScanGallery(&$gallery, &$category, &$whitelisted, $all_images = FALSE)
{
    require_once "{$GLOBALS['BASE_DIR']}/includes/http.class.php";
    require_once "{$GLOBALS['BASE_DIR']}/includes/htmlparser.class.php";

    // Setup default values
    $results = array('thumbnails' => 0,
                     'links' => 0,
                     'format' => FMT_PICTURES,
                     'has_recip' => FALSE,
                     'has_2257' => FALSE,
                     'thumbs' => array(),
                     'server_match' => TRUE);

    // Download the gallery page
    $http = new Http();
    $http_result = $http->Get($gallery['gallery_url'], $whitelisted['allow_redirect']);

    // Record the request results; the merge is skipped if no request info
    // array is available so the defaults above are never clobbered
    if (is_array($http->request_info)) {
        $results = array_merge($results, $http->request_info);
    }

    $results['page_hash'] = md5($http->body);
    $results['gallery_ip'] = GetIpFromUrl($http->end_url);
    $results['bytes'] = isset($results['size_download']) ? intval($results['size_download']) : 0;
    $results['html'] = $http->body;
    $results['headers'] = trim($http->raw_response_headers);
    $results['status'] = $http->response_headers['status'];
    $results['success'] = $http_result;
    $results['errstr'] = $http->errstr;
    $results['end_url'] = $http->end_url;

    if (!$http_result) {
        return $results;
    }

    // Check if reciprocal link and 2257 code are present
    $results['has_recip'] = CheckReciprocal($http->body);
    $results['has_2257'] = Check2257($http->body);

    // Extract information from the gallery HTML
    $parser = new PageParser($http->end_url, $category['pics_extensions'], $category['movies_extensions']);
    $parser->parse($http->body);

    $results['links'] = $parser->num_links;

    // Pick the thumbnail set that describes this gallery
    $thumb_set = null;

    if ($parser->num_content_links > 0) {
        if ($parser->num_picture_links > $parser->num_movie_links) {
            $results['format'] = FMT_PICTURES;
            $results['thumbnails'] = $parser->num_picture_links;
            $thumb_set = $parser->thumbs['pictures'];
        } else {
            $results['format'] = FMT_MOVIES;
            $results['thumbnails'] = $parser->num_movie_links;
            $thumb_set = $parser->thumbs['movies'];
        }
    } else if ($all_images) {
        $results['thumbnails'] = count($parser->images);
        $thumb_set = $parser->images;
    }

    // array_rand() rejects an empty array (warning on PHP < 8, ValueError on
    // PHP 8), so a preview is only chosen when there is something to choose from
    if (is_array($thumb_set) && count($thumb_set) > 0) {
        $results['preview'] = $thumb_set[array_rand($thumb_set)]['full'];
        $results['thumbs'] = array_values($thumb_set);
    }

    // Check that gallery content is hosted on same server as the gallery itself
    $parsed_gallery_url = parse_url($results['end_url']);
    $gallery_host = (is_array($parsed_gallery_url) && isset($parsed_gallery_url['host']))
                    ? preg_replace('~^www\.~i', '', $parsed_gallery_url['host'])
                    : '';

    // Without a gallery host there is nothing to compare against, so the
    // default of TRUE is left in place
    if ($gallery_host != '') {
        // Anchored so that only the gallery host itself or one of its
        // subdomains matches; an unanchored pattern would also accept hosts
        // such as "notexample.com" or "example.com.evil.org" for "example.com".
        // Host names are case-insensitive, hence the i modifier.
        $host_pattern = '~(?:^|\.)' . preg_quote($gallery_host, '~') . '$~i';

        foreach ($results['thumbs'] as $thumb) {
            $parsed_content_url = parse_url($thumb['content']);
            $content_host = (is_array($parsed_content_url) && isset($parsed_content_url['host']))
                            ? $parsed_content_url['host']
                            : '';

            // A content URL without a host counts as a mismatch
            if (!preg_match($host_pattern, $content_host)) {
                $results['server_match'] = FALSE;
                break;
            }
        }
    }

    $parser->Cleanup();
    unset($parser);

    $http->Cleanup();
    unset($http);

    return $results;
}
} } else { $exception = $exceptions['connect']; } } else { // No reciprocal link found if ($link['recip_required'] && !$scan_result['site_url']['has_recip'] && !$scan_result['recip_url']['has_recip']) { $exception |= $exceptions['norecip']; } // Check the blacklist if (($blacklisted = CheckBlacklistLink($link)) !== FALSE) { $exception |= $exceptions['blacklist']; $scan_result['blacklist_item'] = $blacklisted[0]['match']; } $parser = new PageParser(); $parser->parse($scan_result['site_url']['html']); $extracted_title = html_entity_decode(trim($parser->title)); $extracted_description = html_entity_decode(trim($parser->description)); $extracted_keywords = trim(FormatKeywords(html_entity_decode($parser->keywords))); if ($configuration['process_get_title'] && IsEmptyString($link['title'])) { $updates['placeholders'][] = '#=?'; $updates['binds'][] = 'title'; $updates['binds'][] = $extracted_title; } if ($configuration['process_get_description'] && IsEmptyString($link['description'])) { $updates['placeholders'][] = '#=?'; $updates['binds'][] = 'description'; $updates['binds'][] = $extracted_description; } if ($configuration['process_get_keywords'] && IsEmptyString($link['keywords'])) { $updates['placeholders'][] = '#=?';