/**
 * Collects the main data of a hero from the HTML of its page.
 *
 * The HTML is loaded into a phpQuery document, from which the name, a slug
 * derived from the name, the first-level "table" markup (run through
 * table_img_replace()) and the ".bio" text are read. Three images are then
 * requested through download_img_in_folder(); a result key is written only
 * when that helper returns a truthy value.
 *
 * Every value except "name" and "slug" is passed through
 * mysql_real_escape_string() before being stored.
 *
 * @param string $page HTML of the hero page.
 *
 * @return array Keys "name", "slug", "table" and "description" are always
 *               present ("table"/"description" fall back to 0 when the
 *               escaped value is falsy); "image", "icon" and "icon_big" are
 *               present only when the corresponding download returned a
 *               truthy value.
 */
function get_hero_main_info($page)
{
    $doc = phpQuery::newDocument($page);

    $hero_info = array();
    $hero_info['name'] = $doc->find(".p-header > h1")->text();

    // Slug: lower-cased name with every space turned into an underscore.
    $slug = str_replace(" ", "_", strtolower($hero_info['name']));
    $hero_info['slug'] = $slug;

    $escaped_table = mysql_real_escape_string(table_img_replace($doc->find('table')));
    $hero_info['table'] = $escaped_table ? $escaped_table : 0;

    // Drop the heading inside the bio so that only the body text is read.
    $doc->find('.bio > h2')->remove();
    $escaped_bio = mysql_real_escape_string($doc->find('.bio')->text());
    $hero_info['description'] = $escaped_bio ? $escaped_bio : 0;

    // Result key => (remote URL, local target folder). All three images are
    // saved under the same "<slug>.jpg" file name.
    $local_name = $slug . ".jpg";
    $downloads = array(
        'image' => array(
            "http://dota2.ru" . $doc->find('.hero > img')->attr('src'),
            "images/heroes_full/" . $slug,
        ),
        'icon' => array(
            'http://dota2.ru/img/heroes/' . $slug . '/m_icon.jpg',
            "images/heroes_icons",
        ),
        'icon_big' => array(
            "http://dota2.ru/img/heroes/" . $slug . "/icon.jpg",
            "images/heroes_icons_big",
        ),
    );

    foreach ($downloads as $key => $spec) {
        $stored = download_img_in_folder($spec[0], $local_name, $spec[1]);
        if ($stored) {
            $hero_info[$key] = mysql_real_escape_string($stored);
        }
    }

    return $hero_info;
}
/**
 * Parses the item list page at http://dota2.ru/items/ and hands the
 * collected items to take_items_to_bd().
 *
 * For every selected item anchor the function logs the start of processing,
 * requests the item image through download_img_in_folder(), and, when that
 * call returns a truthy value, records the item's slug, image path and the
 * data returned by get_item_info(). Items whose image download fails are
 * skipped.
 *
 * If the page cannot be fetched (non-zero "error_no"), an error message is
 * echoed and the script terminates.
 *
 * @param array|null $links When null, every "#list > .item > a" anchor is
 *                          processed; otherwise only the anchors whose href
 *                          equals one of the given (trimmed) links.
 *
 * @return void
 */
function parseItems($links = null)
{
    // Fetch the item list page.
    $page = get_hero_page("http://dota2.ru/items/");
    if ($page['error_no'] != 0) {
        echo "Ошибка получения страницы героя</br>" . $page['error'] . "</br>";
        die;
    }

    // Build a phpQuery document from the fetched HTML.
    $document = phpQuery::newDocument($page["page"]);

    // Initialised up front so an empty $links array still leaves something
    // iterable for the loop below.
    $item_links_with_info = array();
    if (is_null($links)) {
        $item_links_with_info = $document->find("#list > .item > a");
    } else {
        foreach ($links as $link) {
            $item_links_with_info[] = $document->find("#list > .item > a[href='" . trim($link) . "']");
        }
    }

    // Initialised so take_items_to_bd() always receives an array, even when
    // nothing was parsed.
    $items = array();

    foreach ($item_links_with_info as $item_link) {
        // Iterating a phpQuery result yields plain DOM nodes, which have no
        // attr() method, so wrap the entry before touching it. pq() also
        // accepts the phpQuery objects collected in the $links branch.
        $item_html_obj = pq($item_link);

        file_put_contents(
            __DIR__ . "/logs/parse_ithems.log",
            date("Y-m-d H:i:s") . ": начинаю парсить предмет" . $item_html_obj->attr('href') . "\r\n",
            FILE_APPEND
        );

        $img_src = $item_html_obj->find('.tooltipe')->attr('src');
        $name_and_slug = item_img_name_and_slug(trim($img_src));

        // Start from a clean record so nothing leaks over from the previous item.
        $item = array();
        $item['slug'] = mysql_real_escape_string($name_and_slug["slug"]);

        $path = "/images/ithems/" . $name_and_slug["filename"] . ".jpg";
        $img_link = "http://dota2.ru" . $img_src;

        // NOTE(review): get_hero_main_info() calls download_img_in_folder()
        // as (url, filename, folder); only two arguments are passed here.
        // Confirm against the helper's definition.
        if (!download_img_in_folder($img_link, $path)) {
            continue;
        }
        $item['image'] = mysql_real_escape_string($path);

        $item['info'] = get_item_info($item_html_obj->find('.tooltipe-content'));
        $items[] = $item;
    }

    take_items_to_bd($items);
}