*/
// Crawl driver: walks the numbered company pages, extracts one record per
// company, and stops once enough records have been gathered.

// Surface every error while the crawl runs and pin the timezone.
ini_set('display_errors', 'On');
error_reporting(E_ALL);
date_default_timezone_set('Asia/Shanghai');

$company_start = 1;
$company_total = 31068;
$company_page = 'https://www.itjuzi.com/company/%d';
$total = 1000; // number of companies to collect
$results = []; // collected company records
$start_time = microtime(true);

for ($i = $company_start; $i <= $company_total; ++$i) {
    $content = fetch_content(sprintf($company_page, $i));
    $homepage = get_company_homepage($content);

    $data = [
        'name' => get_company_name($content),
        'products' => get_company_products($content),
        'location' => get_company_location($content),
        'level' => get_company_level($content),
    ];

    // The company's own site is only downloaded when a homepage was found;
    // otherwise the jobs link stays empty.
    if ($homepage) {
        $data['jobs_link'] = get_company_jobs_link(fetch_content($homepage), $homepage);
    } else {
        $data['jobs_link'] = '';
    }

    // Pages that yield no company name are not recorded.
    if ($data['name']) {
        $results[] = $data;
        echo implode(' ', [$i, count($results), $data['name'], $data['jobs_link']]) . "\n";
    }

    // Stop as soon as the requested number of records has been collected.
    if (count($results) >= $total) {
        break;
    }
}

// Persist the records next to this script, then print summary statistics.
$output_file = __DIR__ . DIRECTORY_SEPARATOR . 'companies.json';
file_put_contents($output_file, json_encode($results));

$with_jobs_link = array_filter(array_column($results, 'jobs_link'));
echo "got jobs link: " . count($with_jobs_link) . "\n";
echo "use:" . (microtime(true) - $start_time) . "s\n";
exit(" done!");

// Parse the jobs (recruitment) link
<?php
/*
 * Download proxy: fetches a file that lives on this same server and hands it
 * to output_content().
 *
 * The target is passed in $_GET['file'] as a URL whose "://" is written as
 * "|", i.e. "http|<server name>/..." or "https|<server name>/...".
 */

// Nothing to do without a file reference.
if (!isset($_GET['file'])) {
    die;
}

$requested = $_GET['file'];
$allowedPrefix = (isset($_SERVER['HTTPS']) ? 'https|' : 'http|') . $_SERVER['SERVER_NAME'];

// The reference must be a string that BEGINS with "<scheme>|<server name>".
// A plain "contains" test would also accept foreign URLs that merely mention
// the prefix somewhere (e.g. in their query string), which would turn this
// script into an open proxy.
if (!is_string($requested) || strpos($requested, $allowedPrefix) !== 0) {
    die;
}

// The host name has to end right after the prefix: an optional port, then a
// path, query, fragment or the end of the string. This rejects look-alikes
// such as "<server name>.evil.test" and "<server name>@evil.test".
$afterHost = (string) substr($requested, strlen($allowedPrefix));
if (preg_match('~^(?::\d+)?(?:[/?#]|\z)~', $afterHost) !== 1) {
    die;
}

require_once '../lib/class.mimetype.php';
$mime = new mimetype();

// Turn the "|" placeholders back into real scheme separators.
$fPath = str_replace('http|', 'http://', $_GET['file']);
$fPath = str_replace('https|', 'https://', $fPath);
$fType = $mime->getType($fPath);
$fName = basename($fPath);
// Strip the "_#_#<digits>" marker from the file name before it is passed on.
$origname = preg_replace('/_#_#\\d*/', '', $fName);

$fContent = fetch_content($fPath);
output_content($fContent, $origname);

/**
 * Downloads a URL with cURL and returns the response body.
 *
 * @param string $url Absolute URL to request.
 * @return string Response body (without headers), or '' when the transfer
 *                could not be started or failed.
 */
function fetch_content($url)
{
    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Let cURL return the body directly instead of printing it and capturing
    // the output through an output buffer.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $body = curl_exec($ch);
    curl_close($ch);

    // curl_exec() yields false on failure; callers expect a string.
    return $body === false ? '' : $body;
}

function output_content($content, $name) {