Example #1
0
File: main.php Project: m4ker/exam
 */
// Crawl itjuzi.com company pages, collect basic company data plus a jobs
// link found via each company's homepage, and dump the result to
// companies.json next to this script.
ini_set('display_errors', 'On');
error_reporting(E_ALL);
date_default_timezone_set('Asia/Shanghai');
// Inclusive range of company ids to visit and the URL template for one page.
$company_start = 1;
$company_total = 31068;
$company_page = 'https://www.itjuzi.com/company/%d';
// Number of companies to collect before stopping.
$total = 1000;
// Collected company records.
$results = [];
$start_time = microtime(true);
for ($i = $company_start; $i <= $company_total; $i++) {
    $content = fetch_content(sprintf($company_page, $i));
    // Records without a name are discarded, so bail out before spending a
    // second HTTP request on the homepage.
    // NOTE(review): assumes the get_company_* helpers only parse $content
    // and have no side effects of their own - confirm.
    $name = get_company_name($content);
    if (!$name) {
        continue;
    }
    $homepage = get_company_homepage($content);
    $data = [
        'name' => $name,
        'products' => get_company_products($content),
        'location' => get_company_location($content),
        'level' => get_company_level($content),
        // The jobs link is looked up on the company's own homepage, when it has one.
        'jobs_link' => $homepage ? get_company_jobs_link(fetch_content($homepage), $homepage) : '',
    ];
    $results[] = $data;
    echo $i . ' ' . count($results) . ' ' . $data['name'] . ' ' . $data['jobs_link'] . "\n";
    // Stop once the requested number of companies has been collected.
    if (count($results) >= $total) {
        break;
    }
}
// Write the results to disk and print statistics.
$json = json_encode($results);
if ($json === false) {
    // json_encode() returns false on e.g. malformed UTF-8 in scraped text;
    // writing that value would silently produce an empty file. Retry in
    // partial-output mode so the rest of the crawl is not lost.
    echo "json_encode failed: " . json_last_error_msg() . ", retrying with partial output\n";
    $json = json_encode($results, JSON_PARTIAL_OUTPUT_ON_ERROR);
}
if ($json === false || file_put_contents(dirname(__FILE__) . DIRECTORY_SEPARATOR . 'companies.json', $json) === false) {
    echo "failed to write companies.json\n";
    exit(1);
}
echo "got jobs link: " . count(array_filter(array_column($results, 'jobs_link'))) . "\n";
echo "use:" . (microtime(true) - $start_time) . "s\n";
exit(" done!");
// 分析招聘链接
Example #2
0
<?php

// Download proxy: fetches a file that lives on this server by URL and hands
// it to output_content() under its original name.

// Reject requests that carry no usable "file" parameter (missing, or an
// array such as ?file[]=x, which the string functions below cannot handle).
if (!isset($_GET['file']) || !is_string($_GET['file'])) {
    die;
}
// The parameter is a URL whose "://" is encoded as "|", e.g. "http|host/path".
$fPath = str_replace('http|', 'http://', $_GET['file']);
$fPath = str_replace('https|', 'https://', $fPath);
// Only URLs on this very server, using the scheme of the current request,
// may be fetched. Compare the parsed scheme and host: a substring search for
// "scheme|SERVER_NAME" also matches values such as
// "http|evil.example/?x=http|thishost" or "http|thishost.evil.example/",
// which would turn this script into an open proxy.
$urlParts = parse_url($fPath);
$expectedScheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
if (
    !is_array($urlParts)
    || !isset($urlParts['scheme'], $urlParts['host'])
    // Credentials in the URL ("host@other-host") are never needed here and
    // are a common way to confuse host checks.
    || isset($urlParts['user'])
    || strcasecmp($urlParts['scheme'], $expectedScheme) !== 0
    || strcasecmp($urlParts['host'], $_SERVER['SERVER_NAME']) !== 0
) {
    die;
}
require_once '../lib/class.mimetype.php';
$mime = new mimetype();
$fType = $mime->getType($fPath);
$fName = basename($fPath);
// Strip the "_#_#<digits>" marker from the file name to recover the original name.
$origname = preg_replace('/_#_#\\d*/', '', $fName);
$fContent = fetch_content($fPath);
output_content($fContent, $origname);
/**
 * Download the body of a URL with cURL.
 *
 * Response headers are not included in the result.
 *
 * @param string $url URL to fetch.
 * @return string Response body, or an empty string when the cURL handle
 *                cannot be created or the transfer fails.
 */
function fetch_content($url)
{
    $ch = curl_init();
    if ($ch === false) {
        // Without a handle every following curl_* call would fail.
        return '';
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Let curl_exec() return the body instead of printing it, so no output
    // buffer is needed to capture it and a failed transfer is reported as
    // false rather than as a silently truncated body.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $body = curl_exec($ch);
    curl_close($ch);
    // Callers expect a string; keep the empty-string result on failure.
    return is_string($body) ? $body : '';
}
function output_content($content, $name)
{