<?php // crunchbase daily updater // http://static.crunchbase.com/daily/content_web.html include_once 'cb-inc.php'; $m = new MongoClient(); $db = $m->crunchbase; $jsoninv_collection = $db->jsoninv; $jsondata_collection = $db->jsondata; $jsoninv_collection = $db->jsoninv; $fulldata_collection = $db->fulldata; $investments_collection = $db->investments; $investments = load_investments_from_mongo($investments_collection); $fulljson = load_objs_list_from_mongo($jsondata_collection); $data = askhost("http://static.crunchbase.com/daily/content_web.html"); // person // organization $ret = $data; while ($ret = stristr($ret, "person/")) { $pos = strpos($ret, "\""); $obj = substr($ret, 0, $pos); $ret = substr($ret, $pos + 1); echo $obj . "\n"; update_object_and_related($obj); } $ret = $data; while ($ret = stristr($ret, "organization/")) { $pos = strpos($ret, "\""); $obj = substr($ret, 0, $pos); $ret = substr($ret, $pos + 1); echo $obj . "\n";
$fulldata_collection = $db->fulldata; //$fullobj=load_objs_list_from_mongo($fulldata_collection); $investments_collection = $db->investments; //$investments_collection->drop(); $investments_collection = $db->investments; $investments = load_investments_list_from_mongo($investments_collection); $jsondata_collection = $db->jsondata; $jsoninv_collection = $db->jsoninv; //$objjsons=load_objs_list_from_mongo($jsondata_collection); //$invjsons=load_objs_list_from_mongo($jsoninv_collection); while (true) { $cursor1 = $org_collection->find(); $items_count = $cursor1->count(); $current_item = 0; $fulljson = load_objs_list_from_mongo($jsondata_collection); $fullinv = load_objs_list_from_mongo($jsoninv_collection); echo "total records: " . $items_count . " resuming at {$resumerec}\n"; foreach ($cursor1 as $doc) { $current_item++; if ($resumerec > $current_item) { continue; } // if (isset($fullobj[$doc['id']])) { // echo "\n already exist, skipping\n"; // continue; // object already exist // } // get org data //echo "!"; if (0 == fmod($current_item, 100)) { echo "({$current_item})\n"; }
/**
 * Build a zipped CSV report of investments and return the archive path.
 *
 * Iterates every document of the `fulldata` collection. For each investment
 * id listed in the document that has an entry with an 's' (sum) key in the map
 * returned by load_investments_from_mongo(), one CSV row is written per market
 * of the document. A document without markets therefore contributes no rows.
 *
 * The CSV is written to a temporary file under /tmp, packed into
 * "<tempname>-report.zip" as "data.csv", and the temporary CSV is removed.
 * Any I/O failure terminates the script via die(), matching the existing
 * handling of a zip that cannot be opened.
 *
 * @param object $db Database handle exposing the `fulldata` and `investments`
 *                   collections as properties.
 * @return string Path of the created zip archive.
 */
function prepare_report($db)
{
    $fulldata_collection = $db->fulldata;
    $investments = load_investments_from_mongo($db->investments);

    $fname = tempnam("/tmp", "cb-rep");
    if ($fname === false) {
        die("cant create temp file\n");
    }

    $fw = fopen($fname, "w");
    if ($fw === false) {
        @unlink($fname);
        die("cant open <{$fname}>\n");
    }

    // Header is kept byte-for-byte (including its irregular spacing) because
    // downstream consumers may key on these exact column names.
    fputs($fw, "investor_name,investment_sum_usd,funding_series,funding_type,recipient,date,year,month, quarter, category, investment_range\n");

    foreach ($fulldata_collection->find() as $doc) {
        $name = isset($doc['name']) ? $doc['name'] : null;
        $inv = isset($doc['investments']) ? $doc['investments'] : array();
        $markets = isset($doc['markets']) ? $doc['markets'] : array();

        // Rows are emitted per (investment, market) pair, so a document
        // lacking either list produces nothing.
        if (empty($inv) || empty($markets)) {
            continue;
        }

        foreach ($inv as $investment) {
            if (!isset($investments[$investment]['s'])) {
                continue; // unknown investment id, or no sum recorded
            }

            $details = $investments[$investment];
            $funding_sum = $details['s'];
            $funding_date = $details['d'];

            // Keys are assigned in column order; index 9 (category) is
            // reserved here so it stays ahead of index 10 when written.
            $line = array();
            $line[0] = $name;
            $line[1] = $funding_sum;
            $line[2] = $details['r'];
            $line[3] = $details['t'];
            $line[4] = $details['c'];
            $line[5] = cps_date_from_utime($funding_date);
            $line[6] = year_from_utime($funding_date);
            $line[7] = month_from_utime($funding_date);
            $line[8] = quarter_from_utime($funding_date);
            $line[9] = null;
            $line[10] = get_range($funding_sum);

            foreach ($markets as $market) {
                // NOTE(review): the original read $market->name only. Array-shaped
                // entries are accepted as well, since the documents themselves are
                // accessed as arrays - confirm the stored shape of `markets`.
                if (is_object($market) && isset($market->name)) {
                    $line[9] = $market->name;
                } elseif (is_array($market) && isset($market['name'])) {
                    $line[9] = $market['name'];
                } else {
                    $line[9] = null;
                }
                fputcsv($fw, $line);
            } // each market
        } // each investment
    } // each investor

    fclose($fw);

    $zip = new ZipArchive();
    $filename = $fname . "-report.zip";
    if ($zip->open($filename, ZipArchive::CREATE) !== true) {
        @unlink($fname);
        die("cant open <{$filename}>\n");
    }

    $added = $zip->addFile($fname, "data.csv");
    // The CSV must still exist when the archive is closed, so it is removed
    // only after close() has returned.
    $closed = $zip->close();
    @unlink($fname);

    if (!$added || !$closed) {
        die("cant write <{$filename}>\n");
    }

    return $filename;
}