/**
 * Scrape directory listings surname by surname and store new contacts.
 *
 * For each surname returned by Datascrape::get_new_surname() the next town
 * is looked up, every result page for that surname/town pair is fetched and
 * parsed, and each record that Datascrape::is_dupe() does not report as a
 * duplicate is saved as a \Model_Selfgeneration row. The surname's
 * last_town is only advanced when every page was fetched successfully.
 * Progress and throughput statistics are echoed as the run proceeds.
 *
 * The run ends when no surname is returned or when no proxy has a
 * fail_count below static::$current_max_count. If no proxy qualifies at
 * start-up, the threshold is raised until at least one does.
 *
 * @return void
 */
public function run()
{
    ini_set('user_agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11');

    $starttime = microtime(true);
    $sessionCount = 0;

    // Only close an output buffer if one is actually open; calling
    // ob_end_flush() without an active buffer raises a notice.
    if (ob_get_level() > 0) {
        ob_end_flush();
    }

    $surname = Datascrape::get_new_surname();

    // With an empty proxy table no threshold can ever match, so the
    // threshold-raising loop below would never terminate.
    if ((int) \Model_Proxy::find()->count() === 0) {
        echo "No proxies available\n";
        @ob_flush();
        return;
    }

    // $proxy_count is only ever counted, so the query needs no ordering.
    $proxy_count = \Model_Proxy::find()->where('fail_count', '<', static::$current_max_count);
    while ($proxy_count->count() == 0) {
        static::$current_max_count++;
        echo "Proxy Count Now: " . static::$current_max_count;
        @ob_flush();
        $proxy_count = \Model_Proxy::find()->where('fail_count', '<', static::$current_max_count);
    }

    while ($surname && $proxy_count->count() > 0) {
        $town = Datascrape::get_next_search($surname['last_town']);
        $html = Datascrape::get_html_dom($surname['surname'], $town['town']);

        if (!is_object($html)) {
            echo "No Content\n";
            print $html;
        } else {
            // NOTE(review): the second-to-last pagination link is presumably
            // the highest page number (the last one being "next") - confirm
            // against the scraped markup.
            $pagesArray = $html->find(".page_nav a");
            $linkCount = count($pagesArray);
            $totalPageNumbers = 1;
            if ($linkCount >= 2) {
                // Page 1 is already loaded, so never drop below one page
                // even if the link text is not numeric.
                $totalPageNumbers = max(1, (int) $pagesArray[$linkCount - 2]->plaintext);
            }

            echo $totalPageNumbers . " pages of " . $surname['surname'] . "'s in " . $town['town'] . "\n";
            @ob_flush();

            $results = array();
            $got_error = false;

            for ($i = 1; $i <= $totalPageNumbers; $i++) {
                // Page 1 was fetched above; later pages are fetched on demand.
                if ($i > 1) {
                    $html = Datascrape::get_html_dom($surname['surname'], $town['town'], $i);
                }

                if (!is_object($html)) {
                    $got_error = true;
                } else {
                    foreach ($html->find(".record") as $e) {
                        // "@": a record may lack a field; presumably find()
                        // then yields null and the property read gives null.
                        $nameParts = explode(" ", Datascrape::cleanString(@$e->find(".name", 0)->plaintext));

                        // Last word is the surname; the remaining words keep
                        // their original order as the forename(s).
                        $singleResult = array();
                        $singleResult['surname'] = array_pop($nameParts);
                        $singleResult['forename'] = implode(" ", $nameParts);

                        $address = explode(", ", Datascrape::cleanString(@$e->find(".address", 0)->innertext));
                        $singleResult['add1'] = isset($address[0]) ? $address[0] : "";
                        if (isset($address[2]) && substr($address[2], 0, 11) !== "<span class") {
                            $singleResult['add2'] = $address[2];
                        } else {
                            // Short addresses may have no second part at all.
                            $singleResult['add2'] = isset($address[1]) ? $address[1] : "";
                        }

                        $singleResult['postcode'] = @Datascrape::cleanString($e->find(".postcode", 0)->plaintext, TRUE);

                        $tmpTel = (string) str_replace(
                            array('(', ')', ' '),
                            '',
                            @Datascrape::cleanString($e->find(".telnum", 0)->plaintext, TRUE)
                        );
                        // Strip a single leading "0". The comparison is a strict
                        // string match so empty or non-numeric prefixes are left
                        // untouched on every PHP version.
                        $singleResult['telephone'] = substr($tmpTel, 0, 1) === '0'
                            ? (string) substr($tmpTel, 1)
                            : $tmpTel;

                        $results[] = $singleResult;
                        echo ".";
                        @ob_flush();
                        print_r($singleResult);
                    }
                }

                echo "\n";
                @ob_flush();
            }

            // Loop through all results and add to database
            foreach ($results as $result) {
                if (!Datascrape::is_dupe($result['forename'], $result['telephone'])) {
                    $sessionCount++;
                    $new = new \Model_Selfgeneration();
                    $new->fname = $result['forename'];
                    $new->sname = $result['surname'];
                    $new->add1 = $result['add1'];
                    $new->add2 = $result['add2'];
                    $new->postcode = $result['postcode'];
                    $new->telephone = $result['telephone'];
                    $new->save();
                }
            }

            // Only advance the surname's progress marker when every page
            // loaded, so a failed town is retried on a later run.
            if (!$got_error) {
                $update_surname = \Model_Surname::find($surname['id']);
                $update_surname->last_town = $town['id'];
                $update_surname->save();
            }
        }

        $timeMins = (microtime(true) - $starttime) / 60;
        // Guard against a zero elapsed time to avoid division by zero.
        $perMinute = $timeMins > 0 ? (int) ceil($sessionCount / $timeMins) : 0;
        $total = \Model_Selfgeneration::find()->count();

        echo number_format($sessionCount, 0) . " This Session. "
            . number_format($total, 0) . " To Date.\n"
            . number_format($perMinute, 0) . " Per Minute. "
            . number_format($perMinute * 60, 0) . " Per Hour. "
            . number_format($perMinute * 60 * 24, 0) . " Per Day.";
        echo "\n\n";
        @ob_flush();

        $surname = Datascrape::get_new_surname();
        // Use the same (possibly raised) threshold as the start-up check
        // rather than a fixed value, so the run does not stop early.
        $proxy_count = \Model_Proxy::find()->where('fail_count', '<', static::$current_max_count);
    }
}