/**
 * Stores the given predicate on the meta record for $id, creating the record when needed.
 *
 * The lookup is delegated to hasMeta($id); a truthy result is treated as the
 * existing record and only its predicate is overwritten. Otherwise a new
 * ScraperMeta row is built for the 'OsImportAddresses' scraper with $id as
 * its namespace and a value of 1.
 *
 * @param mixed $id        identifier stored in (and looked up by) the namespace column
 * @param mixed $predicate predicate to store on the record
 *
 * @return ScraperMeta the record that was saved
 */
public function saveMeta($id, $predicate)
{
    $record = $this->hasMeta($id);

    if (!$record) {
        //nothing stored for this id yet: build a fresh row
        $record = new ScraperMeta();
        $record->scraper = 'OsImportAddresses';
        $record->namespace = $id;
        $record->predicate = $predicate;
        $record->value = 1;
    } else {
        //existing row: only the predicate changes
        $record->predicate = $predicate;
    }

    $record->save();

    return $record;
}
/**
 * Stores $value under the ($namespace, $predicate) pair for this scraper.
 *
 * In test mode nothing is persisted: the value is kept in the in-memory
 * $this->testMeta array instead. Otherwise the matching ScraperMeta record
 * (as returned by _getMeta()) is updated, or a new one is created using
 * getName() as the scraper name.
 *
 * @param mixed $namespace namespace column of the meta record
 * @param mixed $predicate predicate column of the meta record
 * @param mixed $value     value to store
 *
 * @return void
 */
public function saveMeta($namespace, $predicate, $value)
{
    //use class variable if in testMode
    if ($this->testMode) {
        if (!isset($this->testMeta[$namespace])) {
            $this->testMeta[$namespace] = array($predicate => $value);
        } else {
            $this->testMeta[$namespace][$predicate] = $value;
        }

        return;
    }

    $record = $this->_getMeta($namespace, $predicate);

    if (!$record) {
        //no stored record for this pair yet
        $record = new ScraperMeta();
        $record->scraper = $this->getName();
        $record->namespace = $namespace;
        $record->predicate = $predicate;
    }

    $record->value = $value;
    $record->save();
}
/**
 * Reviews donation transactions for a batch of donor entities and flags each
 * reviewed transaction in os_entity_transaction as verified or unverified.
 *
 * For every entity id returned by getEntities():
 *  - the person row is loaded (donors without one are skipped);
 *  - donations from getTransactions($entity_id) are used as the reference set
 *    (the local names call these "verified" -- presumably already-verified
 *    matches; confirm against getTransactions());
 *  - donations from getTransactions($entity_id, 0, 1) whose names are
 *    compatible with the person are compared against the reference set via
 *    checkForMatch() and written back with is_verified = 1 or 0;
 *  - the 'last_scraped' scraper_meta row for the session is set to the entity id.
 *
 * When a session name is given, the starting id is resumed from the
 * 'OsUpdate' / <session> / 'last_scraped' scraper_meta row if one exists;
 * otherwise that row is created with start_id - 1.
 *
 * Exits the process immediately when safeToRun() reports another instance.
 *
 * @param array $arguments task arguments, passed through to init()
 * @param array $options   task options; 'session', 'start_id' and 'limit' are read here
 *
 * @return void
 */
protected function execute($arguments = array(), $options = array())
{
    if (!$this->safeToRun()) {
        print "Script already running!\n";
        die;
    }

    $this->init($arguments, $options);

    $session = $options['session'];
    $start_id = $options['start_id'];

    if ($session) {
        $sql = 'SELECT * FROM scraper_meta WHERE scraper = ? and namespace = ? and predicate = ?';
        $stmt = $this->db->execute($sql, array('OsUpdate', $session, 'last_scraped'));
        $metas = $stmt->fetchAll(PDO::FETCH_ASSOC);

        if (count($metas)) {
            //resume where the previous run of this session stopped
            $start_id = $metas[0]['value'];
        } else {
            //first run of this session: create the progress marker
            $meta = new ScraperMeta();
            $meta->scraper = 'OsUpdate';
            $meta->namespace = $session;
            $meta->predicate = 'last_scraped';
            $meta->value = $start_id - 1;
            $meta->save();
        }
    }

    $entity_ids = $this->getEntities($options['limit'], $start_id);

    foreach ($entity_ids as $entity_id) {
        //get person record
        $sql = 'SELECT * FROM person WHERE entity_id = ?';
        $stmt = $this->db->execute($sql, array($entity_id));

        if (!($donorPerson = $stmt->fetch(PDO::FETCH_ASSOC))) {
            if ($this->debugMode) {
                print "* Can't find Person record for donor with entity_id " . $entity_id . "; skipping...";
            }

            //skip only this donor; the remaining entities in the batch are still processed
            continue;
        }

        $this->printDebug(PersonTable::getLegalName($donorPerson));

        $trans = $this->getTransactions($entity_id);
        $verified_donations = $this->getDonations($trans);

        //transpose donation rows into field => list of values, then de-duplicate each list
        $out = array();
        foreach ($verified_donations as $key => $subarr) {
            foreach ($subarr as $subkey => $subvalue) {
                $out[$subkey][$key] = $subvalue;
            }
        }
        $verified_fields = array_map('array_unique', $out);

        $trans = $this->getTransactions($entity_id, 0, 1);

        $fields_to_check = array('donor_name', 'street', 'city', 'state', 'zip', 'employer_raw', 'org_raw', 'title_raw', 'gender', 'suffix');

        foreach ($fields_to_check as $f) {
            //a field is absent when no reference donation carries it (e.g. no reference donations at all)
            $known_values = isset($verified_fields[$f]) ? $verified_fields[$f] : array();
            $this->printDebug($f . ": " . implode(",", $known_values));
        }

        $unverified_donations = $this->getDonations($trans);

        $this->donationMatches = array();
        $this->donationNonmatches = array();

        foreach ($unverified_donations as $ud) {
            //donations whose name is incompatible with the person are left untouched
            if ($this->namesAreCompatible($ud, $donorPerson)) {
                $mat = $this->checkForMatch($ud, $verified_donations, $fields_to_check, $verified_fields);
                $ud['reason'] = $mat[1];

                if ($mat[0] == 1) {
                    $this->donationMatches[] = $ud;
                } else {
                    $this->donationNonmatches[] = $ud;
                }
            }
        }

        //include the match reason in the debug output below
        $fields_to_check[] = 'reason';

        $this->printDebug("\nSUCCESSES");

        foreach ($this->donationMatches as $dm) {
            //mark donation matches as verified
            $sql = 'UPDATE os_entity_transaction SET is_verified = 1, is_synced = (is_verified = is_processed), reviewed_at = ?, reviewed_by_user_id = ? WHERE entity_id = ? AND cycle = ? AND transaction_id = ?';
            $this->db->execute($sql, array(date('Y-m-d H:i:s'), 1, $entity_id, $dm['cycle'], $dm['row_id']));

            $str = '';
            foreach ($fields_to_check as $f) {
                $str .= $f . ": " . $dm[$f] . "; ";
            }
            $this->printDebug("\t" . $str);
        }

        $this->printDebug("\nFAILURES");

        foreach ($this->donationNonmatches as $dm) {
            //mark donation non-matches as unverified
            $sql = 'UPDATE os_entity_transaction SET is_verified = 0, is_synced = (is_verified = is_processed), reviewed_at = ?, reviewed_by_user_id = ? WHERE entity_id = ? AND cycle = ? AND transaction_id = ?';
            $this->db->execute($sql, array(date('Y-m-d H:i:s'), 1, $entity_id, $dm['cycle'], $dm['row_id']));

            $str = '';
            foreach ($fields_to_check as $f) {
                $str .= $f . ": " . $dm[$f] . "; ";
            }
            $this->printDebug("\t" . $str);
        }

        //record progress so a later run of the same session can resume after this entity
        $sql = 'UPDATE scraper_meta SET value = ? WHERE scraper = ? and namespace = ? and predicate = ?';
        $this->db->execute($sql, array($entity_id, 'OsUpdate', $session, 'last_scraped'));

        $this->printDebug("*******************************");
    }
}
/**
 * Records that a company has been scraped for the latest year in $this->years.
 *
 * The log entry is a ScraperMeta row keyed by this scraper's class name, a
 * namespace ('nodata' when $empty is true, otherwise $this->log_namespace),
 * the company id as predicate and the year as value. If such a row already
 * exists the method returns without doing anything. In test mode the row is
 * built but not saved.
 *
 * @param object $company company being logged; only its id property is read here
 * @param bool   $empty   when true the entry is filed under the 'nodata' namespace
 *
 * @return void
 */
public function logCompany($company, $empty = false)
{
    $latestYear = max($this->years);

    if ($empty) {
        $logNamespace = 'nodata';
    } else {
        $logNamespace = $this->log_namespace;
    }

    //check for existing log entry
    $existing = LsDoctrineQuery::create()
        ->from('ScraperMeta s')
        ->where('s.scraper = ?', get_class($this))
        ->andWhere('s.namespace = ?', $logNamespace)
        ->andWhere('s.predicate = ?', $company->id)
        ->andWhere('s.value = ?', $latestYear);

    if ($existing->count()) {
        return;
    }

    $entry = new ScraperMeta();
    $entry->scraper = get_class($this);
    $entry->namespace = $logNamespace;
    $entry->predicate = $company->id;
    $entry->value = $latestYear;

    if (!$this->testMode) {
        $entry->save();
    }

    //NOTE(review): the message names $this->entity, not $company -- confirm they refer to the same record
    $this->printDebug("Logged complete scrape of " . $this->entity->name);
}