/**
 * Builds a query selecting the $model rows that point at an existing record.
 *
 * Rows are filtered on object_model (the record's class name) and object_id
 * (the record's id). The table alias is the lower-cased first character of
 * the model name.
 *
 * @param string          $model  Model name to select from
 * @param Doctrine_Record $object Record the rows must reference
 *
 * @return mixed The query produced by LsDoctrineQuery::create()->from()->where()
 *
 * @throws Exception If $object->exists() is false
 */
static function getByModelAndObjectQuery($model, Doctrine_Record $object)
{
  if (!$object->exists()) {
    $plural = LsString::pluralize($model);

    throw new Exception("Can't get " . $plural . " by new object");
  }

  $alias = substr(strtolower($model), 0, 1);
  $source = $model . ' ' . $alias;
  $condition = $alias . '.object_model = ? AND ' . $alias . '.object_id = ?';
  $params = array(get_class($object), $object->id);

  return LsDoctrineQuery::create()->from($source)->where($condition, $params);
}
/**
 * Scans elected representatives' summaries for stated family ties and records
 * them as Family relationships.
 *
 * Members are selected when their summary starts a parenthetical with a kinship
 * word ("(son ...", "(wife ..." etc). For each "<relation> of <name>" phrase in
 * the first parenthetical, people whose name contains every word of <name> are
 * looked up. When exactly one person other than the member is found and no
 * relationship between the two exists yet, a Family relationship is saved.
 * Progress is written to stdout with echo.
 *
 * @param array $arguments Task arguments (not read here)
 * @param array $options   Task options; 'application' and 'env' are read
 */
protected function execute($arguments = array(), $options = array())
{
  $configuration = ProjectConfiguration::getApplicationConfiguration($options['application'], $options['env'], true);
  $databaseManager = new sfDatabaseManager($configuration);
  $databaseManager->initialize($configuration);

  $q = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'))
    ->addWhere(
      'summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ? OR summary like ?',
      array('(daughter%', '(son%', '(father%', '(mother%', '(cousin%', '(husband%', '(wife%', '(brother%', '(sister%')
    )
    ->orderBy('person.name_last');
  $members = $q->execute();

  foreach ($members as $member) {
    // only the first parenthetical of the summary is inspected
    if (!preg_match('/\\([^\\)]*\\)/isu', $member->summary, $match)) {
      continue;
    }

    echo $member->name . ":\n";

    if (preg_match_all('/(brother|sister|daughter|mother|father|wife|husband|cousin)\\sof\\s+([^\\;\\)\\,]*)(\\;|\\)|\\,)/isu', $match[0], $matches, PREG_SET_ORDER)) {
      foreach ($matches as $m) {
        echo "\t\t" . $m[1] . ' : of : ' . $m[2] . "\n";

        $m[2] = str_replace('.', '', $m[2]);
        $parts = LsString::split($m[2]);

        // every word of the relative's name must appear in the entity name
        $q = EntityTable::getByExtensionQuery(array('Person', 'ElectedRepresentative'));
        foreach ($parts as $part) {
          $q->addWhere('e.name like ?', '%' . $part . '%');
        }
        $people = $q->execute();

        $family = array();
        foreach ($people as $person) {
          echo "\t\t\t\t" . $person->name . "\n";
          if ($person->id != $member->id) {
            $family[] = $person;
          }
        }

        // only act on an unambiguous match
        if (count($family) != 1) {
          continue;
        }

        // The relative is the single surviving candidate. The loop variable
        // $person is merely the last row iterated and may be the member itself.
        $relative = $family[0];

        $q = LsDoctrineQuery::create()
          ->from('Relationship r')
          ->where(
            '(r.entity1_id = ? or r.entity2_id =?) and (r.entity1_id = ? ' . "\n" . 'or r.entity2_id = ?)',
            array($member->id, $member->id, $relative->id, $relative->id)
          );
        if ($q->count()) {
          continue;
        }

        if ($description2 = FamilyTable::getDescription2($m[1], $relative->Gender->id)) {
          $relationship = new Relationship();
          $relationship->setCategory('Family');
          $relationship->Entity1 = $member;
          $relationship->Entity2 = $relative;
          $relationship->description1 = $m[1];
          $relationship->description2 = $description2;
          $relationship->save();

          // reuse the member's biographical-directory reference as the source, when present
          $ref = LsQuery::getByModelAndFieldsQuery('Reference', array('object_model' => 'Entity', 'object_id' => $member->id, 'name' => 'Congressional Biographical Directory'))->fetchOne();
          if ($ref) {
            $relationship->addReference($ref->source, null, null, $ref->name, $ref->source_detail, $ref->publication_date);
          }

          echo "-------------------------------added relationship\n";
        }
      }
    }

    echo "\n";
  }
}
/**
 * Tells whether any of this entity's names resembles the given string.
 *
 * The string is stripped of suffixes and split into search terms (a term may
 * be an array of alternatives). A name matches when it starts with the first
 * term (optionally preceded by "The") and contains every term.
 *
 * @param string   $str    Name to compare against
 * @param bool|int $strict When truthy, a match is rejected if the name, once
 *                         the terms and suffixes are removed, still has at
 *                         least $strict words (true behaves like 1)
 *
 * @return bool True as soon as one name matches, false otherwise
 */
public function hasSimilarName($str, $strict = false)
{
  $str = OrgTable::removeSuffixes($str);
  $str = trim($str);

  if (!strlen($str)) {
    return false;
  }

  $terms = LsQuery::splitSearchPhrase($str);
  $names = $this->Entity->getAllNames();

  // turn every term into a regex fragment; alternatives become "a|b|c"
  foreach ($terms as &$term) {
    if (is_array($term)) {
      foreach ($term as &$t) {
        $t = LsString::escapeStringForRegex($t);
      }
      unset($t);

      $term = implode('|', $term);
    } else {
      $term = LsString::escapeStringForRegex($term);
    }
  }
  unset($term);

  if (count($terms) && $terms[0] == 'The') {
    array_shift($terms);
  }

  // Without terms the leading-term pattern would be "()" and match every
  // name, so there is nothing meaningful to compare.
  if (!count($terms)) {
    return false;
  }

  foreach ($names as $name) {
    if (!preg_match('/^(The\\s+)?(' . $terms[0] . ')/isu', $name)) {
      continue;
    }

    foreach ($terms as $term) {
      $new = preg_replace('/((^|\\s)|\\b)(' . $term . ')(\\b|(\\s|$))/isu', ' ', $name, 1);

      if ($new == $name) {
        // a term is missing from this name: it cannot match, try the next one
        continue 2;
      }

      $name = $new;
    }

    $name = trim(OrgTable::removeSuffixes($name));

    // in strict mode too many leftover words disqualify the name
    if ($strict && strlen($name) > 0 && count(LsString::split($name)) >= $strict) {
      continue;
    }

    return true;
  }

  return false;
}
/**
 * Builds a regular expression fragment (no delimiters) matching this person's
 * name as it may appear in text.
 *
 * First, middle and nick names are turned into alternatives in which every
 * lower-case letter matches either case. For parts longer than three letters,
 * everything after the second such letter is optional, so truncated forms
 * match. Single initials of those parts are accepted too. The fragment ends
 * with the pattern returned by getLastNameRegex().
 *
 * @param bool $first_required When true the first name (or one of its known
 *                             short forms) must be present; middle/nick names
 *                             and initials are then optional
 *
 * @return string Regex fragment
 */
public function getNameRegex($first_required = false)
{
  // Replaces the removed /e modifier: each lower-case letter x becomes [xX].
  $either_case = function ($text) {
    return preg_replace_callback(
      '/(\\p{Ll})/',
      function ($m) {
        return '[' . $m[1] . strtoupper($m[1]) . ']';
      },
      $text
    );
  };

  $last_re = $this->getLastNameRegex();
  $name_first = $this->name_first;

  // add the known short forms of the first name as further alternatives
  if (isset(PersonTable::$shortFirstNames[$name_first])) {
    $fn_arr = (array) PersonTable::$shortFirstNames[$name_first];
    $name_first = $this->name_first . ' ' . implode(' ', $fn_arr);
  }

  if ($first_required) {
    $fm = $this->name_middle . ' ' . $this->name_nick;
  } else {
    $fm = $name_first . ' ' . $this->name_middle . ' ' . $this->name_nick;
  }

  $fm_arr = preg_split('/[\\s-]+/', $fm, -1, PREG_SPLIT_NO_EMPTY);
  $initials = '';

  foreach ($fm_arr as $i => $part) {
    $len = strlen(LsString::stripNonAlpha($part));

    // take the initial from the untransformed part; after the transformation
    // a lower-case first letter would start with "["
    $initials .= strtoupper($part[0]);

    $part = $either_case($part);

    //if string is longer than 3, then everything after the second
    //bracketed letter becomes optional
    if ($len > 3) {
      $offset = strpos($part, ']', strpos($part, ']') + 1) + 1;
      $tail = str_replace(']', ']?', substr($part, $offset));
      $part = substr($part, 0, $offset) . $tail;
    }

    $fm_arr[$i] = $part;
  }

  // An empty character class "[]" would swallow the rest of the pattern, so
  // the initials alternative is only added when there are initials.
  $fm_group = implode('|', $fm_arr);
  if ($initials !== '') {
    $fm_group .= '|[' . $initials . ']';
  }

  $separator = '\\b([\'"\\(\\)\\.]{0,3}\\s+|\\.\\s*|\\s?-\\s?)?';

  if ($first_required) {
    $nf_arr = LsString::split($name_first);
    foreach ($nf_arr as $i => $nf) {
      $nf_arr[$i] = $either_case($nf);
    }
    $name_first = implode('|', $nf_arr);

    $re = '((\\b(' . $name_first . ')' . $separator . '(' . $fm_group . ')?' . $separator . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
  } else {
    $re = '((\\b(' . $fm_group . ')' . $separator . '((\\p{L}|[\'\\-])+' . $separator . ')?)+((' . $last_re . ')\\b))';
  }

  return $re;
}
/**
 * Fetches a member's profile page and fills in summary and blurb from it.
 *
 * The page text (newlines turned into spaces) is kept in $this->_bioPageText.
 * When the biography paragraph is found it becomes $member->summary. When
 * the biography contains a phrase of the form "a <2-8 words>;", that phrase
 * becomes the blurb, prefixed with "US" and with the article "a" removed.
 *
 * @param object $member Record exposing bioguide_id, summary and blurb
 *
 * @throws Exception When the profile page request returns an error response
 */
public function updateBio($member)
{
  $url = $this->_profileUrlBase . $member->bioguide_id;

  if ($this->browser->get($url)->responseIsError()) {
    //Error response (eg. 404, 500, etc)
    throw new Exception("Couldn't get " . $url);
  }

  $this->_bioPageText = $text = LsString::newlinesToSpaces($this->browser->getResponseText());

  //get bio
  if (!preg_match('/, <\\/FONT>([^<]+)<\\/(TD|P)>/', $text, $bio)) {
    $this->printDebug("Couldn't find member bio on " . $url);

    return;
  }

  $bio = preg_replace('/\\n/', ' ', $bio[1]);
  $bio = ucfirst(trim(preg_replace('/\\s{2,}/', ' ', $bio)));
  $bio = LsHtml::replaceEntities($bio);
  $member->summary = $bio;
  $this->printDebug("Bio: " . $bio);

  if (preg_match('/\\b(a(\\s+\\p{L}+){2,8})\\;/isu', $bio, $match)) {
    // \b keeps the removal to the standalone article; without it the final
    // "a" of words such as "Florida" was eaten together with the next space
    $blurb = 'US ' . preg_replace('/\\ba\\s+/isu', '', $match[1]);
    $member->blurb = $blurb;
    $this->printDebug("Blurb: " . $blurb);
  }
}
/**
 * Converts an HTML table of donations into tab-separated text.
 *
 * A table row is used when its first cell has at least two <br>-separated
 * parts (name, street[, city]) and the row has more than five cells. Names
 * ending in a company suffix are kept as they are; "Last, First" names are
 * reordered. The output starts with a header line of column names and has one
 * line per donation with the columns name, street, city, amount, date and
 * committee.
 *
 * @param string $str HTML to parse
 *
 * @return string Tab-separated lines, or an empty string when no row qualifies
 */
static function parseNyDonations($str)
{
  $row_re = '/(<td.*?>(.*?\\s).*?<.td>\\s*)*?<.tr>/is';
  preg_match_all($row_re, $str, $matches);

  $results = array();

  foreach ($matches[0] as $row_html) {
    $cells = preg_split('/<.td>\\s*<td.*?>/is', $row_html);
    $name_parts = preg_split('/<br>/is', $cells[0]);

    if (count($name_parts) <= 1 || count($cells) <= 5) {
      continue;
    }

    $result = array("name" => "", "street" => "", "city" => "");

    if (preg_match('/(inc|llp|llc|p\\.c\\.|pc)\\.?$/is', $name_parts[0])) {
      // company names are not reordered
      $result['name'] = $name_parts[0];
    } else {
      $np = preg_split('/\\,\\s*/is', $name_parts[0]);

      if (count($np) == 1) {
        $result['name'] = $np[0];
      } elseif (count($np) == 3 && stripos($np[2], "jr") !== 0) {
        $result['name'] = $np[2] . " " . $np[0] . ", " . $np[1];
      } else {
        $result['name'] = $np[1] . " " . $np[0];
      }
    }

    $result['street'] = $name_parts[1];
    if (count($name_parts) > 2) {
      $result['city'] = $name_parts[2];
    }

    $result['amount'] = $cells[1];
    $result['date'] = $cells[2];
    $result['committee'] = $cells[3];

    // drop newlines and leftover tags, then normalise whitespace
    foreach ($result as $key => $value) {
      $value = preg_replace('/(\\n|(<.*?>))/is', "", $value);
      $value = trim($value);
      $result[$key] = LsString::spacesToSpace($value);
    }

    $results[] = $result;
  }

  // no header can be derived without at least one row
  if (!count($results)) {
    return '';
  }

  $lines = array(implode("\t", array_keys($results[0])));
  foreach ($results as $result) {
    $lines[] = implode("\t", $result);
  }

  return trim(implode("\n", $lines));
}
/**
 * Guesses whether a URL belongs to an organization by looking for the
 * organization's name in it.
 *
 * Only the "//host/" portion of the URL is examined when it can be isolated.
 * The URL is accepted when it contains a significant word of the name (longer
 * than one character and neither a common word nor a corporate term). It is
 * also accepted when it contains an acronym of the name longer than two
 * letters, built from all words, from all but the common words, or from all
 * but the corporate terms. All comparisons ignore case.
 *
 * @param string $url      URL to test
 * @param string $org_name Organization name
 *
 * @return bool
 */
static function checkUrl($url, $org_name)
{
  if (preg_match('/\\/\\/[^\\/]+\\//isu', $url, $match)) {
    $url = $match[0];
  }

  $common = array('and', 'the', 'of', 'in', 'at', '&');
  $abbrevs = array('Corporation', 'Inc', 'Group', 'LLC', 'LLP', 'Corp', 'Co', 'Cos', 'LP', 'PA', 'Dept', 'Department', 'International', 'Administration');

  $all = '';
  $no_common = '';
  $no_corp = '';

  foreach (LsString::split($org_name) as $part) {
    $is_common = LsArray::inArrayNoCase($part, $common);
    $is_corp = LsArray::inArrayNoCase($part, $abbrevs);

    // a significant word found in the URL settles it; the acronyms are not needed
    if (strlen($part) > 1 && !$is_common && !$is_corp && stripos($url, $part) !== false) {
      return true;
    }

    if (!$is_common) {
      $no_common .= $part[0];
    }

    if (!$is_corp) {
      $no_corp .= $part[0];
    }

    $all .= $part[0];
  }

  foreach (array($all, $no_common, $no_corp) as $acronym) {
    if (strlen($acronym) > 2 && stripos($url, $acronym) !== false) {
      return true;
    }
  }

  return false;
}
/**
 * Imports one row: finds or creates the person, and when needed the
 * affiliated org and a Position relationship between them.
 *
 * A similar existing person is accepted when the first names are prefix
 * compatible and either one of the row's affiliationN values passes
 * checkAffiliations(), or one of them is found in the person's extended bio.
 * Otherwise the parsed person is saved as new. An org and relationship are
 * only created for new people and bio matches. Everything runs inside one
 * transaction, and a summary of what happened is appended to $this->edits.
 *
 * @param array $row Row keyed by column name; 'name' and 'affiliation1' are
 *                   read unconditionally, the other columns when present
 *
 * @throws Exception Any exception is rethrown after the transaction is rolled back
 */
protected function processRow($row)
{
  foreach ($row as &$r) {
    $r = trim($r);
  }
  unset($r); // drop the reference so the last row value cannot be overwritten by accident

  $edit = array('Search Name' => $row['name'], 'Affiliation Name' => $row['affiliation1'], 'Similar Names' => array(), 'New Person' => null, 'Existing Person' => null, 'New Org' => null, 'Existing Org' => null, 'New Relationship' => null);

  try {
    $this->db->beginTransaction();

    $person = null;
    $search_person = PersonTable::parseFlatName($row['name']);
    $similar = $search_person->getSimilarEntitiesQuery(true)->execute();
    $matched_bio = false;
    $similar_ids = array();

    foreach ($similar as $s) {
      $similar_ids[] = $s->id;

      // one first name has to be a prefix of the other
      $sim_re = LsString::escapeStringForRegex($s->name_first);
      $search_re = LsString::escapeStringForRegex($search_person->name_first);
      if (preg_match('/^' . $sim_re . '/su', $search_person->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
        continue;
      }

      // walk affiliation1, affiliation2, ... until one is confirmed or the columns run out
      $ct = 1;
      $matched_affils = array();
      $unmatched_affils = array();

      while (isset($row['affiliation' . $ct]) && trim($row['affiliation' . $ct]) != '') {
        $affil = trim($row['affiliation' . $ct]);
        $org = $s->checkAffiliations(array($affil));

        if ($org) {
          $matched_affils[] = array($org, $affil);
          $edit['Existing Org'] = $org->id;
          break;
        }

        $unmatched_affils[] = $affil;
        $ct++;
      }

      if (count($matched_affils)) {
        $person = $s;
        break;
      }

      // no affiliation confirmed: look for the affiliation names in the bio
      $bio = $s->getExtendedBio();

      foreach ($unmatched_affils as $affil) {
        $affil = OrgTable::removeSuffixes($affil);
        $this->printDebug($affil);
        $this->printDebug($bio);

        if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
          $matched_bio = true;
          break;
        }
      }

      if ($matched_bio) {
        $person = $s;
        break;
      }

      $this->printDebug(' ' . $s->name . ' failed');
    }

    $edit['Similar Names'] = array_slice($similar_ids, 0, 5);
    $no_match = false;

    if (!$person) {
      if (isset($row['bio']) && trim($row['bio']) != '') {
        $search_person->summary = $row['bio'];
      }

      $search_person->save();
      $this->printDebug(' not found, new person saved: ' . $search_person->name);
      $search_person->addReference($this->source_url, null, null, $this->source_name);

      $no_match = true;
      $edit['New Person'] = $search_person->id;
      $person = $search_person;
    } else {
      // only fill in a summary, never replace an existing one
      if (isset($row['bio']) && trim($row['bio']) != '' && !$person->summary) {
        $person->summary = $row['bio'];
        $person->save();
      }

      $this->printDebug(' **person found: ' . $person->name);
      $edit['Existing Person'] = $person->id;
    }

    if ($matched_bio || $no_match) {
      // among similarly named orgs, prefer the one with the most related people
      $orgs = OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true);
      $max = -1;
      $affiliated_org = null;

      foreach ($orgs as $org) {
        $this->printDebug(' found match: ' . $org->name);
        $ct = $org->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)->count();

        if ($ct > $max) {
          $affiliated_org = $org;
          $edit['Existing Org'] = $affiliated_org->id;
          $max = $ct;
        }
      }

      if (!$affiliated_org) {
        $affiliated_org = new Entity();
        $affiliated_org->addExtension('Org');

        if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
          $extensions = explode(',', $row['affiliation1_extensions']);

          foreach ($extensions as $ext) {
            $ext = trim($ext);

            if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
              $affiliated_org->addExtension($ext);
            }
          }
        }

        $affiliated_org->name = $row['affiliation1'];
        $affiliated_org->save();
        $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
        $edit['New Org'] = $affiliated_org->id;
      }

      $rel = new Relationship();
      $rel->Entity1 = $person;
      $rel->Entity2 = $affiliated_org;
      $rel->setCategory('Position');

      if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
        $description = trim($row['affiliation1_title']);
        $rel->description1 = $description;

        // board-type titles are flagged as board, non-employee positions
        if ($description == 'Director' || $description == 'Trustee' || preg_match('/^Chair/su', $description)) {
          $rel->is_board = 1;
          $rel->is_employee = 0;
        }
      }

      $rel->save();
      $rel->addReference($this->source_url, null, null, $this->source_name);
      $edit['New Relationship'] = $rel->id;
    }

    // optional columns are only copied into the edit log
    if (isset($row['start_date']) && trim($row['start_date']) != '') {
      $edit['Relationship']['start_date'] = trim($row['start_date']);
    }

    if (isset($row['end_date']) && trim($row['end_date']) != '') {
      $edit['Relationship']['end_date'] = trim($row['end_date']);
    }

    if (isset($row['title']) && trim($row['title']) != '') {
      $edit['Relationship']['title'] = trim($row['title']);
    }

    if (isset($row['notes']) && trim($row['notes']) != '') {
      $edit['Relationship']['notes'] = trim($row['notes']);
    }

    if (isset($row['rank']) && $row['rank'] != '') {
      $edit['rank'] = $row['rank'];
    }

    $this->db->commit();
  } catch (Exception $e) {
    $this->db->rollback();

    throw $e;
  }

  $this->edits[] = $edit;
}
/**
 * Imports the filings of one XML file into the Lda* tables.
 *
 * The file is read from $this->_dir and its <Filing> elements are processed
 * starting at $lobby_import->offset. Each filing is handled in its own
 * transaction: the import offset is stored, then the filing and its
 * registrant, client, type, period, lobbyists, government entities and issues
 * are looked up or created. Filings without a Registrant and filings whose
 * federal filing id is already stored are skipped.
 *
 * The script is terminated with die once $this->_count exceeds $this->_limit.
 *
 * @param object $lobby_import Import record; filename, offset, done and id are used
 *
 * @throws Exception Any exception is rethrown after the transaction is rolled back
 */
private function importLdaData($lobby_import)
{
  $path = $this->_dir . $lobby_import->filename;
  $raw = file_get_contents($path);
  $xml = new SimpleXMLElement($raw);
  $filings = $xml->Filing;
  $limit = count($filings);
  $this->printDebug('importing data from ' . $lobby_import->filename . ' (record ' . $lobby_import->offset . ' of ' . $limit . ')');

  for ($n = (int) $lobby_import->offset; $n < $limit; $n++) {
    // $this->_count is incremented for every record visited, including skipped ones
    $this->_count = $this->_count + 1;

    if ($this->_count > $this->_limit) {
      die;
    }

    try {
      $this->db->beginTransaction();

      // remember how far the import got; the last record marks the import as done
      $lobby_import->offset = $n;

      if ($n == $limit - 1) {
        $lobby_import->done = 1;
      }

      $lobby_import->save();

      // diagnostic branch: dumps the neighbouring records when the current index is not set
      if (!isset($filings[$n])) {
        echo 'ok';
        var_dump($filings[$n - 1]);
        var_dump($filings[$n + 1]);
        $this->printDebug('not set' . $n);
        $this->db->commit();
        continue;
      }

      $filing = $filings[$n];

      // filings without a registrant are skipped; the offset update is still committed
      if (!isset($filing->Registrant)) {
        $this->db->commit();
        continue;
      }

      //var_dump($filing);
      $f = new LdaFiling();
      $f->federal_filing_id = $filing['ID'];
      $f->year = $filing['Year'];
      $f->amount = $filing['Amount'];
      $f->received = $filing['Received'];
      $f->import_id = $lobby_import->id;
      $f->offset = $n;

      //check for duplicate
      if (Doctrine::getTable('LdaFiling')->findOneByFederalFilingId($f->federal_filing_id)) {
        $this->db->commit();
        continue;
      }

      //set registrant (looked up by federal registrant id, created when missing)
      if (!($r = Doctrine::getTable('LdaRegistrant')->findOneByFederalRegistrantId($filing->Registrant['RegistrantID']))) {
        $r = new LdaRegistrant();
        $r->name = LsString::spacesToSpace($filing->Registrant['RegistrantName']);
        $r->federal_registrant_id = $filing->Registrant['RegistrantID'];
        $r->address = $filing->Registrant['Address'];
        $r->description = LsString::spacesToSpace($filing->Registrant['GeneralDescription']);
        $r->country = $filing->Registrant['RegistrantCountry'];
        $r->save();
      }

      $f->registrant_id = $r->id;

      //set client (looked up per registrant by federal client id)
      if ($filing->Client) {
        if (!($c = LsQuery::getByModelAndFieldsQuery('LdaClient', array('registrant_id' => $r->id, 'federal_client_id' => $filing->Client['ClientID']))->execute()->getFirst())) {
          $c = new LdaClient();
          $c->name = LsString::spacesToSpace($filing->Client['ClientName']);
          $c->federal_client_id = $filing->Client['ClientID'];
          $c->registrant_id = $r->id;
          $c->contact_name = LsString::spacesToSpace($filing->Client['ContactFullname']);
          $c->description = LsString::spacesToSpace($filing->Client['GeneralDescription']);
          $c->country = $filing->Client['ClientCountry'];
          $c->state = $filing->Client['ClientState'];
          $c->save();
        }

        $f->client_id = $c->id;
      }

      //set filing type
      if ($type = (string) $filing['Type']) {
        //look for existing type
        if (!($t = Doctrine::getTable('LdaType')->findOneByDescription($type))) {
          $t = new LdaType();
          $t->description = $type;
          $t->save();
        }

        $f->type_id = $t->id;
        unset($t);
      }

      //set filing period
      if ($period = (string) $filing['Period']) {
        //look for existing period
        if (!($p = Doctrine::getTable('LdaPeriod')->findOneByDescription($period))) {
          $p = new LdaPeriod();
          $p->description = $period;
          $p->save();
        }

        $f->period_id = $p->id;
      }

      // the filing is saved before its children because they store $f->id
      $f->save();

      //add lobbyists (looked up per registrant by name, then linked to the filing)
      if ($filing->Lobbyists) {
        foreach ($filing->Lobbyists->Lobbyist as $lobbyist) {
          $name = (string) $lobbyist['LobbyistName'];

          if (!($l = LsQuery::getByModelAndFieldsQuery('LdaLobbyist', array('registrant_id' => $r->id, 'name' => $name))->execute()->getFirst())) {
            $l = new LdaLobbyist();
            $l->name = $name;
            $l->registrant_id = $r->id;
            $l->status = $lobbyist['LobbyistStatus'];
            // NOTE(review): attribute name is spelled 'LobbyisteIndicator' here; confirm it matches the XML
            $l->indicator = $lobbyist['LobbyisteIndicator'];
            $l->official_position = $lobbyist['OfficialPosition'];
            $l->save();
          }

          $fl = new LdaFilingLobbyist();
          $fl->filing_id = $f->id;
          $fl->lobbyist_id = $l->id;
          $fl->save();
          unset($fl);
          unset($l);
        }
      }

      //add govt entities (looked up by name, then linked to the filing)
      if ($filing->GovernmentEntities) {
        foreach ($filing->GovernmentEntities->GovernmentEntity as $govt) {
          $govt = trim($govt['GovEntityName']);

          if (!($g = Doctrine::getTable('LdaGovt')->findOneByName($govt))) {
            $g = new LdaGovt();
            $g->name = $govt;
            $g->save();
          }

          $fg = new LdaFilingGovt();
          $fg->filing_id = $f->id;
          $fg->govt_id = $g->id;
          $fg->save();
          unset($fg);
          unset($g);
        }
      }

      //add issues (looked up by code, then linked to the filing with the specific issue text)
      if ($filing->Issues) {
        foreach ($filing->Issues->Issue as $issue) {
          $code = (string) $issue['Code'];

          if (!($i = Doctrine::getTable('LdaIssue')->findOneByName($code))) {
            $i = new LdaIssue();
            $i->name = $code;
            $i->save();
          }

          $fi = new LdaFilingIssue();
          $fi->filing_id = $f->id;
          $fi->issue_id = $i->id;
          $fi->specific_issue = $issue['SpecificIssue'];
          $fi->save();
          unset($fi);
          unset($i);
        }
      }

      $this->printDebug($f->federal_filing_id);

      //check for duplicate again
      // NOTE(review): $f was saved above, so this lookup may find $f itself and
      // roll back every filing - confirm which connection/transaction the Lda
      // tables use before relying on this check
      if (Doctrine::getTable('LdaFiling')->findOneByFederalFilingId($f->federal_filing_id)) {
        $this->db->rollback();
        continue;
      }

      $this->db->commit();
    } catch (Exception $e) {
      $this->db->rollback();
      throw $e;
    }

    // release per-filing objects before the next iteration
    unset($f);
    unset($r);
    unset($c);
    unset($filing);
  }

  unset($xml);
  unset($raw);
  unset($filings);
}
/**
 * Shortens a string; thin wrapper that forwards every argument unchanged to
 * LsString::excerpt() and hands back its result.
 *
 * @param string $string      Text to shorten
 * @param int    $len         Length passed on to LsString::excerpt()
 * @param string $truncateStr Truncation marker passed on to LsString::excerpt()
 * @param bool   $wholeWords  Whole-words flag passed on to LsString::excerpt()
 *
 * @return mixed Whatever LsString::excerpt() returns
 */
function excerpt($string, $len = 50, $truncateStr = '...', $wholeWords = true)
{
  $shortened = LsString::excerpt($string, $len, $truncateStr, $wholeWords);

  return $shortened;
}
/**
 * Splits a free-text position description into normalized position titles.
 *
 * The company's name, nickname and ticker are removed from the text, which is
 * then split on commas, semicolons, "and", "&", "/" and hyphens (except after
 * "Co"). Abbreviations in each piece are expanded and qualifiers such as
 * "Interim" or "Independent" are dropped. A piece becomes a position when it
 * is a known business position or an existing relationship description. For
 * the first piece, trailing words are also stripped one at a time to find
 * such a title, and the whole piece is used when nothing is recognized.
 * Unrecognized later pieces are attached as notes to the previous position.
 *
 * @param string $str  Raw description text
 * @param object $corp Company record; name, name_nick and ticker are read
 *
 * @return array List of array('description' => string, 'note' => array)
 */
public function parseDescriptionStr($str, $corp)
{
  $descriptions = array();

  //cleanup text to be parsed
  $str = trim($str);
  $str = str_replace('.', ' ', $str);
  $str = preg_replace('/\\s{2,}/', ' ', $str);

  $name_re = LsString::escapeStringForRegex($corp->name);
  $str = preg_replace('/\\b' . $name_re . '\\b/isu', '', $str);

  if ($corp->name_nick) {
    $nick_re = LsString::escapeStringForRegex($corp->name_nick);
    $str = preg_replace('/\\b' . $nick_re . '\\b/isu', '', $str);
  }

  if ($corp->ticker) {
    $tick_re = LsString::escapeStringForRegex($corp->ticker);
    $str = preg_replace('/\\b' . $tick_re . '\\b/isu', '', $str);
  }

  //split by commas (and the other separators)
  $parts = preg_split('/,|;|\\band\\b|(?<!C[Oo])\\-|\\bAND\\b|\\s&\\s|\\//', $str, -1, PREG_SPLIT_NO_EMPTY);

  foreach ($parts as $part) {
    $part = trim($part);
    $part = preg_replace('/\\s{2,}/', ' ', $part);

    //abbreviation replacements; the order matters, later rules see earlier results
    $part = preg_replace('/( |^)(\\w) (\\w) (\\w)( |$)/', '\\2\\3\\4', $part);
    $part = preg_replace('/(Interim|Acting|Incoming) /i', '', $part);
    $part = preg_replace('/Sr /i', 'Senior ', $part);
    $part = preg_replace('/Chf /i', 'Chief ', $part);
    $part = preg_replace('/( |^)V( |$)/i', ' Vice ', $part);
    $part = preg_replace('/( |^)VP( |$)/i', ' Vice President ', $part);
    $part = preg_replace('/( |^)VC( |$)/i', ' Vice Chairman ', $part);
    $part = preg_replace('/( |^)Chr( |$)/i', ' Chairman ', $part);
    $part = preg_replace('/( |^)Ofcr( |$)/i', ' Officer ', $part);
    $part = preg_replace('/( |^)Vice P( |$)/i', ' Vice President ', $part);
    $part = preg_replace('/( |^)(Ex|Exec)( |$)/i', ' Executive ', $part);
    $part = preg_replace('/( |^)EVP( |$)/i', ' Executive Vice President ', $part);
    $part = preg_replace('/( |^)(Off|Offic|Offcr)( |$)/i', ' Officer ', $part);
    $part = str_replace('Gen ', 'General ', $part);
    $part = preg_replace('/( |^)(Op|Oper) /', ' Operating ', $part);
    $part = preg_replace('/( |^)(Bd|Brd)( |$)/i', ' Board ', $part);
    $part = preg_replace('/of Board/i', ' of the Board', $part);
    $part = preg_replace('/( |^)COB( |$)/i', ' Chairman of the Board ', $part);
    $part = preg_replace('/( |^)(Pres|Prs|Presid|Prsdt|Prsdnt)( |$)/i', ' President ', $part);
    $part = preg_replace('/( |^)Admin( |$)/i', ' Administrative ', $part);
    $part = preg_replace('/( |^)Info( |$)/i', ' Information ', $part);
    $part = preg_replace('/\\bComm\\b/i', 'Committee', $part);
    $part = preg_replace('/\\bInc\\b/i', '', $part);
    $part = preg_replace('/( |-|^)(Ch|Chm|Chmn|Chrm|Chrmn|Chair|Chairmain|Chariman)( |$)/i', '\\1Chairman ', $part);
    $part = preg_replace('/(Sec|Secr|Secy|Secretar|Secreta)( |$)/i', 'Secretary ', $part);
    $part = str_replace('Vice-', 'Vice ', $part);
    $part = preg_replace('/( |^)Non /i', ' Non-', $part);
    $part = preg_replace('/\\bCompl\\b/i', 'Compliance', $part);
    $part = str_ireplace('of Advisory', 'of the Advisory', $part);
    $part = preg_replace('/Advisory (Panel|Council)/i', 'Advisory Board', $part);
    $part = str_ireplace('Independent ', '', $part);
    $part = str_ireplace('Lead ', '', $part);
    $part = str_ireplace('Corporate ', '', $part);
    $part = str_ireplace('Outside ', '', $part);
    $part = str_ireplace('Non-interested', '', $part);
    $part = str_ireplace('Interested', '', $part);
    $part = str_replace('Main ', '', $part);
    $part = str_ireplace('Presiding ', '', $part);
    $part = str_ireplace('Founding ', '', $part);
    $part = str_ireplace('Acctg', 'Accounting', $part);
    $part = str_ireplace('Chairperson', 'Chairman', $part);
    $part = str_ireplace('Chairwoman', 'Chairman', $part);
    $part = str_ireplace("Gen'l", 'General', $part);
    $part = trim($part);
    $part = preg_replace('/\\s{2,}/', ' ', $part);

    if ($part == '') {
      continue;
    }

    $position = array('description' => null, 'note' => array());

    //look for matching title
    $p = LsArray::inArrayNoCase($part, PositionTable::$businessPositions);

    if ($p) {
      $position['description'] = $p;
    } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part)) {
      // look the title text up; the $position array used to be passed here by mistake
      $position['description'] = $q->description1;
    } elseif (count($descriptions) == 0) {
      // first piece: drop trailing words one by one looking for a known title
      // NOTE(review): the dropped words are discarded; they may have been
      // meant to end up in $position['note']
      $part_splat = LsString::split($part);
      $lim = count($part_splat) - 1;

      for ($i = 0; $i < $lim; $i++) {
        array_pop($part_splat);
        $part_new = implode(' ', $part_splat);

        if (strtoupper($part_new) == 'DIRECTOR') {
          break;
        }

        $p = LsArray::inArrayNoCase($part_new, PositionTable::$businessPositions);

        if ($p) {
          $position['description'] = $p;
        } elseif ($q = Doctrine::getTable('Relationship')->findOneByDescription1($part_new)) {
          $position['description'] = $q->description1;
        }
      }

      if (!$position['description']) {
        $position['description'] = $part;
      }
    } else {
      // unrecognized later piece: keep it as a note on the previous position
      $descriptions[count($descriptions) - 1]['note'][] = $part;
    }

    if (isset($position['description'])) {
      $descriptions[] = $position;
    }
  }

  return $descriptions;
}
/**
 * Removes the given field name(s) from this object's comma-separated field
 * list and stores the sorted remainder.
 *
 * @param string|array $fields Field name or list of field names to drop
 *
 * @return mixed The value assigned to $this->fields: the result of
 *               LsString::emptyToNull() applied to the comma-joined remainder
 */
public function removeFields($fields)
{
  $unwanted = (array) $fields;
  $remaining = array_diff($this->getFieldsArray(), $unwanted);
  sort($remaining);

  $joined = implode(',', $remaining);
  $stored = LsString::emptyToNull($joined);
  $this->fields = $stored;

  return $stored;
}
/**
 * Extracts the passages of an HTML text that start with the entity's name.
 *
 * Each passage runs from a match of the entity's name regex (or its escaped
 * nickname) up to the next opening or closing layout tag.
 *
 * @param string $str HTML text to search
 * @param Entity $e   Entity whose name is looked for
 *
 * @return array|null The raw matched passages, or null when there is none
 */
public function getSummary($str, Entity $e)
{
  $str = LsHtml::replaceEntities($str);

  $name_re = array($e->getNameRegex());

  if ($e->name_nick) {
    $name_re[] = LsString::escapeStringForRegex($e->name_nick);
  }

  $name_re = implode('|', $name_re);
  $layout_tags = implode('|', LsHtml::$layoutTags);

  $re = '/((' . $name_re . ')(.*?))<\\/?(' . $layout_tags . ')/isu';
  $this->printDebug($re);

  if (!preg_match_all($re, $str, $matches)) {
    return null;
  }

  $results = $matches[1];

  // The tag-stripped form is only printed for debugging; callers get the raw
  // passages. NOTE(review): confirm the cleaned text was not meant to be returned.
  foreach ($results as $result) {
    $this->printDebug(LsString::spacesToSpace(LsHtml::stripTags($result)));
  }

  return $results;
}
/**
 * Stores the text after passing it through LsHtml::replaceEntities() and then
 * LsString::utf8TransUnaccent().
 *
 * @param string $text Raw text
 */
function __construct($text)
{
  $withoutEntities = LsHtml::replaceEntities($text);

  $this->text = LsString::utf8TransUnaccent($withoutEntities);
}
/**
 * Collects strings that look like person names from the text nodes of an
 * HTML document.
 *
 * Two shapes are recognized between ">" and "<": "First [Middle] Last[, Sfx]"
 * and "Last, First [Middle][, Sfx]". A candidate of the first shape is kept
 * only when it contains a middle initial, or its first word is a common first
 * name, or some Person already has that first name. Candidates of the second
 * shape are always kept, rewritten through PersonTable::parseCommaName().
 *
 * @param string $text HTML to scan
 *
 * @return array List of name strings
 */
static function getHtmlPersonNames($text)
{
  $re2 = '/>\\s*(\\p{Lu}\'?(\\p{L}+|\\.)?\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2}\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';
  $re3 = '/>\\s*(\\p{Lu}\'?\\p{L}+(\\-\\p{Lu}\'?\\p{L}+)?\\,\\s+(\\p{Lu}\'?(\\p{L}+|\\.)?(\\s+(\\p{Lu}\'?(\\s+|\\p{L}+\\s+|\\.\\s*)?){0,2})?)(\\,?\\s+\\p{Lu}\\p{L}{1,4}\\.?)?)\\**\\s*</su';

  $text = LsHtml::replaceEntities($text);
  $name_matches = array();

  if (preg_match_all($re2, $text, $matches)) {
    //LOOP THROUGH MATCHES TO CONFIRM NAMES
    foreach ($matches[1] as $candidate) {
      $parts = LsString::split(trim($candidate));

      //ADD NAME TO MATCH LIST IF IT FITS CONDITIONS
      // cheapest checks first; the database is only queried when they fail
      $is_name = preg_match('/\\s+\\p{Lu}\\.?\\s/', $candidate)
        || in_array($parts[0], LsLanguage::$commonFirstNames)
        || LsDoctrineQuery::create()->from('Person p')->where('p.name_first = ?', $parts[0])->count() > 0;

      if ($is_name) {
        $name_matches[] = $candidate;
      }
    }
  }

  if (preg_match_all($re3, $text, $matches)) {
    foreach ($matches[1] as $candidate) {
      $person = PersonTable::parseCommaName($candidate);
      $name_matches[] = $person->getFullName(false);
    }
  }

  return $name_matches;
}
/**
 * Parses a "First Middle Last" style name into its components.
 *
 * Periods, parenthesized text, known prefixes and suffixes, and a quoted
 * nickname are removed first. The remaining words are then assigned to
 * first, middle and last name by position. A multi-word surname can be
 * passed in so it is treated as a single word.
 *
 * @param string      $str         Name to parse
 * @param string|null $surname     Known surname, for multi-word last names
 * @param bool        $returnArray When true, return the components as an array
 *
 * @return Entity|array A new, unsaved Entity with the Person extension, or an
 *                      array with the keys name_first, name_last, name_middle,
 *                      name_prefix, name_suffix and name_nick
 */
static function parseFlatName($str, $surname = null, $returnArray = false)
{
  $namePrefix = $nameFirst = $nameMiddle = $nameLast = $nameSuffix = $nameNick = null;

  //to handle multi-word last names like Van der Twerp: the surname's spaces
  //become underscores so it survives the split as one word
  $sub = null;

  if ($surname) {
    $sub = preg_replace('/(^(\\P{L})+|(\\P{L})+$)/u', '', $surname);
    $sub = preg_replace('/\\s+/is', '_', $sub);
    $str = str_ireplace($surname, $sub, $str);
  }

  //trim and remove periods
  $str = trim(str_replace('.', ' ', $str));

  //remove extra spaces
  $str = preg_replace('/\\s{2,}/', ' ', $str);

  //remove anything in parentheses
  $str = preg_replace('/ \\([^\\)]+\\)/', '', $str);

  //get prefixes; after every hit the list is scanned again from the start
  $prefixes = self::$nameParsePrefixes;

  while ($prefix = current($prefixes)) {
    $new = preg_replace('/^' . $prefix . ' /i', '', $str);

    if ($str != $new) {
      // common prefixes are stripped but not kept
      if (!LsArray::inArrayNoCase($prefix, LsLanguage::$commonPrefixes)) {
        $namePrefix .= $prefix . ' ';
      }

      $str = trim($new);
      reset($prefixes);
      continue;
    }

    next($prefixes);
  }

  $namePrefix = $namePrefix ? trim($namePrefix) : null;

  //get suffixes, same scanning scheme; suffixes are collected right to left
  $suffixes = self::$nameParseSuffixes;

  while ($suffix = current($suffixes)) {
    $new = preg_replace('/ ' . $suffix . '$/i', '', $str);

    if ($str != $new) {
      $nameSuffix = $suffix . ' ' . $nameSuffix;
      $str = trim($new);
      reset($suffixes);
      continue;
    }

    next($suffixes);
  }

  $nameSuffix = $nameSuffix ? trim($nameSuffix) : null;

  //remove commas left over from suffixes
  $str = trim(str_replace(',', '', $str));

  //find nickname in quotes
  if (preg_match('/["\']([\\S]+)[\'"]/', $str, $nickFound)) {
    // the pattern has a single capture group, which always holds the nickname
    $nameNick = $nickFound[1];
    $str = trim(preg_replace('/["\']([\\S]+)[\'"]/', '', $str));
  }

  //condense multiple spaces
  $str = preg_replace('/\\s{2,}/', ' ', $str);

  //split into parts
  $parts = explode(' ', $str);

  switch (count($parts)) {
    case 1:
      // a lone word: pair it with a prefix or suffix when there is one
      if ($namePrefix) {
        $nameFirst = $namePrefix;
        $nameLast = $parts[0];
        $namePrefix = null;
      } elseif ($nameSuffix) {
        $nameFirst = $parts[0];
        $nameLast = $nameSuffix;
        $nameSuffix = null;
      } elseif (strtolower((string) $sub) == strtolower($parts[0])) {
        $nameLast = $parts[0];
      } else {
        $nameFirst = $parts[0];
      }
      break;

    case 2:
      $nameFirst = $parts[0];
      $nameLast = $parts[1];
      break;

    case 3:
      $nameFirst = $parts[0];
      $nameMiddle = $parts[1];
      $nameLast = $parts[2];
      break;

    default:
      // everything between the first and the last word is the middle name
      $nameFirst = $parts[0];
      $nameLast = $parts[count($parts) - 1];
      $nameMiddle = implode(' ', array_slice($parts, 1, count($parts) - 2));
      break;
  }

  // restore the spaces of a multi-word surname
  $nameLast = str_replace('_', ' ', (string) $nameLast);

  $name = array('name_first' => $nameFirst, 'name_last' => $nameLast, 'name_middle' => $nameMiddle, 'name_prefix' => $namePrefix, 'name_suffix' => $nameSuffix, 'name_nick' => $nameNick);

  foreach ($name as $nk => &$nv) {
    if ($nv && $nk != 'name_suffix' && $nk != 'name_prefix') {
      $nv = preg_replace('/^(\\P{L})+|(\\P{L})+$/u', '', $nv);

      // only names written in a single case are re-capitalized
      $case = LsString::checkCase($nv);
      $nv = $case == 'upper' || $case == 'lower' ? LsLanguage::nameize($nv) : $nv;

      if ($nk != 'name_last') {
        $nv = LsLanguage::hgCaser($nv, false);
      }
    }
  }
  unset($nv);

  if ($returnArray) {
    return $name;
  }

  $person = new Entity();
  $person->addExtension('Person');
  $person->name_first = $name['name_first'];
  $person->name_middle = $name['name_middle'];
  $person->name_last = $name['name_last'];
  $person->name_nick = $name['name_nick'];
  $person->name_prefix = $name['name_prefix'];
  $person->name_suffix = $name['name_suffix'];

  return $person;
}
/**
 * Returns the first paragraph as plain text: tags stripped, entities
 * replaced and whitespace collapsed.
 *
 * @return string|null The cleaned paragraph, or null when there are no paragraphs
 */
public function getCleanFirstParagraph()
{
  // The guard used to be inverted: it returned null whenever paragraphs
  // existed and indexed into the empty list otherwise.
  if (!$this->_paragraphs) {
    return null;
  }

  $first = $this->_paragraphs[0];
  $first = LsString::spacesToSpace(LsHtml::replaceEntities(LsHtml::stripTags($first)));

  return $first;
}
/**
 * Imports one spreadsheet row: finds or creates the entity named in the row,
 * then either relates it to $this->entity or adds it to $this->list.
 *
 * The operator is prompted (via readline) to confirm possible matches and
 * the creation of new entities. Rows missing required columns are skipped.
 *
 * @param array $row column name => cell value
 *
 * @return void
 */
public function processRow($row)
{
    // Normalise cell values first. The original called trim() without
    // assigning the result, so nothing was ever trimmed.
    foreach ($row as &$r) {
        if (is_string($r)) {
            $r = trim($r);
        }
    }
    unset($r);

    // A row may carry its own source; otherwise fall back to the task-wide one.
    if (isset($row['url']) && $row['url'] != '' && isset($row['url_name']) && $row['url_name'] != '') {
        $url = $row['url'];
        $url_name = $row['url_name'];
    } else {
        $url = $this->url;
        $url_name = $this->url_name;
    }

    if ($this->entity) {
        $required = array('entity_name', 'primary_type', 'relationship_category');
    } else {
        $required = array('entity_name', 'primary_type');
    }
    foreach ($required as $req) {
        if (!isset($row[$req]) || $row[$req] == '') {
            $this->printDebug('!!! > skipping row, ' . $req . ' not set');
            return;
        }
    }

    if ($row['primary_type'] != 'Person' && $row['primary_type'] != 'Org') {
        $this->printDebug('!!! > primary type not properly set, skipping row...');
        return;
    }

    if ($this->entity) {
        $relationship_category = trim($row['relationship_category']);
        $relationship_category_id = array_search($relationship_category, RelationshipCategoryTable::$categoryNames);
        // NOTE(review): a category stored under key 0 would also be rejected here;
        // presumably category ids start at 1 - confirm.
        if (!$relationship_category_id) {
            $this->printDebug('!!! > relationship type not properly set, skipping row...');
            return;
        }
    }

    $this->printDebug("processing: " . $row['entity_name'] . '......');

    if ($row['primary_type'] == 'Person') {
        $entity2 = PersonTable::parseFlatName($row['entity_name']);
        $similar_entities = PersonTable::getSimilarQuery2($entity2)->execute();
    } else {
        $entity2 = new Entity();
        $entity2->addExtension('Org');
        $entity2->setEntityField('name', $row['entity_name']);
        $similar_entities = OrgTable::getOrgsWithSimilarNames($entity2->name);
    }

    // Let the operator pick an existing entity instead of creating a new one.
    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }
        $accept = $this->readline(' Is this the same entity? (y or n or b to break)');
        if ($accept == 'y') {
            $entity2 = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        } elseif ($accept == 'b') {
            break;
        }
    }

    if (!$matched) {
        if ($entity2->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity2->name_first . ' ' . $entity2->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity2->name);
        }
        $accept = $this->readline(' create this new entity? (y or n) ');
        if ($accept == 'y') {
            // Extensions are best-effort: a bad extension name must not stop
            // the entity itself from being saved below.
            try {
                $extensions = LsString::split($row['entity_extensions'], '\\s*\\,\\s*');
                foreach ($extensions as $extension) {
                    $entity2->addExtension($extension);
                }
                $entity2->save();
                $entity2->addReference($url, null, null, $url_name);
            } catch (Exception $e) {
                $this->printDebug(' !!! problems with extensions for this row');
            }
            $fields = array('summary', 'blurb', 'website');
            foreach ($fields as $field) {
                if (isset($row[$field])) {
                    $entity2[$field] = $row[$field];
                }
            }
            $entity2->save();
            $entity2->addReference($url, null, null, $url_name);
            $this->printDebug(' ' . $entity2->name . ' saved');
        } else {
            $entity2 = null;
        }
    }

    if (!$entity2) {
        return;
    }

    // create relationship
    if ($this->entity) {
        $relationship = new Relationship();
        if (isset($row['relationship_order']) && $row['relationship_order'] != '') {
            // Explicit ordering: '1' puts the task entity first.
            if ($row['relationship_order'] == '1') {
                $relationship->Entity1 = $this->entity;
                $relationship->Entity2 = $entity2;
            } else {
                $relationship->Entity2 = $this->entity;
                $relationship->Entity1 = $entity2;
            }
        } elseif ($relationship_category == 'Position' || $relationship_category == 'Education') {
            if ($row['primary_type'] == 'Org') {
                $relationship->Entity1 = $this->entity;
                $relationship->Entity2 = $entity2;
            } else {
                $relationship->Entity1 = $entity2;
                $relationship->Entity2 = $this->entity;
            }
        } else {
            $relationship->Entity1 = $this->entity;
            $relationship->Entity2 = $entity2;
        }
        $relationship->setCategory($relationship_category);

        $cols = array('description1', 'description2', 'start_date', 'end_date', 'goods', 'amount', 'is_board', 'is_executive', 'is_employee');
        foreach ($cols as $col) {
            if (isset($row[$col]) && $row[$col] != '') {
                try {
                    $relationship[$col] = $row[$col];
                } catch (Exception $e) {
                    $this->printDebug(" could not set {$col} for relationship, skipping");
                }
            }
        }

        // The relationship is always new at this point, so its id is null.
        // The original also bound that null into "r.id <> ?", which SQL
        // evaluates to unknown, so the duplicate check could never match.
        $q = LsDoctrineQuery::create()
            ->from('Relationship r')
            ->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ?', array($relationship->entity1_id, $relationship->entity2_id, $relationship->category_id))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (relationship already found, skipping...)');
            return;
        }

        $relationship->save();
        $relationship->addReference($url, null, null, $url_name);
        $this->printDebug(" Relationship saved: {$relationship}\n");
    } elseif ($this->list) {
        $q = LsDoctrineQuery::create()
            ->from('LsListEntity le')
            ->where('le.entity_id = ? and le.list_id = ?', array($entity2->id, $this->list->id))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (already on list, skipping...)');
            return;
        }

        $le = new LsListEntity();
        $le->LsList = $this->list;
        $le->Entity = $entity2;
        // Stray var_dump()/echo debugging output was removed here.
        if (isset($row['rank'])) {
            $le->rank = $row['rank'];
        }
        $le->save();
    }
}
/**
 * Bulk-add action: collects a list of names (scraped from a URL, uploaded as
 * a spreadsheet, parsed from the entity summary, typed in manually, or found
 * by searching other entities' text), looks for matching entities and finally
 * creates the selected relationships to $this->entity.
 *
 * The request's "commit" value selects the stage:
 *  - "Begin"/"Continue": gather names and either redirect to the one-by-one
 *    toolbar or prepare $this->matches for the en-masse form;
 *  - "Submit": create entities/relationships from the en-masse form and redirect;
 *  - "Cancel": redirect back to the entity.
 *
 * @param sfWebRequest $request (type presumed from usage - confirm)
 *
 * @return void
 */
public function executeAddBulk($request)
{
    $this->checkEntity($request, false, false);
    $this->reference_form = new ReferenceForm();
    $this->reference_form->setSelectObject($this->entity);
    $this->add_bulk_form = new AddBulkForm();

    //get possible default categories
    $this->categories = LsDoctrineQuery::create()->select('c.name, c.name')->from('RelationshipCategory c')->orderBy('c.id')->fetchAll(PDO::FETCH_KEY_PAIR);
    array_unshift($this->categories, '');

    if ($request->isMethod('post') && in_array($request->getParameter('commit'), array('Begin', 'Continue'))) {
        if ($request->hasParameter('ref_id')) {
            $this->ref_id = $request->getParameter('ref_id');
        } else {
            $refParams = $request->getParameter('reference');
            $this->reference_form->bind($refParams);
            $restOfParams = (array) $request->getParameterHolder();
            $restOfParams = array_shift($restOfParams);
            $this->add_bulk_form->bind($restOfParams, $request->getFiles());
            if (!$this->reference_form->isValid() || !$this->add_bulk_form->isValid()) {
                return;
            }
            if ($this->ref_id = $refParams['existing_source']) {
                $ref = Doctrine::getTable('Reference')->find($this->ref_id);
            } else {
                $ref = new Reference();
                $ref->object_model = 'Entity';
                $ref->object_id = $this->entity->id;
                $ref->source = $refParams['source'];
                $ref->name = $refParams['name'];
                $ref->source_detail = $refParams['source_detail'];
                $ref->publication_date = $refParams['publication_date'];
                $ref->save();
            }
            $this->ref_id = $ref->id;
            $this->reference = $ref;
        }

        $verify_method = $request->getParameter('verify_method');

        if ($this->add_method = $request->getParameter('add_method')) {
            if ($this->add_method == 'scrape') {
                //scrape ref url
                //set names to confirm
                // When ref_id arrived as a request parameter no Reference was
                // loaded above, so $ref was undefined here; load it on demand.
                if (!isset($ref)) {
                    $ref = Doctrine::getTable('Reference')->find($this->ref_id);
                }
                $browser = new sfWebBrowser();
                $entity_types = $request->getParameter('entity_types');
                //FIND NAMES AT URL USING COMBO OF OPENCALAIS & LS CUSTOM HTML PARSING
                if ($ref && !$browser->get($ref->source)->responseIsError()) {
                    $text = $browser->getResponseText();
                    $this->names = LsTextAnalysis::getHtmlEntityNames($text, $entity_types);
                    $text = LsHtml::findParagraphs($text);
                    $this->text = preg_replace('/<[^b][^>]*>/is', " ", $text);
                    $this->confirm_names = true;
                    return;
                } else {
                    $request->setError('csv', 'problems finding names at that url');
                }
            } elseif ($this->add_method == 'upload') {
                $file = $this->add_bulk_form->getValue('file');
                $filename = 'uploaded_' . sha1($file->getOriginalName());
                $extension = $file->getExtension($file->getOriginalExtension());
                $filePath = sfConfig::get('sf_temp_dir') . '/' . $filename . $extension;
                $file->save($filePath);
                if ($filePath) {
                    if ($spreadsheetArr = LsSpreadsheet::parse($filePath)) {
                        $names = $spreadsheetArr['rows'];
                        if (!in_array('name', $spreadsheetArr['headers'])) {
                            $request->setError('file', 'The file you uploaded could not be parsed properly because there is no "name" column.');
                            return;
                        }
                        if (in_array('summary', $spreadsheetArr['headers'])) {
                            foreach ($names as &$name) {
                                // Normalise typographic quotes to ASCII. The search strings had
                                // degraded to literal '?' characters, which rewrote every question
                                // mark in the summary. The UTF-8 curly quotes below are the
                                // presumed original intent - confirm against the upload encoding.
                                $name['summary'] = str_replace(array("\xE2\x80\x98", "\xE2\x80\x99"), "'", $name['summary']);
                                $name['summary'] = str_replace(array("\xE2\x80\x9C", "\xE2\x80\x9D"), '"', $name['summary']);
                                if (isset($name['title'])) {
                                    $name['description1'] = $name['title'];
                                }
                            }
                            unset($name);
                        }
                    } else {
                        $request->setError('file', 'The file you uploaded could not be parsed properly.');
                        return;
                    }
                } else {
                    $request->setError('file', 'You need to upload a file.');
                    return;
                }
            } elseif ($this->add_method == 'summary') {
                //parse summary for names
                $this->text = $this->entity->summary;
                $entity_types = $request->getParameter('entity_types');
                $this->names = LsTextAnalysis::getTextEntityNames($this->text, $entity_types);
                $this->confirm_names = true;
                return;
            } elseif ($this->add_method == 'text') {
                $manual_names = $request->getParameter('manual_names');
                if ($manual_names && $manual_names != "") {
                    $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                    $manual_names = array_map('trim', $manual_names);
                    $names = array();
                    foreach ($manual_names as $name) {
                        $names[] = array('name' => $name);
                    }
                } else {
                    $request->setError('csv', 'You did not add names properly.');
                    return;
                }
            } elseif ($this->add_method == 'db_search') {
                $this->db_search = true;
            }
        }

        //intermediate scrape page -- takes confirmed names, builds names arr
        if ($confirmed_names = $request->getParameter('confirmed_names')) {
            $restOfParams = (array) $request->getParameterHolder();
            $restOfParams = array_shift($restOfParams);
            $this->add_bulk_form->bind($restOfParams, $request->getFiles());
            if (!$this->add_bulk_form->isValid()) {
                $this->reference = Doctrine::getTable('Reference')->find($this->ref_id);
                // SECURITY NOTE(review): unserialize() on request data can instantiate
                // arbitrary classes. Prefer a JSON round-trip for this field; the
                // template that serializes it has to change at the same time.
                $this->names = unserialize(stripslashes($request->getParameter('names')));
                $this->confirm_names = true;
                return;
            }
            $names = array();
            foreach ($confirmed_names as $cn) {
                $names[] = array('name' => $cn);
            }
            $manual_names = $request->getParameter('manual_names');
            if ($manual_names && $manual_names != "") {
                $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                $manual_names = array_map('trim', $manual_names);
                foreach ($manual_names as $name) {
                    $names[] = array('name' => $name);
                }
            }
        }

        // LOAD IN RELATIONSHIP DEFAULTS
        if (isset($verify_method)) {
            $defaults = $request->getParameter('relationship');
            // Only the en-masse path fills this; initialise it so the type
            // filter further down never reads an undefined variable.
            $extensions_arr = array();
            if ($verify_method == 'enmasse') {
                $this->default_type = $request->getParameter('default_type');
                $this->order = $request->getParameter('order');
                $category_name = $request->getParameter('relationship_category_all');
                $this->extensions = ExtensionDefinitionTable::getByTier(2, $this->default_type);
                foreach ($this->extensions as $ext) {
                    $extensions_arr[] = $ext->name;
                }
            } else {
                $category_name = $request->getParameter('relationship_category_one');
            }

            if ($category_name) {
                $this->category_name = $category_name;
                if (!($category = Doctrine::getTable('RelationshipCategory')->findOneByName($category_name))) {
                    $request->setError('csv', 'You did not select a relationship category.');
                    return;
                }
                // The category must exist before its name is used to pick a form class.
                $formClass = $category_name . 'Form';
                $categoryForm = new $formClass(new Relationship());
                $categoryForm->setDefaults($defaults);
                $this->form_schema = $categoryForm->getFormFieldSchema();
                if (in_array($category_name, array('Position', 'Education', 'Membership', 'Donation', 'Lobbying', 'Ownership'))) {
                    $this->field_names = array('description1', 'start_date', 'end_date', 'is_current');
                } else {
                    $this->field_names = array('description1', 'description2', 'start_date', 'end_date', 'is_current');
                }
                $extraFields = array(
                    'Position' => array('is_board', 'is_executive'),
                    'Education' => array('degree_id'),
                    'Donation' => array('amount'),
                    'Transaction' => array('amount'),
                    'Lobbying' => array('amount'),
                    'Ownership' => array('percent_stake', 'shares'),
                );
                if (isset($extraFields[$category_name])) {
                    $this->field_names = array_merge($this->field_names, $extraFields[$category_name]);
                }
            }

            $this->matches = array();

            // BOOT TO TOOLBAR OR LOOK FOR MATCHES FOR ENMASSE ADD
            if (isset($names) && count($names) > 0 || isset($this->db_search)) {
                if ($verify_method == 'onebyone') {
                    if (isset($category_name)) {
                        $defaults['category'] = $category_name;
                    }
                    $toolbar_names = array();
                    foreach ($names as $name) {
                        $toolbar_names[] = $name['name'];
                    }
                    $this->getUser()->setAttribute('toolbar_names', $toolbar_names);
                    $this->getUser()->setAttribute('toolbar_entity', $this->entity->id);
                    $this->getUser()->setAttribute('toolbar_defaults', $defaults);
                    $this->getUser()->setAttribute('toolbar_ref', $this->ref_id);
                    $this->redirect('relationship/toolbar');
                } else {
                    $this->category_name = $category_name;
                    if (isset($this->db_search)) {
                        // Find entities whose summary or blurb mentions this entity
                        // (by name or alias) and that are not already linked in
                        // the chosen category.
                        $q = LsDoctrineQuery::create()->from('Entity e')->where('(e.summary rlike ? or e.blurb rlike ?)', array('[[:<:]]' . $this->entity->name . '[[:>:]]', '[[:<:]]' . $this->entity->name . '[[:>:]]'));
                        foreach ($this->entity->Alias as $alias) {
                            $q->orWhere('(e.summary rlike ? or e.blurb rlike ?)', array('[[:<:]]' . $alias->name . '[[:>:]]', '[[:<:]]' . $alias->name . '[[:>:]]'));
                        }
                        $q->setHydrationMode(Doctrine::HYDRATE_ARRAY);
                        $cat_id = constant('RelationshipTable::' . strtoupper($category_name) . '_CATEGORY');
                        $q->whereParenWrap();
                        $q->andWhere('NOT EXISTS (SELECT DISTINCT l.relationship_id FROM Link l ' . 'WHERE l.entity1_id = e.id AND l.entity2_id = ? AND l.category_id = ?)', array($this->entity['id'], $cat_id));
                        $summary_matches = $q->execute();

                        // The highlight pattern does not depend on the match, so build it once.
                        $aliasPatterns = array();
                        foreach ($this->entity->Alias as $alias) {
                            $aliasPatterns[] = LsString::escapeStringForRegex($alias->name);
                        }
                        $aliasRegex = '/(' . implode("|", $aliasPatterns) . ')/is';

                        foreach ($summary_matches as $summary_match) {
                            $summary_match['summary'] = preg_replace($aliasRegex, '<strong>$1</strong>', $summary_match['summary']);
                            $this->matches[] = array('search_results' => array($summary_match));
                        }
                    } else {
                        for ($i = 0; $i < count($names); $i++) {
                            if (isset($names[$i]['name']) && trim($names[$i]['name']) != '') {
                                $name = $names[$i]['name'];
                                $name_terms = $name;
                                if ($this->default_type == 'Person') {
                                    $name_parts = preg_split('/\\s+/', $name);
                                    if (count($name_parts) > 1) {
                                        $name_terms = PersonTable::nameSearch($name);
                                    }
                                    $terms = $name_terms;
                                    $primary_ext = "Person";
                                } elseif ($this->default_type == 'Org') {
                                    $name_terms = OrgTable::nameSearch($name);
                                    $terms = $name_terms;
                                    $primary_ext = "Org";
                                } else {
                                    $terms = $name_terms;
                                    $primary_ext = null;
                                }
                                $pager = EntityTable::getSphinxPager($terms, $page = 1, $num = 20, $listIds = null, $aliases = true, $primary_ext);
                                $match = $names[$i];
                                $match['search_results'] = $pager->execute();
                                if (isset($names[$i]['types'])) {
                                    // Keep only the types that are known extensions.
                                    $types = explode(',', $names[$i]['types']);
                                    $types = array_map('trim', $types);
                                    $match['types'] = array();
                                    foreach ($types as $type) {
                                        if (in_array($type, $extensions_arr)) {
                                            $match['types'][] = $type;
                                        }
                                    }
                                }
                                $this->matches[] = $match;
                            }
                        }
                    }
                }
            }
        }
    } elseif ($page = $this->getRequestParameter('page')) {
        $this->page = $page;
        $this->num = $this->getRequestParameter('num', 50);
    } elseif ($request->isMethod('post') && $request->getParameter('commit') == 'Submit') {
        $this->ref_id = $this->getRequestParameter('ref_id');
        $relationship_category = $this->getRequestParameter('category_name');
        $order = $this->getRequestParameter('order');
        $default_type = $request->getParameter('default_type');
        $default_ref = Doctrine::getTable('Reference')->find($request->getParameter('ref_id'));

        for ($i = 0; $i < $this->getRequestParameter('count'); $i++) {
            if ($entity_id = $request->getParameter('entity_' . $i)) {
                $selected_entity_id = null;
                // Start every row without a reference. The original read $ref
                // before it was ever assigned and could carry a previous row's
                // reference into a row whose entity id was invalid.
                $ref = null;
                $relParams = $request->getParameter("relationship_" . $i);
                if (!empty($relParams['ref_name'])) {
                    $ref['source'] = $relParams['ref_source'];
                    $ref['name'] = $relParams['ref_name'];
                }

                if ($entity_id == 'new') {
                    $name = $request->getParameter('new_name_' . $i);
                    if ($default_type == 'Person') {
                        $new_entity = PersonTable::parseFlatName($name);
                    } else {
                        $new_entity = new Entity();
                        $new_entity->addExtension('Org');
                        $new_entity->name = trim($name);
                    }
                    $new_entity->save();
                    $new_entity->blurb = $request->getParameter('new_blurb_' . $i);
                    $new_entity->summary = $request->getParameter('new_summary_' . $i);
                    if (!$ref) {
                        $ref = $default_ref;
                    }
                    $new_entity->addReference($ref['source'], null, null, $ref['name']);
                    if ($types = $request->getParameter('new_extensions_' . $i)) {
                        foreach ($types as $type) {
                            $new_entity->addExtension($type);
                        }
                    }
                    $new_entity->save();
                    $selected_entity_id = $new_entity->id;
                } elseif ($entity_id > 0) {
                    $selected_entity_id = $entity_id;
                    LsCache::clearEntityCacheById($selected_entity_id);
                }

                if ($selected_entity_id) {
                    $startDate = $relParams['start_date'];
                    $endDate = $relParams['end_date'];
                    // Dates and reference fields are handled separately and must not
                    // reach fromArray(). 'ref_source' is the key actually read above;
                    // 'ref_url' is kept for backward compatibility.
                    unset($relParams['start_date'], $relParams['end_date'], $relParams['ref_name'], $relParams['ref_url'], $relParams['ref_source']);

                    $rel = new Relationship();
                    $rel->setCategory($relationship_category);
                    if ($order == '1') {
                        $rel->entity1_id = $this->entity['id'];
                        $rel->entity2_id = $selected_entity_id;
                    } else {
                        $rel->entity2_id = $this->entity['id'];
                        $rel->entity1_id = $selected_entity_id;
                    }

                    //only set dates if valid
                    if ($startDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($startDate))) {
                        $rel->start_date = Dateable::convertForDb($startDate);
                    }
                    if ($endDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($endDate))) {
                        $rel->end_date = Dateable::convertForDb($endDate);
                    }
                    $rel->fromArray($relParams, null, $hydrateCategory = true);

                    // For db_search the matched entity's own references are
                    // preferred as the source of the new relationship.
                    if ($request->hasParameter('add_method') && $request->getParameter('add_method') == 'db_search') {
                        $refs = EntityTable::getSummaryReferences($selected_entity_id);
                        if (count($refs)) {
                            $ref = $refs[0];
                        } else {
                            $refs = EntityTable::getAllReferencesById($selected_entity_id);
                            if (count($refs)) {
                                $ref = $refs[0];
                            }
                        }
                    }
                    if (!$ref) {
                        $ref = $default_ref;
                    }
                    $rel->saveWithRequiredReference(array('source' => $ref['source'], 'name' => $ref['name']));
                }
            }
        }

        $this->clearCache($this->entity);
        $this->redirect($this->entity->getInternalUrl());
    } elseif ($request->isMethod('post') && $request->getParameter('commit') == 'Cancel') {
        $this->redirect($this->entity->getInternalUrl());
    }
}
/**
 * Uses Google Maps API to parse flat address
 *
 * Sends the address to the geocoder, reads the structured AddressDetails of
 * the first Placemark and fills a new Address from it. Returns null when the
 * request fails, when the reported accuracy is 3 or lower, or when no known
 * state can be determined. Exceptions thrown by the browser adapter
 * (e.g. host not found) propagate to the caller.
 *
 * @param string $str            free-form address text
 * @param bool   $returnAccuracy when true, return array('address' => ..., 'accuracy' => ...)
 *
 * @return Address|array|null
 */
static function parse($str, $returnAccuracy = false)
{
    //CLEANUP
    $str = preg_replace('/[\\n\\r]/', ' ', $str);
    $key = sfConfig::get('sf_google_maps_key');
    $url = 'http://maps.google.com/maps/geo?q=' . urlencode($str) . '&output=xml&key=' . $key;

    // The original wrapped this in a try/catch that only re-threw, so adapter
    // errors reach the caller exactly as before.
    $c = new sfWebBrowser();
    if ($c->get($url)->responseIsError()) {
        return null;
    }
    $c->setResponseText(iconv('ISO-8859-1', 'UTF-8', $c->getResponseText()));
    $xml = $c->getResponseXml();
    $structured = $xml->Response->Placemark->AddressDetails;
    $accuracy = (int) $structured['Accuracy'];

    //accuracy of 4+ means we have at least a town (UNLESS THE TOWN DOESN'T "OFFICIALLY" EXIST)
    if ($accuracy <= 3) {
        return null;
    }

    $address = new Address();

    //COUNTRY (USA only for now)
    $address->country_id = 1;

    //STATE (for some reason trickery needs to be done to get state to work right)
    $stateName = null;
    if (isset($structured->Country->AdministrativeArea->AdministrativeAreaName)) {
        $stateName = (array) $structured->Country->AdministrativeArea->AdministrativeAreaName;
        $stateName = isset($stateName[0]) ? $stateName[0] : $stateName;
    } else {
        // Without an AdministrativeArea, a country code other than 'US' is
        // tried as the state name.
        $possible_state = $structured->Country->CountryNameCode;
        if ($possible_state != 'US') {
            $stateName = $possible_state;
        }
    }
    if (!$stateName) {
        return null;
    }
    if (!($state = AddressStateTable::retrieveByText($stateName))) {
        return null;
    }
    $address->state_id = $state->id;

    //COUNTY (this may not exist)
    $countyName = $structured->Country->AdministrativeArea->SubAdministrativeArea->SubAdministrativeAreaName;
    $address->county = LsString::emptyToNull((string) $countyName);

    // The locality is nested under the county when a county is present.
    if ($countyName) {
        $cityName = $structured->Country->AdministrativeArea->SubAdministrativeArea->Locality->LocalityName;
    } else {
        $cityName = $structured->Country->AdministrativeArea->Locality->LocalityName;
    }

    //CITY (this may not exist!)
    $address->city = (string) $cityName;

    //accuracy of 5+ means we have postal code
    if ($accuracy > 4) {
        // Pick the deepest node available; postal code and street hang off it.
        if ($cityName && $countyName) {
            $base = $structured->Country->AdministrativeArea->SubAdministrativeArea->Locality;
            if (isset($base->DependentLocality)) {
                $base = $base->DependentLocality;
            }
        } elseif ($cityName && !$countyName) {
            $base = $structured->Country->AdministrativeArea->Locality;
        } elseif (!$cityName && $countyName) {
            $base = $structured->Country->AdministrativeArea->SubAdministrativeArea;
        } else {
            $base = $structured->Country->AdministrativeArea;
        }

        //POSTAL CODE (for some reason trickery needs to be done to get postal code to work right)
        if ($postalCode = (array) $base->PostalCode->PostalCodeNumber) {
            $postalCode = $postalCode[0];
            $address->postal = (string) $postalCode;
        }

        //accuracy of 8 means we have exact match
        if ($accuracy > 5) {
            //STREET (unit info is lost)
            $street1 = $base->Thoroughfare->ThoroughfareName;
            $address->street1 = (string) $street1;
        }
    }

    //COORDINATES
    // Parse the "lon,lat[,alt]" coordinate string. Altitude is not used, and
    // missing parts no longer raise undefined-offset notices.
    $coords = explode(",", (string) $xml->Response->Placemark->Point->coordinates);
    $address->longitude = $coords[0];
    $address->latitude = isset($coords[1]) ? $coords[1] : null;

    if ($returnAccuracy) {
        return array('address' => $address, 'accuracy' => $accuracy);
    }

    return $address;
}
/**
 * Tells whether $search2 occurs within $n whitespace-delimited words of
 * $search1 (on either side) anywhere in $subject.
 *
 * Both search arguments are inserted into regular expressions unescaped, so
 * they are treated as pattern fragments. $search1 is matched case-sensitively;
 * $search2 is matched case-insensitively at a word boundary. A trailing word
 * is only part of the window when it is followed by whitespace.
 *
 * @param string $subject text to search
 * @param string $search1 pattern fragment the window is centred on
 * @param string $search2 pattern fragment looked for inside the window
 * @param int    $n       maximum number of words on each side of $search1
 *
 * @return bool
 */
static function withinN($subject, $search1, $search2, $n)
{
    // One "word" followed by its trailing whitespace.
    $w = '[^\\s]+\\s+';

    // Up to $n words, then $search1 (optionally followed by a comma), then up to $n words.
    $re = '/(' . $w . '){0,' . $n . '}' . $search1 . '\\b\\,?\\s*(' . $w . '){0,' . $n . '}/';

    if (preg_match_all($re, $subject, $matches)) {
        foreach ($matches[0] as $match) {
            if (preg_match('/\\b' . $search2 . '/isu', $match)) {
                return true;
            }
        }
    }

    return false;
}
/**
 * Converts a stored field value into something suitable for display.
 *
 * - null becomes the string 'NULL';
 * - values of fields that alias a relation are looked up and, for Entity and
 *   sfGuardUser records, rendered as links;
 * - start_date / end_date go through Dateable::convertForDisplay();
 * - integer and boolean columns are cast / rendered as yes|no;
 * - anything else is optionally shortened, with the full (tag-stripped) text
 *   placed in a title attribute.
 *
 * @param mixed     $value   raw stored value
 * @param array     $field   field descriptor; 'field_name' is read here
 * @param int|false $excerpt maximum excerpt length, or a falsy value to disable
 *
 * @return mixed display value (may contain HTML)
 */
static function convertValueForDisplay($value, $field, $excerpt = 40)
{
    if (is_null($value)) {
        return 'NULL';
    }
    if (!($mod = self::loadModification($field))) {
        return $value;
    }

    // Locate the table that actually declares the column: extension fields of
    // an Entity and category fields of a Relationship live in their own tables.
    $table = Doctrine::getTable($mod['object_model']);
    $columns = $table->getColumns();
    if ($mod['object_model'] == 'Entity') {
        if (!array_key_exists($field['field_name'], $columns)) {
            if ($extensionName = EntityTable::getExtensionNameByFieldName($field['field_name'])) {
                $table = Doctrine::getTable($extensionName);
            }
        }
    } elseif ($mod['object_model'] == 'Relationship') {
        if (!array_key_exists($field['field_name'], $columns)) {
            $table = Doctrine::getTable(RelationshipTable::getCategoryNameByFieldName($field['field_name']));
        }
    }

    // Foreign-key style fields: show the related record instead of the raw id.
    if ($alias = self::getFieldNameAlias($field)) {
        $class = $table->getRelation($alias)->getClass();
        if ($record = Doctrine::getTable($class)->find($value, Doctrine::HYDRATE_ARRAY)) {
            if ($class == 'Entity') {
                sfLoader::loadHelpers('Ls');
                return entity_link($record, null);
            } elseif ($class == 'sfGuardUser') {
                sfLoader::loadHelpers('Ls');
                return user_link($record);
            }
            return $record;
        }
    }

    if (in_array($field['field_name'], array('start_date', 'end_date'))) {
        return Dateable::convertForDisplay($value);
    }

    $def = $table->getColumnDefinition($field['field_name']);
    switch ($def['type']) {
        case 'integer':
            return (double) $value;
        case 'boolean':
            return $value ? 'yes' : 'no';
    }

    if ($excerpt) {
        $short = LsString::excerpt($value, $excerpt);
        if ($short == $value) {
            return $value;
        }
        // strip_tags() alone leaves quotes intact, so a value containing '"'
        // could break out of the attribute. Escape it for attribute context
        // without double-encoding entities already present.
        $title = htmlspecialchars(strip_tags($value), ENT_QUOTES, 'UTF-8', false);
        return '<span title="' . $title . '">' . $short . '</span>';
    }

    return $value;
}
/**
 * Builds $info['blurb'] from the fragments in $info['blurb_arr'].
 *
 * A fragment is kept when, after removing boilerplate words (director, board,
 * since, age, the person's own name words, the "since" and age values),
 * four-digit numbers and non-alphabetic characters, more than three
 * whitespace-separated pieces remain. The kept fragments are joined, and the
 * result is stored only if it still has more than four pieces once common
 * title words are removed as well.
 *
 * @param array $info      must contain 'blurb_arr' and 'since'
 * @param array $age_match must contain 'age' and 'name_match' => array('id' => ...)
 *
 * @return array $info, with 'blurb' set when a usable blurb was found
 */
private function parseBlurb($info, $age_match)
{
    if (count($info['blurb_arr']) == 0) {
        return $info;
    }

    $id = $age_match['name_match']['id'];
    $person = Doctrine::getTable('Entity')->find($id);
    // Guard against a missing entity instead of dereferencing a non-object.
    $name_words = $person ? explode(' ', $person->name) : array();

    $skip = array('director', 'directors', 'since', 'board', $info['since'], $age_match['age'], 'age');
    $skip = array_merge($skip, $name_words);

    $new = array();
    foreach ($info['blurb_arr'] as $b) {
        if ($this->countUnskippedBlurbWords($b, $skip) > 3) {
            $new[] = $b;
        }
    }

    if (count($new) > 0) {
        $blurb = implode(' ', $new);
        // Second pass is stricter: generic title words do not count either.
        $skip = array_merge($skip, array('executive', 'vice', 'president', 'chief', 'chairman', 'of', 'the'));
        if ($this->countUnskippedBlurbWords($blurb, $skip) > 4) {
            $info['blurb'] = $blurb;
        }
    }

    return $info;
}

/**
 * Counts the whitespace-separated pieces left in $text after removing the
 * given skip words, four-digit numbers and non-alphabetic characters.
 *
 * @param string $text
 * @param array  $skip words to remove (matched case-insensitively on word boundaries)
 *
 * @return int
 */
private function countUnskippedBlurbWords($text, array $skip)
{
    $n = $text;
    foreach ($skip as $s) {
        $s = LsString::escapeStringForRegex($s);
        $n = preg_replace('/\\b' . $s . '\\b/isu', '', $n);
    }
    $n = preg_replace('/\\b\\d\\d\\d\\d\\b/', '', $n);
    $n = LsString::stripNonAlpha($n, ' ');

    return count(preg_split('/\\s+/s', $n));
}
/**
 * Imports one governor from the profile page at $this->_baseUrl . $row['url'].
 *
 * Finds an existing matching person (same first-name prefix and a bio that
 * mentions "governor") or creates a new one, then fills in birth data, party,
 * summary, schools, the governor's office and position(s), spouse, phone and
 * image from the page markup. Nothing happens when the page request fails.
 *
 * @param array $row reads 'url', 'name', 'state' and 'years'
 *
 * @return void
 */
protected function importGovernor($row)
{
    $url = $this->_baseUrl . $row['url'];
    if ($this->browser->get($url)->responseIsError()) {
        return;
    }

    $text = $this->browser->getResponseText();
    $text = LsHtml::replaceEntities($text);
    //preg_match('/>Family\:<\/b>([^<]*)<br/is',$text,$family_arr);

    $name = trim(str_ireplace('Gov.', '', $row['name']));
    $this->printDebug('');
    $this->printDebug($name . ':');

    $governor = PersonTable::parseFlatName($name);
    $governor->addExtension('PoliticalCandidate');
    $governor->addExtension('ElectedRepresentative');
    $governor->is_state = 1;

    // Reuse an existing entity when one first name is a prefix of the other
    // and the candidate's bio mentions a governorship.
    $similar = $governor->getSimilarEntitiesQuery(true)->execute();
    foreach ($similar as $s) {
        $sim_re = LsString::escapeStringForRegex($s->name_first);
        $search_re = LsString::escapeStringForRegex($governor->name_first);
        if (preg_match('/^' . $sim_re . '/su', $governor->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
            continue;
        }
        $bio = $s->getExtendedBio();
        if (preg_match('/\\bgovernor(ship)?\\b/isu', $bio)) {
            $governor = $s;
            $this->printDebug(' Found existing governor: ' . $s->name . ' ' . $s->id);
            break;
        }
    }
    $governor->save();
    $this->printDebug($governor->id);

    if (!$governor->start_date && preg_match('/>Born\\:<\\/b>([^<]*)<br/is', $text, $birth_arr)) {
        $this->printDebug(' Birthdate: ' . $birth_arr[1]);
        $governor->start_date = trim($birth_arr[1]);
    }
    if (!$governor->birthplace && preg_match('/>Birth State\\:<\\/b>([^<]*)<br/is', $text, $birth_state_arr)) {
        $this->printDebug(' Birthplace: ' . trim($birth_state_arr[1]));
        $governor->birthplace = trim($birth_state_arr[1]);
    }

    //PARTY MEMBERSHIP
    if (preg_match('/>Party\\:<\\/b>([^<]*)<br/is', $text, $party_arr)) {
        $party_str = $party_arr[1];
        $this->printDebug(' Party: ' . $party_str);
        if (stristr($party_str, 'Democrat')) {
            $party = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('name = ?', 'Democratic Party')->fetchOne();
        }
        if (stristr($party_str, 'Republican')) {
            $party = EntityTable::getByExtensionQuery('PoliticalParty')->addWhere('name = ?', 'Republican Party')->fetchOne();
        }
        if (isset($party) && $party && !$governor->party_id) {
            $governor->Party = $party;
            $governor->is_independent = false;
            $this->printDebug(' Added membership in ' . $party);
        } elseif (stristr($party_str, 'Independent')) {
            $governor->is_independent = true;
        }
    }

    // Use the long text nodes of the page (240+ characters) as the summary.
    if (!$governor->summary && preg_match_all('/>([^<]{240,})/isu', $text, $bio_match)) {
        $str = '';
        foreach ($bio_match[1] as $b) {
            if (!stristr($b, 'Javascript')) {
                $str .= "\n\n" . $b;
            }
        }
        $str = trim($str);
        if (strlen($str)) {
            $governor->summary = $str;
        }
    }
    $governor->save();
    $governor->addReference($url, null, $governor->getAllModifiedFields(), 'Governors Association');

    //SCHOOLS
    if (preg_match('/>School\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $school_arr)) {
        // Schools are separated by ';', or by ',' when no ';' is present.
        $school_names = explode(';', trim($school_arr[1]));
        if (count($school_names) == 1) {
            $school_names = explode(',', $school_names[0]);
        }
        foreach ($school_names as $school_name) {
            $school_name = trim($school_name);
            // A trailing separator or an empty field yields a blank name;
            // do not create a nameless School entity for it.
            if ($school_name === '') {
                continue;
            }
            // The OR is parenthesised so it cannot escape the extension
            // filter added by getByExtensionQuery().
            if (!($school = EntityTable::getByExtensionQuery('School')->leftJoin('e.Alias a')->addWhere('(e.name = ? or a.name = ?)', array($school_name, $school_name))->fetchOne())) {
                $school = new Entity();
                $school->addExtension('Org');
                $school->addExtension('School');
                $school->name = $school_name;
                $school->save();
                $this->printDebug(' Added School: ' . $school_name);
            }
            $q = RelationshipTable::getByCategoryQuery('Education')->addWhere('entity1_id = ? and entity2_id = ?', array($governor->id, $school->id))->fetchOne();
            if (!$q) {
                $relationship = new Relationship();
                $relationship->setCategory('Education');
                $relationship->Entity1 = $governor;
                $relationship->Entity2 = $school;
                $relationship->is_current = 0;
                $relationship->save();
                $relationship->addReference($url, null, $relationship->getAllModifiedFields(), 'Governors Association');
                $this->printDebug(' Added education: ' . $relationship->name);
            }
        }
    }

    //GOVERNOR OFFICE AND POSITION
    $office_name = 'Office of the Governor of ' . $row['state'];
    if (!($office = EntityTable::getByExtensionQuery('GovernmentBody')->addWhere('name = ?', $office_name)->fetchOne())) {
        $office = new Entity();
        $office->name = $office_name;
        $office->addExtension('Org');
        $office->addExtension('GovernmentBody');
        $state = Doctrine::getTable('AddressState')->findOneByName($row['state']);
        if ($state) {
            $office->state_id = $state->id;
        }
        $office->save();
        $office->addReference($url, null, $office->getAllModifiedFields(), 'Governors Association');
        $this->printDebug(' Added office: ' . $office->name);
    }

    $q = RelationshipTable::getByCategoryQuery('Position')->addWhere('entity1_id = ? and entity2_id = ? and description1 = ?', array($governor->id, $office->id, 'Governor'))->fetchOne();
    if (!$q) {
        // Years are consumed in (start, end) pairs; an unpaired last year is
        // the start of a term that is still current.
        sort($row['years']);
        $i = 0;
        while ($i < count($row['years'])) {
            $governorship = new Relationship();
            $governorship->setCategory('Position');
            $governorship->Entity1 = $governor;
            $governorship->Entity2 = $office;
            $governorship->description1 = 'Governor';
            $governorship->start_date = $row['years'][$i];
            $i++;
            if (isset($row['years'][$i])) {
                $governorship->end_date = $row['years'][$i];
                $governorship->is_current = 0;
                if (!$governor->blurb && !isset($row['years'][$i + 1])) {
                    $governor->blurb = 'Former Governor of ' . $row['state'];
                }
            } else {
                $governorship->is_current = 1;
                if (!$governor->blurb) {
                    $governor->blurb = 'Governor of ' . $row['state'];
                }
            }
            $governor->save();
            $i++;
            $governorship->save();
            $governorship->addReference($url, null, $governorship->getAllModifiedFields(), 'Governors Association');
            $this->printDebug(' Added governorship: ' . $governorship->name);
        }
    }

    //SPOUSE
    if (preg_match('/>Spouse\\:<\\/b>(.*?)<br/is', $text, $spouse_arr)) {
        $spouse = trim(LsHtml::stripTags($spouse_arr[1]));
        // Parenthesised so the OR stays inside the Family category filter.
        $q = RelationshipTable::getByCategoryQuery('Family')->addWhere('(entity1_id = ? or entity2_id = ?)', array($governor->id, $governor->id))->fetchOne();
        if (!$q && strlen($spouse)) {
            $spouse = PersonTable::parseFlatName($spouse);
            $spouse->save();
            $this->printDebug(' Added spouse: ' . $spouse->name);
            $relationship = new Relationship();
            $relationship->setCategory('Family');
            $relationship->Entity1 = $spouse;
            $relationship->Entity2 = $governor;
            $relationship->description1 = 'Spouse';
            $relationship->description2 = 'Spouse';
            $relationship->save();
            $relationship->addReference($url, null, $relationship->getAllModifiedFields(), 'Governors Association');
            $this->printDebug(' Added spouse relationship: ' . $relationship->name);
        }
    }

    //ADDRESS --not working, malformed addresses
    /* if (preg_match('/>Address\:\s*<\/b>(.*?)<b>/is',$text,$address_arr)) { $address = trim(str_replace('<br/>',', ',$address_arr[1])); $this->printDebug($address); if ($governor->Address->count() == 0 && $a = $governor->addAddress($address)) { $this->printDebug(' Address: ' . $a); $governor->save(); } }*/

    //PHONE NUMBER
    if (preg_match('/>Phone\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $phone_arr)) {
        $phone_number = trim($phone_arr[1]);
        if (!$governor->Phone->count()) {
            $phone = $governor->addPhone($phone_number);
            $this->printDebug(' Phone: ' . $phone);
        }
    }

    // IMAGE: a separate variable keeps the page URL in $url intact.
    if (!$governor->Image->count() && preg_match('/<img .*?class\\="display" src\\="([^"]*)"/is', $text, $img_arr)) {
        $imageUrl = $img_arr[1];
        try {
            $fileName = ImageTable::createFiles($imageUrl, $governor->name_first);
        } catch (Exception $e) {
            // Image import is best-effort; a failed download just skips it.
            $fileName = null;
        }
        if ($fileName) {
            //insert image record
            $image = new Image();
            $image->filename = $fileName;
            $image->entity_id = $governor->id;
            $image->title = $governor->name;
            $image->caption = 'From Governors Association website';
            $image->is_featured = true;
            $image->is_free = false;
            $image->url = $imageUrl;
            $image->save();
            $this->printDebug("Imported image: " . $image->filename);
        }
    }
}
/**
 * Interactively imports one scraped result.
 *
 * Cleans the captured fields, asks the operator whether to process the row,
 * looks for similar existing entities, optionally creates a new entity,
 * optionally walks the bio for organisation names and creates position
 * relationships, and finally attaches the entity to $this->list and/or
 * relates it to $this->org when those are set.
 *
 * @param array $match captured fields; 'name' is required, 'bio' and 'rank' are optional
 *
 * @return false|null false when the operator declines to process the row, null in every other case
 */
public function parseResults($match)
{
    // Semicolon-delimited copy of the bio, built from the raw capture before the
    // fields are cleaned below. Defaults to '' so parseBio() never receives an
    // undefined variable when no bio was captured.
    $bio_dirty = '';
    if (isset($match['bio'])) {
        $bio_dirty = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($match['bio'], "; ")));
        $bio_dirty = preg_replace('/(\\;\\s)+/is', '; ', $bio_dirty);
    }

    // Clean every captured field. Written back by key rather than by reference so
    // that no reference to the last element outlives the loop.
    foreach ($match as $key => $value) {
        $match[$key] = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($value, " ")));
    }

    if (!isset($match['name'])) {
        return;
    }
    $name = $match['name'];
    $bio = isset($match['bio']) ? $match['bio'] : '';

    $this->printDebug("_________________________\n\nname: " . $name . "\n");
    $this->printDebug("bio: " . $bio . "\n");

    $accept = strtolower($this->readline('Process this entity? (n to skip) '));
    if ($accept == 'n' || $accept == 'no') {
        return false;
    }

    // Build the candidate entity and fetch look-alikes.
    if (!$this->org_org) {
        if ($this->last_first) {
            $entity = PersonTable::parseCommaName($name);
        } else {
            $entity = PersonTable::parseFlatName($name);
        }
        $similar_entities = PersonTable::getSimilarQuery2($entity)->execute();
    } else {
        $entity = new Entity();
        $entity->addExtension('Org');
        foreach ($this->org_extensions as $ext) {
            $entity->addExtension($ext);
        }
        // NOTE(review): the original trimmed $name and stripped dots from it AFTER
        // this call, which had no effect on the entity; confirm whether the name
        // was meant to be normalised before being set.
        $entity->setEntityField('name', $name);
        $similar_entities = OrgTable::getSimilarQuery($entity)->execute();
    }

    // Let the operator pick an existing entity.
    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }
        // The retry prompt carries the line break present in the original literal.
        $accept = $this->promptForChoice(
            ' Is this the same entity? (y or n)',
            array('y', 'n'),
            " Is this the same entity? \n(y or n) "
        );
        if ($accept === 'y') {
            $entity = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        }
        // Typing "break" on every attempt stops reviewing further candidates.
        if ($accept === 'break') {
            break;
        }
    }

    // Otherwise offer to create the entity.
    $created = false;
    if (!$matched) {
        if ($entity->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity->name_first . ' ' . $entity->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity->name);
        }
        $accept = $this->promptForChoice(' create this new entity? (y or n) ', array('y', 'n'));
        if ($accept === 'y') {
            if ($entity->getPrimaryExtension() == 'Person') {
                $this->printDebug("\n Bio: {$bio} \n");
                $accept = $this->promptForChoice(
                    ' Add this bio? (y or n) ',
                    array('y', 'n'),
                    ' add this bio? (y or n) '
                );
                if ($accept === 'y') {
                    $entity->summary = $bio;
                }
            }
            $entity->save();
            $entity->addReference($this->url, null, null, $this->url_name);
            $created = true;
            $this->printDebug(' ' . $entity->name . ' saved');
        }
    }

    // For people, optionally mine the bio for organisations they hold positions at.
    if (($matched || $created) && $entity->getPrimaryExtension() == 'Person') {
        $accept = $this->promptForChoice("Parse above bio for possible relationships? (y or n) ", array('y', 'n'));
        if ($accept === 'y') {
            $names = $entity->parseBio($bio_dirty);
            $this->printDebug(" Orgs that {$entity} has a position at?");
            foreach ($names as $name) {
                $exists = false;
                $name = trim($name);
                $accept = $this->promptForChoice(
                    " > {$name} :: an org? (y or n or b to break) ",
                    array('y', 'n', 'b'),
                    " {$name} :: an org? \n(y or n or b to break) ",
                    true
                );
                if ($accept === 'b') {
                    break;
                }
                if ($accept !== 'y') {
                    continue;
                }

                $this->printDebug(' .....looking for names.....');
                $orgs = EntityTable::getByExtensionAndNameQuery('Org', $name)->limit(10)->execute();
                $related_org = null;
                foreach ($orgs as $org) {
                    $q = LsDoctrineQuery::create()
                        ->from('Relationship r')
                        ->where('entity1_id = ? and entity2_id = ?', array($entity->id, $org->id))
                        ->fetchOne();
                    if ($q) {
                        $this->printDebug(' Position already exists, skipping...');
                        $exists = true;
                        break;
                    }
                    $accept = $this->promptForChoice(
                        " Create a position relationship between {$entity->name} and {$org->name}? (y or n) ",
                        array('y', 'n')
                    );
                    if ($accept === 'y') {
                        $related_org = $org;
                        break;
                    }
                }

                if (!$related_org && !$exists) {
                    // The helper restarts the attempt counter; the original reused the
                    // counter left over from the previous prompt.
                    $accept = $this->promptForChoice(
                        " couldn't find org, should this one be created: {$name} (y or n) ",
                        array('y', 'n')
                    );
                    if ($accept === 'y') {
                        $related_org = new Entity();
                        $related_org->addExtension('Org');
                        // Strip dots except the one in a ".com" suffix.
                        $related_org->name = preg_replace('/\\.(?!com)/i', '', $name);
                        $extensions = $this->readline(" what extensions should this org get? (eg 'Business, LobbyingFirm, LawFirm') ");
                        $extensions = preg_split('/\\,\\s*/isu', $extensions, -1, PREG_SPLIT_NO_EMPTY);
                        try {
                            foreach ($extensions as $extension) {
                                $related_org->addExtension($extension);
                            }
                            $related_org->save();
                            $related_org->addReference($this->url, null, null, $this->url_name);
                        } catch (Exception $e) {
                            $this->printDebug(' !!! problems with org creation, skipping');
                            $related_org = null;
                        }
                    }
                }

                if ($related_org) {
                    // category_id 1 is presumably the Position category — confirm against the schema.
                    $q = LsDoctrineQuery::create()
                        ->from('Relationship r')
                        ->where("r.entity1_id = ? and r.entity2_id = ? \nand r.category_id = ?", array($entity->id, $related_org->id, 1))
                        ->fetchOne();
                    if ($q) {
                        $this->printDebug(' (relationship already found, skipping...)');
                        continue;
                    }
                    $relationship = new Relationship();
                    $relationship->Entity1 = $entity;
                    $relationship->Entity2 = $related_org;
                    $relationship->setCategory('Position');

                    $title = $this->readline(" Title for this position relationship? (<enter> to skip) ");
                    if (strlen($title) > 2) {
                        $relationship->description1 = $title;
                    }

                    $current = strtolower($this->readline(" Is the relationship current? (y or n or <enter> to skip) "));
                    if (in_array($current, array('y', 'yes'), true)) {
                        $relationship->is_current = 1;
                    } elseif (in_array($current, array('n', 'no'), true)) {
                        $relationship->is_current = 0;
                    }

                    $board = strtolower($this->readline(" Is the relationship a board position? (y or n or <enter> to skip) "));
                    if (in_array($board, array('y', 'yes'), true)) {
                        $relationship->is_board = 1;
                    } elseif (in_array($board, array('n', 'no'), true)) {
                        $relationship->is_board = 0;
                    }

                    $relationship->save();
                    $relationship->addReference($this->url, null, null, $this->url_name);
                    $this->printDebug(" Relationship saved: {$relationship}");
                }
            }
        }
    }

    if ($matched || $created) {
        // List membership, with an optional numeric rank taken from the 'rank' capture.
        if ($this->list) {
            $q = LsDoctrineQuery::create()
                ->from('LsListEntity l')
                ->where('l.entity_id = ? and l.list_id = ?', array($entity->id, $this->list->id))
                ->fetchOne();
            if (!$q) {
                $le = new LsListEntity();
                $le->Entity = $entity;
                $le->LsList = $this->list;
                if (isset($match['rank']) && preg_match('/(\\d+)/isu', $match['rank'], $rank_match)) {
                    $le->rank = $rank_match[1];
                }
                $le->save();
                $this->printDebug('List membership saved');
            }
        }

        // Relationship to the configured org.
        if ($this->org) {
            $q = LsDoctrineQuery::create()
                ->from('Relationship r')
                ->where("r.entity1_id = ? and r.entity2_id = ? \nand r.category_id = ?", array($entity->id, $this->org->id, 1))
                ->fetchOne();
            if ($q) {
                $this->printDebug(' (relationship already found, skipping...)');
                return;
            }
            $relationship = new Relationship();
            $relationship->Entity1 = $entity;
            $relationship->Entity2 = $this->org;
            $relationship->setCategory($this->relationship_category);

            if ($this->description1) {
                $relationship->description1 = $this->description1;
            } else {
                $description = $this->readline(" what description to give this relationship ({$relationship}) ? (less than 3 chars will skip)");
                if (strlen($description) > 2) {
                    $relationship->description1 = $description;
                }
            }

            if ($this->relationship_category == 'Position') {
                $relationship->is_board = $this->is_board;
            } elseif ($this->relationship_category == 'Donation') {
                if ($this->amount) {
                    $relationship->amount = $this->amount;
                } else {
                    $amount = $this->readline(" what amount ({$relationship}) ? (less than 3 chars will skip)");
                    if (strlen($amount) > 1) {
                        $relationship->amount = $amount;
                    }
                }
            }

            $relationship->save();
            $relationship->addReference($this->url, null, null, $this->url_name);
            $this->printDebug(" Relationship saved: {$relationship}");
        }
    }
}

/**
 * Asks a question on the console and re-asks until the answer is one of
 * $choices or five prompts in total have been issued.
 *
 * @param string      $prompt      text of the first prompt
 * @param array       $choices     accepted answers
 * @param string|null $retryPrompt text used when re-asking; defaults to $prompt
 * @param bool        $lowercase   lower-case each answer before comparing
 *
 * @return mixed the last answer read, which may be outside $choices once the attempts run out
 */
private function promptForChoice($prompt, array $choices, $retryPrompt = null, $lowercase = false)
{
    if ($retryPrompt === null) {
        $retryPrompt = $prompt;
    }

    $answer = $this->readline($prompt);
    if ($lowercase) {
        $answer = strtolower($answer);
    }

    $attempts = 1;
    while (!in_array($answer, $choices, true) && $attempts < 5) {
        $answer = $this->readline($retryPrompt);
        if ($lowercase) {
            $answer = strtolower($answer);
        }
        $attempts++;
    }

    return $answer;
}
/**
 * Tries to extract a biography paragraph for $person from an HTML page.
 *
 * Text runs that mention the person's name are collected from the page,
 * filtered through a series of heuristics (length, script fragments,
 * news-release markers, long quotations, lowercase word count, common bio
 * words, trailing period), and the survivors are joined into one string.
 *
 * @param string $page   raw HTML of the page
 * @param object $person record exposing name_first, name_last and getExtensions()
 * @param object $org    record exposing name
 *
 * @return string|false the assembled bio, or false when nothing qualifies
 */
private function findPersonBio($page, $person, $org)
{
    $name_re = LsString::escapeStringForRegex($person->name_last);

    // If the page title carries both names, the first name alone may also anchor a match.
    // The length check comes first so stristr() is never handed an empty needle.
    if (preg_match('/<title>([^<]*)<\\/title>/is', $page, $title_match)) {
        if (strlen($person->name_first) > 2
            && stristr($title_match[1], $person->name_last)
            && stristr($title_match[1], $person->name_first)
        ) {
            $name_re .= '|' . LsString::escapeStringForRegex($person->name_first);
        }
    }

    $layout_tags = implode('|', LsHtml::$layoutTags);
    $re2 = '/>([^<]*?(' . $name_re . ')(\\s|,|<)(.*?))<\\/?(' . $layout_tags . ')/is';
    // Try the UTF-8 aware pattern first; fall back to the byte-wise one if it finds nothing or fails.
    $re = $re2 . 'u';

    if (!(preg_match_all($re, $page, $matches) || preg_match_all($re2, $page, $matches))) {
        $this->printDebug('no matches found');
        return false;
    }

    $arr = array();
    $qual = false;
    // $news deliberately persists across candidates: once a news-like fragment is
    // seen, weak candidates are rejected until a strong one resets it.
    $news = false;
    $bio_words_re = null;

    foreach ($matches[1] as $candidate) {
        // Skip script/style fragments.
        if (stristr($candidate, '}') || stristr($candidate, '{') || preg_match('/\\svar\\s/is', $candidate)) {
            continue;
        }

        $str = LsHtml::replaceEntities($candidate);
        $str = LsHtml::stripTags($str, '');
        $str = trim(LsString::spacesToSpace($str));
        $this->printDebug(strlen($str));

        if (strlen($str) > 3000) {
            $this->printDebug('FAILED - str too long');
            continue;
        }
        if (preg_match('/(^|\\b)(' . $name_re . ')\\b/is', $str) == 0) {
            $this->printDebug($candidate . 'FAILED - no name match');
            continue;
        }
        if (count(explode(' ', $str)) < 12) {
            $this->printDebug('FAILED - str not long enough');
            continue;
        }

        // Press-release markers.
        if (stristr($str, 'announce') || stristr($str, 'today') || stristr($str, '—') || stristr($str, '–') || preg_match('/^[^\\-]{0,100}\\-(\\-|\\s)/is', $str)) {
            $news = true;
            $this->printDebug('FAILED: dash / announced / today');
            continue;
        }

        // A quotation of more than six words also marks the text as news. The opening
        // quote must be closed by the same character (or a curly pair). The original
        // pattern back-referenced the leading-whitespace group instead of the quote.
        if (preg_match('/(^|\\s)(?:([\'"])(.+?)\\2|“(.+?)”|”(.+?)”)/is', $str, $qm) && count(explode(' ', $qm[0])) > 6) {
            $news = true;
            $this->printDebug('FAILED: quote');
            continue;
        }

        if (preg_match_all('/\\s(\\p{Ll})+\\b/su', $str, $lcm) < 5) {
            $this->printDebug('FAILED: not enough lowercase');
            continue;
        }

        // Build the bio-word alternation once, on first use.
        if ($bio_words_re === null) {
            $bio_words = PersonTable::$commonBioWords;
            if (in_array('Lobbyist', $person->getExtensions())) {
                $bio_words = array_merge($bio_words, LobbyistTable::$commonBioWords);
            }
            $bio_words_re = implode('|', $bio_words);
        }
        $bio_word_ct = preg_match_all('/\\s(' . $bio_words_re . ')\\s/is', $str, $bio_word_matches);

        if (preg_match('/\\.$/is', $str) == 0) {
            $this->printDebug('no period at end of string');
            continue;
        }

        if ($bio_word_ct > 1) {
            $news = false;
            $qual = true;
            $arr[] = $str;
            continue;
        }

        $this->printDebug('less than 2 bio words');
        if ($news == false) {
            $arr[] = preg_replace('/^[\\,\\.\\:\\;]\\s*/su', '', $str);
        }
    }

    if (!$qual) {
        return false;
    }

    $arr = array_unique($arr);
    $ret = false;
    $bio = implode("\n\n", $arr);

    // The bio must be short and pass LsString::withinN() for the name and a
    // biography verb, and the name must appear within its first 20 words.
    if (strlen($bio) < 3000 && LsString::withinN($bio, '(' . $name_re . ')', '(is|was|holds|led|has|had|provides|practices|served|leads)', 2)) {
        if (preg_match('/^.*?\\b(' . $name_re . ')\\b/is', $bio, $m) && count(explode(' ', $m[0])) < 20) {
            $ret = true;
            $this->printDebug('SUCCESS');
        }
    } else {
        $this->printDebug('within N failed !!!!');
    }

    // When the org is named after the person, require extra evidence that the
    // text is about the person rather than the org.
    $org_test = true;
    if ($ret && stristr($org->name, $person->name_last)) {
        $org_test = false;
        if (strlen($person->name_first) > 1) {
            // First name within the first ~14 words, or an honorific anywhere.
            if (preg_match('/([^\\s]+\\s+){0,14}/is', $arr[0], $beg_match)) {
                $nf_re = LsString::escapeStringForRegex($person->name_first);
                if (preg_match('/\\b' . $nf_re . '\\b/is', $beg_match[0]) || preg_match('/\\b(Mr|Mrs|Ms)\\b/su', $arr[0])) {
                    $org_test = true;
                }
            }
        } elseif (preg_match('/\\b(he|she|him|her|his|mr|ms|mrs)\\b/is', $arr[0])) {
            $org_test = true;
        }
    }

    if ($ret && $org_test) {
        return $bio;
    }

    return false;
}
/**
 * Converts a stored field value into something suitable for display.
 *
 * Null becomes a small "NULL" marker; values of aliased (foreign-key) fields
 * are resolved to the related record (an entity link for Entity records);
 * start_date/end_date go through Dateable::convertForDisplay(); integer and
 * boolean columns are rendered as text; anything else is excerpted.
 *
 * @param mixed $value the raw field value
 *
 * @return mixed display value: usually a string, or the related record when the
 *               field is an alias to a non-Entity record
 */
public function convertValueForDisplay($value)
{
    if (is_null($value)) {
        return '<span class="text_small">NULL</span>';
    }

    if (!($record = $this->Modification->getObject(true))) {
        return $value;
    }

    // Start from the record's own table; when the field is not among the record's
    // own data, switch to the extension/category table that the lookup names.
    $table = $record->getTable();
    if ($record instanceof Entity) {
        $data = $record->getData();
        if (!array_key_exists($this->field_name, $data)) {
            if ($extensionName = EntityTable::getExtensionNameByFieldName($this->field_name)) {
                $table = Doctrine::getTable($extensionName);
            }
        }
    } elseif ($record instanceof Relationship) {
        $data = $record->getData();
        if (!array_key_exists($this->field_name, $data)) {
            // Guarded like the Entity branch: keep the record's table when no
            // category name is found for this field.
            if ($categoryName = RelationshipTable::getCategoryNameByFieldName($this->field_name)) {
                $table = Doctrine::getTable($categoryName);
            }
        }
    }

    // Aliased fields hold a foreign key: show the related record instead of the id.
    if ($alias = $this->getFieldNameAlias()) {
        $class = $table->getRelation($alias)->getClass();
        if ($related = Doctrine::getTable($class)->find($value)) {
            if ($related instanceof Entity) {
                sfLoader::loadHelpers('Ls');
                return entity_link($related, null);
            }
            return $related;
        }
    }

    if (in_array($this->field_name, array('start_date', 'end_date'), true)) {
        return Dateable::convertForDisplay($value);
    }

    // getColumnDefinition() may not return an array for an unknown column;
    // treat that as "no special type".
    $def = $table->getColumnDefinition($this->field_name);
    $type = (is_array($def) && isset($def['type'])) ? $def['type'] : null;

    if ($type === 'integer') {
        return (string) $value;
    }
    if ($type === 'boolean') {
        return $value ? 'yes' : 'no';
    }

    return LsString::excerpt($value);
}