/**
 * Fills $this->similar_entities with entities that resemble $this->entity.
 *
 * For a Person the candidates come from EntityTable::getSimilarEntitiesQuery()
 * (loose matching, array hydration, at most 5 rows). For any other entity, up
 * to two usable words of the stripped name are sent to the Sphinx pager
 * (page 1, 3 results, aliases on, restricted to "Org") and the entity itself
 * is left out of the result list.
 */
public function executeSimilarEntities()
{
    // The connection handle is fetched here but not otherwise used in this method.
    $db = Doctrine_Manager::connection();

    if ($this->entity['primary_ext'] == 'Person') {
        $this->similar_entities = EntityTable::getSimilarEntitiesQuery($this->entity, true)
            ->setHydrationMode(Doctrine::HYDRATE_ARRAY)
            ->limit(5)
            ->execute();

        return;
    }

    $words = preg_split('#\\s+#', OrgTable::stripName($this->entity['name']));

    // Keep only words longer than one character that do not contain an ampersand.
    $usable = array();
    foreach ($words as $word) {
        if (strlen($word) <= 1 || strpos($word, '&') !== false) {
            continue;
        }
        $usable[] = $word;
    }

    $cleaned = array_map(array('LsSphinxClient', 'cleanForQuery'), $usable);
    $searchWords = array_slice($cleaned, 0, 2);

    $this->similar_entities = array();
    if (!count($searchWords)) {
        return;
    }

    $results = EntityTable::getSphinxPager(join(' ', $searchWords), 1, 3, null, true, "Org")->execute();
    foreach ($results as $candidate) {
        if ($candidate['id'] != $this->entity['id']) {
            $this->similar_entities[] = $candidate;
        }
    }
}
/**
 * Walks $this->companies and records each company's rank on $this->list.
 *
 * For every company: the name is normalised with OrgTable::stripNamePunctuation(),
 * an Entity with that exact name is looked up, and if none exists one is
 * obtained through the year-specific getter (only 2008 is handled here). A
 * LsListEntity row is then saved unless the same (list, rank, entity) triple is
 * already present. Each company is processed in its own transaction; any
 * exception rolls the transaction back and is re-thrown.
 *
 * Terminates the script when safeToRun('fortune1000') reports another run.
 */
public function execute()
{
    if (!$this->safeToRun('fortune1000')) {
        $this->printDebug('script already running');
        die;
    }

    $this->setList();

    switch ($this->year) {
        case 2008:
            $this->getCompanyList2008();
            echo "list imported\n";
    }

    while ($company = current($this->companies)) {
        // Start every pass with a defined value: when the company does not exist
        // yet and the year has no matching getter, $corp would otherwise be an
        // undefined variable at the `if ($corp)` check below.
        $corp = null;

        try {
            $this->db->beginTransaction();

            $company['name'] = OrgTable::stripNamePunctuation($company['name']);
            $rank = $company['rank'];

            $existing = Doctrine_Query::create()
                ->from('Entity e')
                ->where('name = ?', $company['name']);

            if ($existing->count() == 0) {
                switch ($this->year) {
                    case 2008:
                        $corp = $this->getCompany2008($company['fortune_id'], $company['name'], $company['revenue']);
                }
            } else {
                $corp = $existing->fetchOne();
            }

            if ($corp) {
                // two corps can have the same rank, so searches for duplicate entity_id and rank
                $rank_existing = Doctrine_Query::create()
                    ->from('LsListEntity L')
                    ->where('list_id = ? and rank = ? and entity_id = ?', array($this->list->id, $rank, $corp->id))
                    ->count();

                if ($rank_existing == 0) {
                    $listentity = new LsListEntity();
                    $listentity->entity_id = $corp->id;
                    $listentity->list_id = $this->list->id;
                    $listentity->rank = $rank;
                    $listentity->save();
                    echo "{$rank} {$corp->name} (saved)\n";
                } else {
                    echo "{$rank} {$corp->name} (already saved)\n";
                }
            }

            // NOTE(review): in test mode the transaction is neither committed nor
            // rolled back here — presumably it is discarded when the script ends; confirm.
            if (!$this->testMode) {
                $this->db->commit();
            }
        } catch (Exception $e) {
            $this->db->rollback();
            throw $e;
        }

        next($this->companies);
    }
}
/**
 * Builds the request URL for an API method and remembers it in $this->query_url.
 *
 * The query string always starts with the API key. Every entry of $params is
 * appended as key=urlencoded(value); for the organization search method the
 * 'search' value is first passed through OrgTable::nameSearch().
 *
 * @param array  $params query parameters, keyed by parameter name
 * @param string $method API method name placed between the base URL and the format
 *
 * @return string the assembled URL
 */
public function getQueryUrl($params, $method = 'map.organization_search_v1')
{
    $pairs = array('apikey=' . $this->api_key);

    foreach ($params as $key => $value) {
        if ($key == 'search' && $method == 'map.organization_search_v1') {
            $key = 'search';
            $value = OrgTable::nameSearch($value);
        }
        $pairs[] = $key . "=" . urlencode($value);
    }

    $built = $this->api_url . $method . "." . $this->format . "?" . implode('&', $pairs);
    $this->query_url = $built;

    return $built;
}
/**
 * Tells whether any name of the related Entity resembles $str.
 *
 * $str has its suffixes removed and is split into search terms; a term that is
 * itself an array is turned into a regex alternation. A name qualifies when it
 * starts with the first term (an optional leading "The" is tolerated) and every
 * term can be cut out of it once. With a truthy $strict, a name is rejected
 * when what remains after cutting the terms and removing suffixes still splits
 * into $strict or more pieces.
 *
 * @param string   $str    name to compare against
 * @param int|bool $strict false to disable, otherwise the leftover-piece count that disqualifies a name
 *
 * @return bool
 */
public function hasSimilarName($str, $strict = false)
{
    $str = trim(OrgTable::removeSuffixes($str));
    if (strlen($str) == 0) {
        return false;
    }

    $terms = LsQuery::splitSearchPhrase($str);
    $names = $this->Entity->getAllNames();

    // Escape every term for use inside a pattern; grouped terms become "a|b|c".
    foreach ($terms as $index => $term) {
        if (is_array($term)) {
            $alternatives = array();
            foreach ($term as $alternative) {
                $alternatives[] = LsString::escapeStringForRegex($alternative);
            }
            $terms[$index] = implode('|', $alternatives);
        } else {
            $terms[$index] = LsString::escapeStringForRegex($term);
        }
    }

    if ($terms[0] == 'The') {
        array_shift($terms);
    }

    foreach ($names as $name) {
        if (!preg_match('/^(The\\s+)?(' . $terms[0] . ')/isu', $name)) {
            continue;
        }

        // Cut each term out of the name once; a term that cannot be cut disqualifies the name.
        $allTermsFound = true;
        foreach ($terms as $term) {
            $remainder = preg_replace('/((^|\\s)|\\b)(' . $term . ')(\\b|(\\s|$))/isu', ' ', $name, 1);
            if ($remainder == $name) {
                $allTermsFound = false;
            } else {
                $name = $remainder;
            }
        }

        $name = trim(OrgTable::removeSuffixes($name));

        if ($strict && $allTermsFound && strlen($name) > 0 && count(LsString::split($name)) >= $strict) {
            $allTermsFound = false;
        }

        if ($allTermsFound) {
            return true;
        }
    }

    return false;
}
/**
 * Returns the search form of this record's name, delegating to
 * PersonTable::nameSearch() for a Person and to OrgTable::nameSearch() otherwise.
 */
public function nameSearch()
{
    return ($this->primary_ext == 'Person')
        ? PersonTable::nameSearch($this->name)
        : OrgTable::nameSearch($this->name);
}
/**
 * Normalises a name for category matching: suffixes are removed via
 * OrgTable::removeSuffixes(), the result is lower-cased and apostrophes are dropped.
 *
 * @param string $name
 *
 * @return string
 */
static function cleanNameForCategoryMatching($name)
{
    $lowered = strtolower(OrgTable::removeSuffixes($name));

    return str_replace("'", "", $lowered);
}
/**
 * Builds a query for entities of the given extension(s) whose name or alias
 * matches $str or one of its spelling variants.
 *
 * Variants are produced by repeatedly applying a fixed list of regex rewrites
 * (dots, "&" vs "and", commas, "US" vs "United States") to every query string
 * found so far. Each variant is then matched with LIKE against both e.name and
 * a.name.
 *
 * @param string|array $extensions extension name(s); when 'Org' is included, suffixes
 *                                 (except 'Bancorp') are removed from $str first
 * @param string       $str        name to search for
 * @param int          $strength   0 = contains ('%x%'), 1 = prefix ('x%'), anything else = exact pattern
 *
 * @return Doctrine_Query|array the query, or an empty array when the trimmed name is
 *                              shorter than 3 characters (callers must handle both)
 */
public static function getByExtensionAndNameQuery($extensions, $str, $strength = 1)
{
    $extensions = (array) $extensions;
    $str = trim($str);

    if (in_array('Org', $extensions)) {
        $str = OrgTable::removeSuffixes($str, array('Bancorp'));
    }

    if (strlen($str) < 3) {
        return array();
    }

    $q = EntityTable::getByExtensionQuery($extensions)->leftJoin('e.Alias a');
    $search_queries = array($str);

    // Ordered (pattern, replacement) pairs. This used to be an associative array
    // that declared the '\\.' key twice, so the "dot becomes space" rule was
    // silently overwritten by "dot is removed"; a list keeps both rules active.
    $rewrites = array(
        array('\\.', ' '),
        array('\\.', ''),
        array('\\s&\\s', ' and '),
        array('\\sand\\s', ' & '),
        array(' & ', ' '),
        array(',', ''),
        array('\\bUS\\b', 'United States'),
        array('United States\\b', 'US'),
    );

    // $search_queries grows while it is being walked, so variants of variants are found too.
    $i = 0;
    while ($i < count($search_queries)) {
        $name = $search_queries[$i];
        $i++;

        if (strlen($name) < 3) {
            break;
        }

        foreach ($rewrites as $rewrite) {
            list($pattern, $replacement) = $rewrite;
            $new = preg_replace('/' . $pattern . '/isu', $replacement, $name);
            if ($new != $name && !in_array($new, $search_queries)) {
                $search_queries[] = $new;
            }
        }
    }

    // Turn each variant into a LIKE pattern according to the requested strength.
    foreach ($search_queries as $index => $pattern) {
        if ($strength == 0) {
            $search_queries[$index] = '%' . $pattern . '%';
        } elseif ($strength == 1) {
            $search_queries[$index] = $pattern . '%';
        }
    }

    $e = implode(' or ', array_fill(0, count($search_queries), 'e.name like ?'));
    $a = implode(' or ', array_fill(0, count($search_queries), 'a.name like ?'));

    // The same parameter list is bound twice: once for the names, once for the aliases.
    $search_queries = array_merge($search_queries, $search_queries);
    $q->addWhere($e . ' or ' . $a, $search_queries);

    return $q;
}
/**
 * Turns an organisation name into a regex fragment.
 *
 * Suffixes are removed and the name is escaped with
 * LsString::escapeStringForRegex(); afterwards commas become optional, every
 * literal "\s+" in the escaped text also accepts a preceding comma, and "&"
 * matches either "&" or "and".
 *
 * @param string $name
 *
 * @return string pattern body without delimiters
 */
static function getNameRegex($name)
{
    $escaped = LsString::escapeStringForRegex(OrgTable::removeSuffixes($name));

    // str_replace applies the pairs one after another, each on the previous result.
    return str_replace(
        array(',', '\\s+', '&'),
        array(',?', ',?\\s+', '(&|\\s*and\\s*)'),
        $escaped
    );
}
/**
 * Imports one spreadsheet row: finds or creates the person, and when the person
 * is new or was recognised through their bio, links them to the 'affiliation1'
 * organisation with a Position relationship.
 *
 * Matching walks the similar people returned for the parsed name. A candidate
 * is accepted when checkAffiliations() recognises one of the row's
 * affiliationN values, or when one of those values is found in the candidate's
 * extended bio. Everything happens inside one transaction; on any exception the
 * transaction is rolled back and the exception re-thrown. A summary of what was
 * found or created is appended to $this->edits.
 *
 * @param array $row row values keyed by column name ('name', 'affiliation1', optional
 *                   'bio', 'affiliationN', 'affiliation1_extensions', 'affiliation1_title',
 *                   'start_date', 'end_date', 'title', 'notes', 'rank')
 */
protected function processRow($row)
{
    foreach ($row as &$r) {
        $r = trim($r);
    }
    // Break the reference so a later write to $r cannot alter the last row value.
    unset($r);

    $edit = array(
        'Search Name' => $row['name'],
        'Affiliation Name' => $row['affiliation1'],
        'Similar Names' => array(),
        'New Person' => null,
        'Existing Person' => null,
        'New Org' => null,
        'Existing Org' => null,
        'New Relationship' => null,
    );

    try {
        $this->db->beginTransaction();

        $person = null;
        $search_person = PersonTable::parseFlatName($row['name']);
        $similar = $search_person->getSimilarEntitiesQuery(true)->execute();
        $matched_bio = false;
        $similar_ids = array();

        foreach ($similar as $s) {
            $similar_ids[] = $s->id;

            // Skip candidates whose first name is not a prefix of the searched
            // first name, nor the other way round.
            $sim_re = LsString::escapeStringForRegex($s->name_first);
            $search_re = LsString::escapeStringForRegex($search_person->name_first);
            if (preg_match('/^' . $sim_re . '/su', $search_person->name_first) == 0
                && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
                continue;
            }

            // Test affiliation1, affiliation2, ... until one is recognised or the columns run out.
            $ct = 1;
            $matched_affils = array();
            $unmatched_affils = array();
            while (isset($row['affiliation' . $ct]) && trim($row['affiliation' . $ct]) != '') {
                $affil = trim($row['affiliation' . $ct]);
                $org = $s->checkAffiliations(array($affil));
                if ($org) {
                    $matched_affils[] = array($org, $affil);
                    $edit['Existing Org'] = $org->id;
                    break;
                }
                $unmatched_affils[] = $affil;
                $ct++;
            }

            if (count($matched_affils)) {
                $person = $s;
                break;
            }

            // No known affiliation: fall back to looking for the affiliation names in the bio.
            $bio = $s->getExtendedBio();
            foreach ($unmatched_affils as $affil) {
                $affil = OrgTable::removeSuffixes($affil);
                $this->printDebug($affil);
                $this->printDebug($bio);
                if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
                    $matched_bio = true;
                    break;
                }
            }

            if ($matched_bio) {
                $person = $s;
                break;
            }

            $this->printDebug(' ' . $s->name . ' failed');
        }

        $edit['Similar Names'] = array_slice($similar_ids, 0, 5);

        $no_match = false;
        if (!$person) {
            if (isset($row['bio']) && trim($row['bio']) != '') {
                $search_person->summary = $row['bio'];
            }
            $search_person->save();
            $this->printDebug(' not found, new person saved: ' . $search_person->name);
            $search_person->addReference($this->source_url, null, null, $this->source_name);
            $no_match = true;
            $edit['New Person'] = $search_person->id;
            $person = $search_person;
        } else {
            if (isset($row['bio']) && trim($row['bio']) != '' && !$person->summary) {
                $person->summary = $row['bio'];
                $person->save();
            }
            $this->printDebug(' **person found: ' . $person->name);
            $edit['Existing Person'] = $person->id;
        }

        // A person matched through a known affiliation needs no new relationship;
        // only bio matches and brand-new people get one.
        if ($matched_bio || $no_match) {
            $orgs = OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true);

            // Among similarly named orgs, prefer the one with the highest related-person count.
            $max = -1;
            $affiliated_org = null;
            foreach ($orgs as $org) {
                $this->printDebug(' found match: ' . $org->name);
                $ct = $org->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)->count();
                if ($ct > $max) {
                    $affiliated_org = $org;
                    $edit['Existing Org'] = $affiliated_org->id;
                    $max = $ct;
                }
            }

            if (!$affiliated_org) {
                $affiliated_org = new Entity();
                $affiliated_org->addExtension('Org');

                if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
                    $extensions = explode(',', $row['affiliation1_extensions']);
                    foreach ($extensions as $ext) {
                        $ext = trim($ext);
                        if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
                            $affiliated_org->addExtension($ext);
                        }
                    }
                }

                $affiliated_org->name = $row['affiliation1'];
                $affiliated_org->save();
                $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
                $edit['New Org'] = $affiliated_org->id;
            }

            $rel = new Relationship();
            $rel->Entity1 = $person;
            $rel->Entity2 = $affiliated_org;
            $rel->setCategory('Position');

            if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
                $description = trim($row['affiliation1_title']);
                $rel->description1 = $description;
                if ($description == 'Director' || $description == 'Trustee' || preg_match('/^Chair/su', $description)) {
                    $rel->is_board = 1;
                    $rel->is_employee = 0;
                }
            }

            $rel->save();
            $rel->addReference($this->source_url, null, null, $this->source_name);
            $edit['New Relationship'] = $rel->id;
        }

        // Optional relationship details are only recorded in the edit summary.
        foreach (array('start_date', 'end_date', 'title', 'notes') as $field) {
            if (isset($row[$field]) && trim($row[$field]) != '') {
                $edit['Relationship'][$field] = trim($row[$field]);
            }
        }

        if (isset($row['rank']) && $row['rank'] != '') {
            $edit['rank'] = $row['rank'];
        }

        $this->db->commit();
    } catch (Exception $e) {
        $this->db->rollback();
        throw $e;
    }

    $this->edits[] = $edit;
}
/**
 * Two-step bulk add of entities to the current list.
 *
 * Step 1 (POST with 'reference' and 'csv'): the uploaded spreadsheet is parsed,
 * the reference is resolved or created, and every row with a name is searched
 * through the Sphinx pager; the candidates are exposed as $this->matches for
 * confirmation.
 *
 * Step 2 (POST with 'ref_id'): for each confirmed row the chosen entity, or a
 * newly created one, is added to the list unless it is already on it; caches
 * are cleared and the request is redirected to the list page.
 *
 * A POST matching neither step sets an error on 'name'. Non-POST requests only
 * prepare the forms.
 *
 * @param sfWebRequest $request
 */
public function executeAddBulk($request)
{
    $this->checkList($request, false, false);

    $this->reference_form = new ReferenceForm();
    $this->reference_form->setSelectObject($this->list);
    $this->csv_form = new CsvUploadForm();

    if (!$request->isMethod('post')) {
        return;
    }

    $commit = $request->getParameter('commit');
    if ($commit == 'Cancel') {
        $this->redirect(LsListTable::getInternalUrl($this->list));
    }

    // IF REFERENCE INFO AND FILE HAVE BEEN SUBMITTED, LOAD DATA IN
    if ($request->hasParameter('reference') && $request->hasParameter('csv')) {
        $csvParams = $request->getParameter('csv');
        $filePath = $request->getFilePath('csv[file]');
        $this->csv_form->bind($csvParams, $request->getFiles('csv'));

        $refParams = $request->getParameter('reference');
        $this->reference_form->bind($refParams);

        if (!$this->reference_form->isValid()) {
            return;
        }

        $spreadsheetArr = LsSpreadsheet::parse($filePath);
        if (!$spreadsheetArr) {
            $request->setError('csv', 'The file you uploaded could not be parsed properly.');
            return;
        }

        $names = $spreadsheetArr['rows'];
        if (!in_array('name', $spreadsheetArr['headers'])) {
            $request->setError('csv', 'The file you uploaded could not be parsed properly because there is no "name" column.');
            return;
        }

        // Reuse the selected reference or store a new one. The existing reference
        // is not loaded here any more: nothing in this step used the loaded record.
        $this->ref_id = $refParams['existing_source'];
        if (!$this->ref_id) {
            $ref = new Reference();
            $ref->object_model = 'LsList';
            $ref->object_id = $this->list->id;
            $ref->source = $refParams['source'];
            $ref->name = $refParams['name'];
            $ref->source_detail = $refParams['source_detail'];
            $ref->publication_date = $refParams['publication_date'];
            $ref->save();
            $this->ref_id = $ref->id;
        }

        $this->default_type = $request->getParameter('default_type');
        if (!$this->default_type) {
            $request->setError('csv', 'You need to choose a default type.');
            return;
        }

        $this->extensions = ExtensionDefinitionTable::getByTier(2, $this->default_type);
        $extensions_arr = array();
        foreach ($this->extensions as $ext) {
            $extensions_arr[] = $ext->name;
        }

        $this->matches = array();
        $rowCount = isset($names) ? count($names) : 0;

        for ($i = 0; $i < $rowCount; $i++) {
            if (!isset($names[$i]['name']) || trim($names[$i]['name']) == '') {
                continue;
            }

            $name = $names[$i]['name'];

            // Choose search terms and the primary extension filter from the default type.
            $terms = $name;
            $primary_ext = null;
            if ($this->default_type == 'Person') {
                $name_parts = preg_split('/\\s+/', $name);
                if (count($name_parts) > 1) {
                    $terms = PersonTable::nameSearch($name, true);
                }
                $primary_ext = "Person";
            } elseif ($this->default_type == 'Org') {
                $terms = OrgTable::nameSearch($name);
                $primary_ext = "Org";
            }

            $pager = EntityTable::getSphinxPager($terms, 1, 20, null, true, $primary_ext);

            $match = array('name' => $name);
            $match['search_results'] = $pager->execute();
            $match['blurb'] = isset($names[$i]['blurb']) ? $names[$i]['blurb'] : null;
            $match['rank'] = isset($names[$i]['rank']) ? $names[$i]['rank'] : null;
            $match['types'] = array();

            // Only extension names valid for the default type are kept.
            if (isset($names[$i]['types'])) {
                $types = array_map('trim', explode(',', $names[$i]['types']));
                foreach ($types as $type) {
                    if (in_array($type, $extensions_arr)) {
                        $match['types'][] = $type;
                    }
                }
            }

            $this->matches[] = $match;
        }

        return;
    }

    if (!$request->hasParameter('ref_id')) {
        $request->setError('name', 'The name you entered is invalid');
        return;
    }

    $this->ref_id = $this->getRequestParameter('ref_id');
    $default_type = $this->getRequestParameter('default_type');

    // Read once instead of on every loop iteration.
    $count = $this->getRequestParameter('count');

    // Loaded on first use and shared by every newly created entity.
    $ref = null;

    for ($i = 0; $i < $count; $i++) {
        $entity_id = $request->getParameter('entity_' . $i);
        if (!$entity_id) {
            continue;
        }

        $selected_entity_id = null;

        if ($entity_id == 'new') {
            $name = $request->getParameter('new_name_' . $i);

            if ($default_type == 'Person') {
                $new_entity = PersonTable::parseFlatName($name);
            } else {
                $new_entity = new Entity();
                $new_entity->addExtension('Org');
                $new_entity->name = trim($name);
            }

            if ($types = $request->getParameter('new_extensions_' . $i)) {
                foreach ($types as $type) {
                    $new_entity->addExtension($type);
                }
            }

            $new_entity->save();
            $new_entity->blurb = $request->getParameter('new_blurb_' . $i);

            if ($ref === null) {
                $ref = Doctrine::getTable('Reference')->find($request->getParameter('ref_id'));
            }
            $new_entity->addReference($ref->source, null, null, $ref->name);
            $new_entity->save();

            $selected_entity_id = $new_entity->id;
        } elseif ($entity_id > 0) {
            $selected_entity_id = $entity_id;
        }

        if (!$selected_entity_id) {
            continue;
        }

        $q = LsDoctrineQuery::create()
            ->from('LsListEntity le')
            ->where('le.list_id = ? AND le.entity_id = ?', array($this->list['id'], $selected_entity_id));

        if (!$q->count()) {
            $ls_list_entity = new LsListEntity();
            $ls_list_entity->list_id = $this->list->id;
            $ls_list_entity->entity_id = $selected_entity_id;
            $ls_list_entity->rank = $request->getParameter('entity_' . $i . '_rank');
            $ls_list_entity->save();
            LsCache::clearEntityCacheById($selected_entity_id);
        }
    }

    $this->clearCache($this->list);
    $this->clearRailsCache($this->list->id);
    $this->redirect($this->list->getInternalUrl());
}
/**
 * Converts a raw government entity label into a display name plus abbreviation.
 *
 * The two chambers of Congress map to fixed names, placeholder values yield
 * null, and "<Branch>, Dept of" is rewritten to "US <Branch>". A parenthesised
 * part is removed from the name and used as the abbreviation ("Other" is
 * discarded, "Corps of Engineers" maps to a fixed name). Finally "Natl" and
 * "Dept" are expanded, dots are dropped, comma-separated parts are reordered so
 * the last part comes first, and the result is titleized.
 *
 * @param string $str raw label
 *
 * @return array|null array(name, abbreviation-or-null), or null for 'NONE', 'UNDETERMINED' and ''
 */
private function prepGovtName($str)
{
    $str = trim($str);

    if ($str == 'HOUSE OF REPRESENTATIVES') {
        return array('US House of Representatives', null);
    }
    if ($str == 'SENATE') {
        return array('US Senate', null);
    }
    if ($str == 'NONE' || $str == 'UNDETERMINED' || $str == '') {
        return null;
    }

    if (preg_match('/(Navy|Army|Air\\sForce)\\,\\s+Dept\\s+of/', $str, $branch)) {
        $str = str_replace($branch[0], 'US ' . $branch[1], $str);
    }

    // Pull out a parenthesised part (the closing parenthesis is optional in the pattern).
    $abb = null;
    preg_match('/\\(([^\\)]+)\\)?/s', $str, $paren);
    if (count($paren)) {
        $str = trim(str_replace($paren[0], '', $str));
        $abb = $paren[1];
    }

    if ($abb == 'Corps of Engineers') {
        return array('US Army Corps of Engineers', null);
    }
    if ($abb == 'Other') {
        $abb = null;
    }

    $str = str_replace(array('Natl', 'Dept', '.'), array('National', 'Department', ''), $str);

    // "A, B, C" becomes "C A; B": the last comma part leads, the rest follow joined by "; ".
    $pieces = explode(',', $str);
    if (count($pieces) > 1) {
        $leading = trim(array_pop($pieces));
        $str = $leading . ' ' . implode('; ', $pieces);
    }

    return array(LsLanguage::titleize(OrgTable::stripNamePunctuation($str)), $abb);
}
/**
 * Interactively turns one scraped match into an entity and its relationships.
 *
 * The operator is asked on the console whether to process the match, whether it
 * equals one of the similar existing entities, and otherwise whether to create
 * it. For people, the bio can be parsed for organisation names, each of which
 * may become a Position relationship (creating the organisation if needed).
 * Finally the entity is added to $this->list and related to $this->org when
 * those are set.
 *
 * @param array $match scraped fields; 'name' is required, 'bio' and 'rank' are optional
 *
 * @return false|null false when the operator skips the match, null otherwise
 */
public function parseResults($match)
{
    // Keep a "; "-delimited version of the raw bio for relationship parsing.
    // It stays null when there is no bio, so it is always defined further down.
    $bio_dirty = null;
    if (isset($match['bio'])) {
        $bio_dirty = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($match['bio'], "; ")));
        $bio_dirty = preg_replace('/(\\;\\s)+/is', '; ', $bio_dirty);
    }

    // Clean every field in place. Writing through the key (not a reference) leaves
    // no dangling reference for the rank preg_match() below to write through.
    foreach ($match as $k => $value) {
        $match[$k] = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($value, " ")));
    }

    if (!isset($match['name'])) {
        return;
    }
    $name = $match['name'];
    $bio = isset($match['bio']) ? $match['bio'] : '';

    $this->printDebug("_________________________\n\nname: " . $name . "\n");
    $this->printDebug("bio: " . $bio . "\n");

    $accept = strtolower($this->readline('Process this entity? (n to skip) '));
    if ($accept == 'n' || $accept == 'no') {
        return false;
    }

    if (!$this->org_org) {
        if ($this->last_first) {
            $entity = PersonTable::parseCommaName($name);
        } else {
            $entity = PersonTable::parseFlatName($name);
        }
        $similar_entities = PersonTable::getSimilarQuery2($entity)->execute();
    } else {
        $entity = new Entity();
        $entity->addExtension('Org');
        foreach ($this->org_extensions as $ext) {
            $entity->addExtension($ext);
        }
        $entity->setEntityField('name', $name);
        $similar_entities = OrgTable::getSimilarQuery($entity)->execute();
    }

    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }

        $accept = $this->promptWithRetries(
            ' Is this the same entity? (y or n)',
            ' Is this the same entity? 
(y or n) ',
            array('y', 'n')
        );

        if ($accept == 'y') {
            $entity = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        } elseif ($accept == 'break') {
            break;
        }
    }

    $created = false;
    if (!$matched) {
        if ($entity->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity->name_first . ' ' . $entity->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity->name);
        }

        $accept = $this->promptWithRetries(
            ' create this new entity? (y or n) ',
            ' create this new entity? (y or n) ',
            array('y', 'n')
        );

        if ($accept == 'y') {
            if ($entity->getPrimaryExtension() == 'Person') {
                $this->printDebug("\n Bio: {$bio} \n");
                $accept = $this->promptWithRetries(
                    ' Add this bio? (y or n) ',
                    ' add this bio? (y or n) ',
                    array('y', 'n')
                );
                if ($accept == 'y') {
                    $entity->summary = $bio;
                }
            }
            $entity->save();
            $entity->addReference($this->url, null, null, $this->url_name);
            $created = true;
            $this->printDebug(' ' . $entity->name . ' saved');
        }
    }

    if (($matched || $created) && $entity->getPrimaryExtension() == 'Person') {
        $accept = $this->promptWithRetries(
            "Parse above bio for possible relationships? (y or n) ",
            "Parse above bio for possible relationships? (y or n) ",
            array('y', 'n')
        );

        if ($accept == 'y') {
            $names = $entity->parseBio($bio_dirty);
            $this->printDebug(" Orgs that {$entity} has a position at?");

            foreach ($names as $name) {
                $exists = false;
                $name = trim($name);

                $accept = $this->promptWithRetries(
                    " > {$name} :: an org? (y or n or b to break) ",
                    " {$name} :: an org? 
(y or n or b to break) ",
                    array('y', 'n', 'b'),
                    true
                );

                if ($accept == 'b') {
                    break;
                }
                if ($accept != 'y') {
                    continue;
                }

                $this->printDebug(' .....looking for names.....');
                $orgs = EntityTable::getByExtensionAndNameQuery('Org', $name)->limit(10)->execute();
                $related_org = null;

                foreach ($orgs as $org) {
                    $q = LsDoctrineQuery::create()
                        ->from('Relationship r')
                        ->where('entity1_id = ? and entity2_id = ?', array($entity->id, $org->id))
                        ->fetchOne();
                    if ($q) {
                        $this->printDebug(' Position already exists, skipping...');
                        $exists = true;
                        break;
                    }

                    $question = " Create a position relationship between {$entity->name} and {$org->name}? (y or n) ";
                    $accept = $this->promptWithRetries($question, $question, array('y', 'n'));
                    if ($accept == 'y') {
                        $related_org = $org;
                        break;
                    }
                }

                if (!$related_org && !$exists) {
                    // The retry counter now starts fresh for this prompt; it used to
                    // inherit whatever was left over from the previous question.
                    $question = " couldn't find org, should this one be created: {$name} (y or n) ";
                    $accept = $this->promptWithRetries($question, $question, array('y', 'n'));

                    if ($accept == 'y') {
                        $related_org = new Entity();
                        $related_org->addExtension('Org');
                        $related_org->name = preg_replace('/\\.(?!com)/i', '', $name);

                        $extensions = $this->readline(" what extensions should this org get? (eg 'Business, LobbyingFirm, LawFirm') ");
                        $extensions = preg_split('/\\,\\s*/isu', $extensions, -1, PREG_SPLIT_NO_EMPTY);

                        try {
                            foreach ($extensions as $extension) {
                                $related_org->addExtension($extension);
                            }
                            $related_org->save();
                            $related_org->addReference($this->url, null, null, $this->url_name);
                        } catch (Exception $e) {
                            $this->printDebug(' !!! problems with org creation, skipping');
                            $related_org = null;
                        }
                    }
                }

                if (!$related_org) {
                    continue;
                }

                $q = LsDoctrineQuery::create()
                    ->from('Relationship r')
                    ->where('r.entity1_id = ? and r.entity2_id = ? 
and r.category_id = ?', array($entity->id, $related_org->id, 1))
                    ->fetchOne();
                if ($q) {
                    $this->printDebug(' (relationship already found, skipping...)');
                    continue;
                }

                $relationship = new Relationship();
                $relationship->Entity1 = $entity;
                $relationship->Entity2 = $related_org;
                $relationship->setCategory('Position');

                $title = $this->readline(" Title for this position relationship? (<enter> to skip) ");
                if (strlen($title) > 2) {
                    $relationship->description1 = $title;
                }

                $current = strtolower($this->readline(" Is the relationship current? (y or n or <enter> to skip) "));
                if (in_array($current, array('y', 'yes'))) {
                    $relationship->is_current = 1;
                } elseif (in_array($current, array('n', 'no'))) {
                    $relationship->is_current = 0;
                }

                $board = strtolower($this->readline(" Is the relationship a board position? (y or n or <enter> to skip) "));
                if (in_array($board, array('y', 'yes'))) {
                    $relationship->is_board = 1;
                } elseif (in_array($board, array('n', 'no'))) {
                    $relationship->is_board = 0;
                }

                $relationship->save();
                $relationship->addReference($this->url, null, null, $this->url_name);
                $this->printDebug(" Relationship saved: {$relationship}");
            }
        }
    }

    if ($matched || $created) {
        if ($this->list) {
            $q = LsDoctrineQuery::create()
                ->from('LsListEntity l')
                ->where('l.entity_id = ? and l.list_id = ?', array($entity->id, $this->list->id))
                ->fetchOne();

            if (!$q) {
                $le = new LsListEntity();
                $le->Entity = $entity;
                $le->LsList = $this->list;
                // Use the first run of digits in the rank field, if any.
                if (isset($match['rank']) && preg_match('/(\\d+)/isu', $match['rank'], $rankDigits)) {
                    $le->rank = $rankDigits[1];
                }
                $le->save();
                $this->printDebug('List membership saved');
            }
        }

        if ($this->org) {
            $q = LsDoctrineQuery::create()
                ->from('Relationship r')
                ->where('r.entity1_id = ? and r.entity2_id = ? 
and r.category_id = ?', array($entity->id, $this->org->id, 1))
                ->fetchOne();
            if ($q) {
                $this->printDebug(' (relationship already found, skipping...)');
                return;
            }

            $relationship = new Relationship();
            $relationship->Entity1 = $entity;
            $relationship->Entity2 = $this->org;
            $relationship->setCategory($this->relationship_category);

            if ($this->description1) {
                $relationship->description1 = $this->description1;
            } else {
                $description = $this->readline(" what description to give this relationship ({$relationship}) ? (less than 3 chars will skip)");
                if (strlen($description) > 2) {
                    $relationship->description1 = $description;
                }
            }

            if ($this->relationship_category == 'Position') {
                $relationship->is_board = $this->is_board;
            } elseif ($this->relationship_category == 'Donation') {
                if ($this->amount) {
                    $relationship->amount = $this->amount;
                } else {
                    $amount = $this->readline(" what amount ({$relationship}) ? (less than 3 chars will skip)");
                    if (strlen($amount) > 1) {
                        $relationship->amount = $amount;
                    }
                }
            }

            $relationship->save();
            $relationship->addReference($this->url, null, null, $this->url_name);
            $this->printDebug(" Relationship saved: {$relationship}");
        }
    }
}

/**
 * Asks a console question and repeats it until an accepted answer is given or
 * five answers in total have been read.
 *
 * @param string $prompt      text of the first question
 * @param string $retryPrompt text used for every repeated question
 * @param array  $valid       answers that end the questioning
 * @param bool   $lowercase   lower-case every answer before checking it
 *
 * @return mixed the last answer read, which may be outside $valid once the attempts are used up
 */
private function promptWithRetries($prompt, $retryPrompt, $valid, $lowercase = false)
{
    $answer = $this->readline($prompt);
    if ($lowercase) {
        $answer = strtolower($answer);
    }

    $attempts = 1;
    while (!in_array($answer, $valid) && $attempts < 5) {
        $answer = $this->readline($retryPrompt);
        if ($lowercase) {
            $answer = strtolower($answer);
        }
        $attempts++;
    }

    return $answer;
}
/**
 * Interactively imports one row: finds or creates the row's entity, then either
 * relates it to $this->entity or, when no entity is set, adds it to $this->list.
 *
 * The row is skipped (with a debug message) when a required column is missing
 * or empty, when 'primary_type' is neither 'Person' nor 'Org', or when the
 * relationship category is not found in RelationshipCategoryTable::$categoryNames.
 *
 * @param array $row row values keyed by column name; 'entity_name' and 'primary_type' are
 *                   required, 'relationship_category' too when $this->entity is set
 */
public function processRow($row)
{
    // A row may carry its own source; otherwise the importer-wide one is used.
    if (isset($row['url']) && $row['url'] != '' && isset($row['url_name']) && $row['url_name'] != '') {
        $url = $row['url'];
        $url_name = $row['url_name'];
    } else {
        $url = $this->url;
        $url_name = $this->url_name;
    }

    // Trim every string value in place. The trimmed value used to be thrown away,
    // which made this loop a no-op. Non-string values are left untouched.
    foreach ($row as &$r) {
        if (is_string($r)) {
            $r = trim($r);
        }
    }
    unset($r);

    if ($this->entity) {
        $required = array('entity_name', 'primary_type', 'relationship_category');
    } else {
        $required = array('entity_name', 'primary_type');
    }
    foreach ($required as $req) {
        if (!isset($row[$req]) || $row[$req] == '') {
            $this->printDebug('!!! > skipping row, ' . $req . ' not set');
            return;
        }
    }

    if ($row['primary_type'] != 'Person' && $row['primary_type'] != 'Org') {
        $this->printDebug('!!! > primary type not properly set, skipping row...');
        return;
    }

    if ($this->entity) {
        $relationship_category = trim($row['relationship_category']);
        $relationship_category_id = array_search($relationship_category, RelationshipCategoryTable::$categoryNames);
        if (!$relationship_category_id) {
            $this->printDebug('!!! > relationship type not properly set, skipping row...');
            return;
        }
    }

    $this->printDebug("processing: " . $row['entity_name'] . '......');

    if ($row['primary_type'] == 'Person') {
        $entity2 = PersonTable::parseFlatName($row['entity_name']);
        $similar_entities = PersonTable::getSimilarQuery2($entity2)->execute();
    } else {
        $entity2 = new Entity();
        $entity2->addExtension('Org');
        $entity2->setEntityField('name', $row['entity_name']);
        $similar_entities = OrgTable::getOrgsWithSimilarNames($entity2->name);
    }

    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }

        $accept = $this->readline(' Is this the same entity? 
(y or n or b to break)');
        if ($accept == 'y') {
            $entity2 = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        } elseif ($accept == 'b') {
            break;
        }
    }

    $created = false;
    if (!$matched) {
        if ($entity2->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity2->name_first . ' ' . $entity2->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity2->name);
        }

        $accept = $this->readline(' create this new entity? (y or n) ');
        if ($accept == 'y') {
            try {
                // The extensions column is optional; a missing column is passed on as null.
                $extensionList = isset($row['entity_extensions']) ? $row['entity_extensions'] : null;
                $extensions = LsString::split($extensionList, '\\s*\\,\\s*');
                foreach ($extensions as $extension) {
                    $entity2->addExtension($extension);
                }
                $entity2->save();
                $entity2->addReference($url, null, null, $url_name);
            } catch (Exception $e) {
                $this->printDebug(' !!! problems with extensions for this row');
            }

            $fields = array('summary', 'blurb', 'website');
            foreach ($fields as $field) {
                if (isset($row[$field])) {
                    $entity2[$field] = $row[$field];
                }
            }
            $entity2->save();
            $entity2->addReference($url, null, null, $url_name);
            $created = true;
            $this->printDebug(' ' . $entity2->name . ' saved');
        } else {
            $entity2 = null;
        }
    }

    if (!$entity2) {
        return;
    }

    // create relationship
    if ($this->entity) {
        $relationship = new Relationship();

        if (isset($row['relationship_order']) && $row['relationship_order'] != '') {
            // An explicit order wins: '1' puts the importer's entity first.
            if ($row['relationship_order'] == '1') {
                $relationship->Entity1 = $this->entity;
                $relationship->Entity2 = $entity2;
            } else {
                $relationship->Entity2 = $this->entity;
                $relationship->Entity1 = $entity2;
            }
        } elseif ($relationship_category == 'Position' || $relationship_category == 'Education') {
            // For these categories a Person row goes first and an Org row goes second.
            if ($row['primary_type'] == 'Org') {
                $relationship->Entity1 = $this->entity;
                $relationship->Entity2 = $entity2;
            } else {
                $relationship->Entity1 = $entity2;
                $relationship->Entity2 = $this->entity;
            }
        } else {
            $relationship->Entity1 = $this->entity;
            $relationship->Entity2 = $entity2;
        }

        $relationship->setCategory($relationship_category);

        $cols = array('description1', 'description2', 'start_date', 'end_date', 'goods', 'amount', 'is_board', 'is_executive', 'is_employee');
        foreach ($cols as $col) {
            if (isset($row[$col]) && $row[$col] != '') {
                try {
                    $relationship[$col] = $row[$col];
                } catch (Exception $e) {
                    $this->printDebug(" could not set {$col} for relationship, skipping");
                }
            }
        }

        // NOTE(review): $relationship has not been saved yet, so its id is presumably
        // null here; confirm that the "r.id <> ?" condition behaves as intended.
        $q = LsDoctrineQuery::create()
            ->from('Relationship r')
            ->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ? and r.id <> ?', array($relationship->entity1_id, $relationship->entity2_id, $relationship->category_id, $relationship->id))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (relationship already found, skipping...)');
            return;
        }

        $relationship->save();
        $relationship->addReference($url, null, null, $url_name);
        $this->printDebug(" Relationship saved: {$relationship}\n");

        return;
    }

    if ($this->list) {
        $q = LsDoctrineQuery::create()
            ->from('LsListEntity le')
            ->where('le.entity_id = ? 
and le.list_id = ?', array($entity2->id, $this->list->id))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (already on list, skipping...)');
            return;
        }

        $le = new LsListEntity();
        $le->LsList = $this->list;
        $le->Entity = $entity2;
        if (isset($row['rank'])) {
            $le->rank = $row['rank'];
        }
        $le->save();
    }
}
/**
 * Bulk-add entities related to the current entity.
 *
 * The action handles several request shapes:
 *
 *  - POST with commit "Begin"/"Continue": resolves (or creates) the source
 *    Reference, collects candidate names according to `add_method`
 *    (scrape, upload, summary, text, db_search, or confirmed names from the
 *    intermediate confirmation page), loads the relationship form defaults,
 *    then either redirects to the one-by-one toolbar or fills
 *    `$this->matches` for the en-masse page.
 *  - A request carrying a `page` parameter: only stores paging values.
 *  - POST with commit "Submit": creates the selected/new entities and one
 *    Relationship per submitted row, then redirects to the entity page.
 *  - POST with commit "Cancel": redirects to the entity page.
 *
 * @param sfWebRequest $request current request (type assumed from the
 *                              symfony action convention, not visible here)
 *
 * @return void
 */
public function executeAddBulk($request)
{
    $this->checkEntity($request, false, false);

    $this->reference_form = new ReferenceForm();
    $this->reference_form->setSelectObject($this->entity);
    $this->add_bulk_form = new AddBulkForm();

    // possible default categories, with an empty first option
    $this->categories = LsDoctrineQuery::create()
        ->select('c.name, c.name')
        ->from('RelationshipCategory c')
        ->orderBy('c.id')
        ->fetchAll(PDO::FETCH_KEY_PAIR);
    array_unshift($this->categories, '');

    if ($request->isMethod('post') && in_array($request->getParameter('commit'), array('Begin', 'Continue'))) {
        if ($request->hasParameter('ref_id')) {
            $this->ref_id = $request->getParameter('ref_id');
        } else {
            $refParams = $request->getParameter('reference');
            $this->reference_form->bind($refParams);

            // The array cast exposes the holder's internals; the first element
            // is taken as the raw parameter array.
            $restOfParams = (array) $request->getParameterHolder();
            $restOfParams = array_shift($restOfParams);
            $this->add_bulk_form->bind($restOfParams, $request->getFiles());

            if (!$this->reference_form->isValid() || !$this->add_bulk_form->isValid()) {
                return;
            }

            if ($this->ref_id = $refParams['existing_source']) {
                $ref = Doctrine::getTable('Reference')->find($this->ref_id);
            } else {
                $ref = new Reference();
                $ref->object_model = 'Entity';
                $ref->object_id = $this->entity->id;
                $ref->source = $refParams['source'];
                $ref->name = $refParams['name'];
                $ref->source_detail = $refParams['source_detail'];
                $ref->publication_date = $refParams['publication_date'];
                $ref->save();
            }

            $this->ref_id = $ref->id;
            $this->reference = $ref;
        }

        $verify_method = $request->getParameter('verify_method');

        if ($this->add_method = $request->getParameter('add_method')) {
            if ($this->add_method == 'scrape') {
                // scrape the reference url and present the names found for confirmation
                $browser = new sfWebBrowser();
                $entity_types = $request->getParameter('entity_types');

                // FIND NAMES AT URL USING COMBO OF OPENCALAIS & LS CUSTOM HTML PARSING
                if (!$browser->get($ref->source)->responseIsError()) {
                    $text = $browser->getResponseText();
                    $this->names = LsTextAnalysis::getHtmlEntityNames($text, $entity_types);
                    $text = LsHtml::findParagraphs($text);
                    $this->text = preg_replace('/<[^b][^>]*>/is', " ", $text);
                    $this->confirm_names = true;

                    return;
                } else {
                    $request->setError('csv', 'problems finding names at that url');
                }
            } elseif ($this->add_method == 'upload') {
                $file = $this->add_bulk_form->getValue('file');

                // Check the upload itself: the previous guard tested the generated
                // path (always truthy) after the file had already been dereferenced.
                if (!$file) {
                    $request->setError('file', 'You need to upload a file.');

                    return;
                }

                $filename = 'uploaded_' . sha1($file->getOriginalName());
                $extension = $file->getExtension($file->getOriginalExtension());
                $filePath = sfConfig::get('sf_temp_dir') . '/' . $filename . $extension;
                $file->save($filePath);

                $spreadsheetArr = LsSpreadsheet::parse($filePath);

                if (!$spreadsheetArr) {
                    $request->setError('file', 'The file you uploaded could not be parsed properly.');

                    return;
                }

                $names = $spreadsheetArr['rows'];

                if (!in_array('name', $spreadsheetArr['headers'])) {
                    $request->setError('file', 'The file you uploaded could not be parsed properly because there is no "name" column.');

                    return;
                }

                if (in_array('summary', $spreadsheetArr['headers'])) {
                    // NOTE(review): the '?' needles look like typographic quotes lost in an
                    // encoding conversion; as written, literal question marks are rewritten
                    // too -- confirm the intended characters.
                    foreach ($names as &$name) {
                        $name['summary'] = str_replace(array('?', "'"), "'", $name['summary']);
                        $name['summary'] = str_replace(array('?', '?', '"'), '"', $name['summary']);

                        if (isset($name['title'])) {
                            $name['description1'] = $name['title'];
                        }
                    }
                    unset($name); // drop the dangling reference before $name is reused below
                }
            } elseif ($this->add_method == 'summary') {
                // parse the entity's own summary for names
                $this->text = $this->entity->summary;
                $entity_types = $request->getParameter('entity_types');
                $this->names = LsTextAnalysis::getTextEntityNames($this->text, $entity_types);
                $this->confirm_names = true;

                return;
            } elseif ($this->add_method == 'text') {
                $manual_names = $request->getParameter('manual_names');

                if ($manual_names && $manual_names != "") {
                    $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                    $manual_names = array_map('trim', $manual_names);
                    $names = array();

                    foreach ($manual_names as $name) {
                        $names[] = array('name' => $name);
                    }
                } else {
                    $request->setError('csv', 'You did not add names properly.');

                    return;
                }
            } elseif ($this->add_method == 'db_search') {
                $this->db_search = true;
            }
        }

        // intermediate scrape page -- takes confirmed names, builds names arr
        if ($confirmed_names = $request->getParameter('confirmed_names')) {
            $restOfParams = (array) $request->getParameterHolder();
            $restOfParams = array_shift($restOfParams);
            $this->add_bulk_form->bind($restOfParams, $request->getFiles());

            if (!$this->add_bulk_form->isValid()) {
                $this->reference = Doctrine::getTable('Reference')->find($this->ref_id);
                // SECURITY NOTE(review): unserialize() runs on request-supplied data here;
                // consider a JSON round-trip instead (needs a matching template change).
                $this->names = unserialize(stripslashes($request->getParameter('names')));
                $this->confirm_names = true;

                return;
            }

            $names = array();

            foreach ($confirmed_names as $cn) {
                $names[] = array('name' => $cn);
            }

            $manual_names = $request->getParameter('manual_names');

            if ($manual_names && $manual_names != "") {
                $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                $manual_names = array_map('trim', $manual_names);

                foreach ($manual_names as $name) {
                    $names[] = array('name' => $name);
                }
            }
        }

        // LOAD IN RELATIONSHIP DEFAULTS
        if (isset($verify_method)) {
            $defaults = $request->getParameter('relationship');

            // Always defined so the type filter in the matching loop never reads
            // an undefined variable when the verify method is not 'enmasse'.
            $extensions_arr = array();

            if ($verify_method == 'enmasse') {
                $this->default_type = $request->getParameter('default_type');
                $this->order = $request->getParameter('order');
                $category_name = $request->getParameter('relationship_category_all');
                $this->extensions = ExtensionDefinitionTable::getByTier(2, $this->default_type);

                foreach ($this->extensions as $ext) {
                    $extensions_arr[] = $ext->name;
                }
            } else {
                $category_name = $request->getParameter('relationship_category_one');
            }

            if ($category_name) {
                $this->category_name = $category_name;

                if (!Doctrine::getTable('RelationshipCategory')->findOneByName($category_name)) {
                    $request->setError('csv', 'You did not select a relationship category.');

                    return;
                }

                $formClass = $category_name . 'Form';
                $categoryForm = new $formClass(new Relationship());
                $categoryForm->setDefaults($defaults);
                $this->form_schema = $categoryForm->getFormFieldSchema();

                if (in_array($category_name, array('Position', 'Education', 'Membership', 'Donation', 'Lobbying', 'Ownership'))) {
                    $this->field_names = array('description1', 'start_date', 'end_date', 'is_current');
                } else {
                    $this->field_names = array('description1', 'description2', 'start_date', 'end_date', 'is_current');
                }

                $extraFields = array(
                    'Position' => array('is_board', 'is_executive'),
                    'Education' => array('degree_id'),
                    'Donation' => array('amount'),
                    'Transaction' => array('amount'),
                    'Lobbying' => array('amount'),
                    'Ownership' => array('percent_stake', 'shares'),
                );

                if (isset($extraFields[$category_name])) {
                    $this->field_names = array_merge($this->field_names, $extraFields[$category_name]);
                }
            }

            $this->matches = array();

            // BOOT TO TOOLBAR OR LOOK FOR MATCHES FOR ENMASSE ADD
            if ((isset($names) && count($names) > 0) || isset($this->db_search)) {
                if ($verify_method == 'onebyone') {
                    if (isset($category_name)) {
                        $defaults['category'] = $category_name;
                    }

                    $toolbar_names = array();

                    foreach ($names as $name) {
                        $toolbar_names[] = $name['name'];
                    }

                    $this->getUser()->setAttribute('toolbar_names', $toolbar_names);
                    $this->getUser()->setAttribute('toolbar_entity', $this->entity->id);
                    $this->getUser()->setAttribute('toolbar_defaults', $defaults);
                    $this->getUser()->setAttribute('toolbar_ref', $this->ref_id);
                    $this->redirect('relationship/toolbar');
                } else {
                    $this->category_name = $category_name;

                    if (isset($this->db_search)) {
                        // entities whose summary or blurb mentions this entity's name or an alias
                        $nameRegex = '[[:<:]]' . $this->entity->name . '[[:>:]]';
                        $q = LsDoctrineQuery::create()
                            ->from('Entity e')
                            ->where('(e.summary rlike ? or e.blurb rlike ?)', array($nameRegex, $nameRegex));

                        foreach ($this->entity->Alias as $alias) {
                            $aliasRegex = '[[:<:]]' . $alias->name . '[[:>:]]';
                            $q->orWhere('(e.summary rlike ? or e.blurb rlike ?)', array($aliasRegex, $aliasRegex));
                        }

                        $q->setHydrationMode(Doctrine::HYDRATE_ARRAY);
                        $cat_id = constant('RelationshipTable::' . strtoupper($category_name) . '_CATEGORY');
                        $q->whereParenWrap();
                        // ...excluding entities that already have a link of this category to the entity
                        $q->andWhere(
                            'NOT EXISTS (SELECT DISTINCT l.relationship_id FROM Link l '
                            . 'WHERE l.entity1_id = e.id AND l.entity2_id = ? AND l.category_id = ?)',
                            array($this->entity['id'], $cat_id)
                        );
                        $summary_matches = $q->execute();

                        // Build the highlight pattern once (it does not depend on the match).
                        // With no aliases the pattern would be '/()/is', which matches the
                        // empty string everywhere, so highlighting is skipped in that case.
                        $aliasPatterns = array();

                        foreach ($this->entity->Alias as $alias) {
                            $aliasPatterns[] = LsString::escapeStringForRegex($alias->name);
                        }

                        $highlightRegex = count($aliasPatterns) ? '/(' . implode("|", $aliasPatterns) . ')/is' : null;

                        foreach ($summary_matches as $summary_match) {
                            if ($highlightRegex !== null) {
                                $summary_match['summary'] = preg_replace($highlightRegex, '<strong>$1</strong>', $summary_match['summary']);
                            }

                            $this->matches[] = array('search_results' => array($summary_match));
                        }
                    } else {
                        for ($i = 0; $i < count($names); $i++) {
                            if (!isset($names[$i]['name']) || trim($names[$i]['name']) == '') {
                                continue;
                            }

                            $name = $names[$i]['name'];
                            $terms = $name;

                            if ($this->default_type == 'Person') {
                                // single-word names are searched verbatim
                                if (count(preg_split('/\\s+/', $name)) > 1) {
                                    $terms = PersonTable::nameSearch($name);
                                }

                                $primary_ext = "Person";
                            } elseif ($this->default_type == 'Org') {
                                $terms = OrgTable::nameSearch($name);
                                $primary_ext = "Org";
                            } else {
                                $primary_ext = null;
                            }

                            // args: terms, page, num, listIds, aliases, primary_ext
                            $pager = EntityTable::getSphinxPager($terms, 1, 20, null, true, $primary_ext);

                            $match = $names[$i];
                            $match['search_results'] = $pager->execute();

                            if (isset($names[$i]['types'])) {
                                $types = array_map('trim', explode(',', $names[$i]['types']));
                                $match['types'] = array();

                                // keep only types that are known extensions for the default type
                                foreach ($types as $type) {
                                    if (in_array($type, $extensions_arr)) {
                                        $match['types'][] = $type;
                                    }
                                }
                            }

                            $this->matches[] = $match;
                        }
                    }
                }
            }
        }
    } elseif ($page = $this->getRequestParameter('page')) {
        $this->page = $page;
        $this->num = $this->getRequestParameter('num', 50);
    } elseif ($request->isMethod('post') && $request->getParameter('commit') == 'Submit') {
        $this->ref_id = $this->getRequestParameter('ref_id');
        $relationship_category = $this->getRequestParameter('category_name');
        $order = $this->getRequestParameter('order');
        $default_type = $request->getParameter('default_type');
        $default_ref = Doctrine::getTable('Reference')->find($request->getParameter('ref_id'));
        $count = $this->getRequestParameter('count');

        for ($i = 0; $i < $count; $i++) {
            $entity_id = $request->getParameter('entity_' . $i);

            if (!$entity_id) {
                continue;
            }

            // Reset per row so a reference never carries over from a previous row
            // and is never read while undefined.
            $ref = null;
            $selected_entity_id = null;
            $relParams = $request->getParameter("relationship_" . $i);

            if ($relParams['ref_name']) {
                $ref = array(
                    'source' => $relParams['ref_source'],
                    'name' => $relParams['ref_name'],
                );
            }

            if ($entity_id == 'new') {
                $name = $request->getParameter('new_name_' . $i);

                if ($default_type == 'Person') {
                    $new_entity = PersonTable::parseFlatName($name);
                } else {
                    $new_entity = new Entity();
                    $new_entity->addExtension('Org');
                    $new_entity->name = trim($name);
                }

                $new_entity->save();
                $new_entity->blurb = $request->getParameter('new_blurb_' . $i);
                $new_entity->summary = $request->getParameter('new_summary_' . $i);

                if (!$ref) {
                    $ref = $default_ref;
                }

                $new_entity->addReference($ref['source'], null, null, $ref['name']);

                if ($types = $request->getParameter('new_extensions_' . $i)) {
                    foreach ($types as $type) {
                        $new_entity->addExtension($type);
                    }
                }

                $new_entity->save();
                $selected_entity_id = $new_entity->id;
            } elseif ($entity_id > 0) {
                $selected_entity_id = $entity_id;
                LsCache::clearEntityCacheById($selected_entity_id);
            }

            if (!$selected_entity_id) {
                continue;
            }

            $startDate = $relParams['start_date'];
            $endDate = $relParams['end_date'];

            // Strip everything that is not relationship data before fromArray();
            // 'ref_source' is the key actually read above, 'ref_url' is kept for
            // backward compatibility.
            unset(
                $relParams['start_date'],
                $relParams['end_date'],
                $relParams['ref_name'],
                $relParams['ref_source'],
                $relParams['ref_url']
            );

            $rel = new Relationship();
            $rel->setCategory($relationship_category);

            if ($order == '1') {
                $rel->entity1_id = $this->entity['id'];
                $rel->entity2_id = $selected_entity_id;
            } else {
                $rel->entity2_id = $this->entity['id'];
                $rel->entity1_id = $selected_entity_id;
            }

            // only set dates if valid
            if ($startDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($startDate))) {
                $rel->start_date = Dateable::convertForDb($startDate);
            }

            if ($endDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($endDate))) {
                $rel->end_date = Dateable::convertForDb($endDate);
            }

            $rel->fromArray($relParams, null, true);

            // for db_search adds, prefer one of the matched entity's own references
            if ($request->hasParameter('add_method') && $request->getParameter('add_method') == 'db_search') {
                $refs = EntityTable::getSummaryReferences($selected_entity_id);

                if (count($refs)) {
                    $ref = $refs[0];
                } else {
                    $refs = EntityTable::getAllReferencesById($selected_entity_id);

                    if (count($refs)) {
                        $ref = $refs[0];
                    }
                }
            }

            if (!$ref) {
                $ref = $default_ref;
            }

            $rel->saveWithRequiredReference(array('source' => $ref['source'], 'name' => $ref['name']));
        }

        $this->clearCache($this->entity);
        $this->redirect($this->entity->getInternalUrl());
    } elseif ($request->isMethod('post') && $request->getParameter('commit') == 'Cancel') {
        $this->redirect($this->entity->getInternalUrl());
    }
}