/**
 * Imports a single row describing a person and their affiliation(s).
 *
 * Steps, all inside one database transaction:
 *  1. Trim every cell of the row.
 *  2. Parse $row['name'] into a person and fetch similarly named entities.
 *  3. For each candidate whose first name is a prefix of the searched first
 *     name (or vice versa), accept it when checkAffiliations() recognises one
 *     of the row's affiliationN values, or when the candidate's extended bio
 *     matches the name regex of one of the unmatched affiliations.
 *  4. If nobody was accepted, save the parsed person as a new entity.
 *  5. If the person is new or was matched through the bio only, attach them
 *     to an org resembling $row['affiliation1'] (creating the org when none
 *     is found) with a new Position relationship.
 *
 * A summary of what was found/created is appended to $this->edits. The
 * start_date / end_date / title / notes / rank cells are only copied into
 * that summary; this method does not write them onto the saved relationship.
 *
 * @param array $row Row keyed by column name; 'name' and 'affiliation1' are
 *                   read unconditionally, the other columns are optional.
 *
 * @throws Exception Any exception raised while processing is re-thrown after
 *                   the transaction has been rolled back.
 */
protected function processRow($row)
{
    foreach ($row as &$r) {
        $r = trim($r);
    }
    // Break the reference: otherwise $r stays bound to the last element of
    // $row and any later assignment to $r would silently overwrite that cell.
    unset($r);

    $edit = array(
        'Search Name' => $row['name'],
        'Affiliation Name' => $row['affiliation1'],
        'Similar Names' => array(),
        'New Person' => null,
        'Existing Person' => null,
        'New Org' => null,
        'Existing Org' => null,
        'New Relationship' => null
    );

    try {
        $this->db->beginTransaction();

        $person = null;
        $search_person = PersonTable::parseFlatName($row['name']);
        $similar = $search_person->getSimilarEntitiesQuery(true)->execute();
        $matched_bio = false;
        $similar_ids = array();

        foreach ($similar as $s) {
            $similar_ids[] = $s->id;

            // Skip candidates whose first name is neither a prefix of the
            // searched first name nor prefixed by it.
            $sim_re = LsString::escapeStringForRegex($s->name_first);
            $search_re = LsString::escapeStringForRegex($search_person->name_first);
            if (preg_match('/^' . $sim_re . '/su', $search_person->name_first) == 0
                && preg_match('/^' . $search_re . '/su', $s->name_first) == 0
            ) {
                continue;
            }

            // Walk affiliation1, affiliation2, ... until the first missing or
            // blank column, stopping early at the first recognised affiliation.
            $ct = 1;
            $affiliation_matched = false;
            $unmatched_affils = array();
            while (isset($row['affiliation' . $ct]) && trim($row['affiliation' . $ct]) != '') {
                $affil = trim($row['affiliation' . $ct]);
                $org = $s->checkAffiliations(array($affil));
                if ($org) {
                    $edit['Existing Org'] = $org->id;
                    $affiliation_matched = true;
                    break;
                }
                $unmatched_affils[] = $affil;
                $ct++;
            }

            if ($affiliation_matched) {
                $person = $s;
                break;
            }

            // No affiliation matched: fall back to looking for any of the
            // affiliation names inside the candidate's extended bio.
            $bio = $s->getExtendedBio();
            foreach ($unmatched_affils as $affil) {
                $affil = OrgTable::removeSuffixes($affil);
                $this->printDebug($affil);
                $this->printDebug($bio);
                if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
                    $matched_bio = true;
                    break;
                }
            }

            if ($matched_bio) {
                $person = $s;
                break;
            }

            $this->printDebug(' ' . $s->name . ' failed');
        }

        $edit['Similar Names'] = array_slice($similar_ids, 0, 5);
        $no_match = false;

        if (!$person) {
            if (isset($row['bio']) && trim($row['bio']) != '') {
                $search_person->summary = $row['bio'];
            }
            $search_person->save();
            $this->printDebug(' not found, new person saved: ' . $search_person->name);
            $search_person->addReference($this->source_url, null, null, $this->source_name);
            $no_match = true;
            $edit['New Person'] = $search_person->id;
            $person = $search_person;
        } else {
            // Only fill in the summary when the existing person has none.
            if (isset($row['bio']) && trim($row['bio']) != '' && !$person->summary) {
                $person->summary = $row['bio'];
                $person->save();
            }
            $this->printDebug(' **person found: ' . $person->name);
            $edit['Existing Person'] = $person->id;
        }

        // A person accepted through checkAffiliations() gets no new
        // relationship here; only bio matches and new people do.
        if ($matched_bio || $no_match) {
            $orgs = OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true);
            $max = -1;
            $affiliated_org = null;

            // Among similarly named orgs keep the one with the highest
            // related-entities count (first one wins on ties).
            foreach ($orgs as $org) {
                $this->printDebug(' found match: ' . $org->name);
                $ct = $org->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)->count();
                if ($ct > $max) {
                    $affiliated_org = $org;
                    $edit['Existing Org'] = $affiliated_org->id;
                    $max = $ct;
                }
            }

            if (!$affiliated_org) {
                $affiliated_org = new Entity();
                $affiliated_org->addExtension('Org');
                if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
                    $extensions = explode(',', $row['affiliation1_extensions']);
                    foreach ($extensions as $ext) {
                        $ext = trim($ext);
                        // Unknown extension names are ignored rather than rejected.
                        if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
                            $affiliated_org->addExtension($ext);
                        }
                    }
                }
                $affiliated_org->name = $row['affiliation1'];
                $affiliated_org->save();
                $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
                $edit['New Org'] = $affiliated_org->id;
            }

            $rel = new Relationship();
            $rel->Entity1 = $person;
            $rel->Entity2 = $affiliated_org;
            $rel->setCategory('Position');
            if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
                $description = trim($row['affiliation1_title']);
                $rel->description1 = $description;
                // Director, Trustee and anything starting with "Chair" are
                // recorded as board (non-employee) positions.
                if ($description == 'Director' || $description == 'Trustee' || preg_match('/^Chair/su', $description)) {
                    $rel->is_board = 1;
                    $rel->is_employee = 0;
                }
            }
            $rel->save();
            $rel->addReference($this->source_url, null, null, $this->source_name);
            $edit['New Relationship'] = $rel->id;
        }

        // These cells are recorded in the edit summary only.
        if (isset($row['start_date']) && trim($row['start_date']) != '') {
            $edit['Relationship']['start_date'] = trim($row['start_date']);
        }
        if (isset($row['end_date']) && trim($row['end_date']) != '') {
            $edit['Relationship']['end_date'] = trim($row['end_date']);
        }
        if (isset($row['title']) && trim($row['title']) != '') {
            $edit['Relationship']['title'] = trim($row['title']);
        }
        if (isset($row['notes']) && trim($row['notes']) != '') {
            $edit['Relationship']['notes'] = trim($row['notes']);
        }
        if (isset($row['rank']) && $row['rank'] != '') {
            $edit['rank'] = $row['rank'];
        }

        $this->db->commit();
    } catch (Exception $e) {
        $this->db->rollback();
        throw $e;
    }

    $this->edits[] = $edit;
}
/**
 * Interactively imports one row describing a person or org.
 *
 * The row's entity is matched against similarly named existing entities; the
 * operator confirms a match (or the creation of a new entity) through
 * $this->readline(). The resulting entity is then either related to
 * $this->entity (when set) or added to $this->list (when set).
 *
 * Rows are skipped, with a debug message, when a required column is missing
 * or empty, when 'primary_type' is neither 'Person' nor 'Org', or when the
 * relationship category is not a known category name.
 *
 * @param array $row Row keyed by column name. 'entity_name' and
 *                   'primary_type' are required; 'relationship_category' is
 *                   also required when $this->entity is set. 'url' and
 *                   'url_name' (both non-empty) override the default
 *                   reference source $this->url / $this->url_name.
 *
 * @return void
 */
public function processRow($row)
{
    // Normalise the cells first so every later check (including the url
    // override below) sees trimmed values. The original called trim() without
    // assigning the result, so nothing was actually trimmed. Only strings are
    // touched so that null cells keep failing isset() as before.
    foreach ($row as &$r) {
        if (is_string($r)) {
            $r = trim($r);
        }
    }
    unset($r);

    if (isset($row['url']) && $row['url'] != '' && isset($row['url_name']) && $row['url_name'] != '') {
        $url = $row['url'];
        $url_name = $row['url_name'];
    } else {
        $url = $this->url;
        $url_name = $this->url_name;
    }

    if ($this->entity) {
        $required = array('entity_name', 'primary_type', 'relationship_category');
    } else {
        $required = array('entity_name', 'primary_type');
    }
    foreach ($required as $req) {
        if (!isset($row[$req]) || $row[$req] == '') {
            $this->printDebug('!!! > skipping row, ' . $req . ' not set');
            return;
        }
    }

    if ($row['primary_type'] != 'Person' && $row['primary_type'] != 'Org') {
        $this->printDebug('!!! > primary type not properly set, skipping row...');
        return;
    }

    if ($this->entity) {
        $relationship_category = trim($row['relationship_category']);
        // array_search() returns false when nothing is found; compare against
        // false explicitly so a legitimate match stored under key 0 is not
        // mistaken for "not found".
        $relationship_category_id = array_search($relationship_category, RelationshipCategoryTable::$categoryNames, true);
        if ($relationship_category_id === false) {
            $this->printDebug('!!! > relationship type not properly set, skipping row...');
            return;
        }
    }

    $this->printDebug("processing: " . $row['entity_name'] . '......');

    if ($row['primary_type'] == 'Person') {
        $entity2 = PersonTable::parseFlatName($row['entity_name']);
        $similar_entities = PersonTable::getSimilarQuery2($entity2)->execute();
    } else {
        $entity2 = new Entity();
        $entity2->addExtension('Org');
        $entity2->setEntityField('name', $row['entity_name']);
        $similar_entities = OrgTable::getOrgsWithSimilarNames($entity2->name);
    }

    // Offer each similar entity to the operator until one is accepted ('y')
    // or the operator breaks out ('b'); any other answer moves to the next.
    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }

        $accept = $this->readline(" Is this the same entity? \n(y or n or b to break)");
        if ($accept == 'y') {
            $entity2 = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        } elseif ($accept == 'b') {
            break;
        }
    }

    if (!$matched) {
        if ($entity2->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity2->name_first . ' ' . $entity2->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity2->name);
        }

        $accept = $this->readline(' create this new entity? (y or n) ');
        if ($accept == 'y') {
            // Best effort: a failure while applying extensions is reported
            // but does not abort the row; the entity is still saved below.
            try {
                $extensions = LsString::split($row['entity_extensions'], '\\s*\\,\\s*');
                foreach ($extensions as $extension) {
                    $entity2->addExtension($extension);
                }
                $entity2->save();
                $entity2->addReference($url, null, null, $url_name);
            } catch (Exception $e) {
                $this->printDebug(' !!! problems with extensions for this row');
            }

            $fields = array('summary', 'blurb', 'website');
            foreach ($fields as $field) {
                if (isset($row[$field])) {
                    $entity2[$field] = $row[$field];
                }
            }
            $entity2->save();
            $entity2->addReference($url, null, null, $url_name);
            $this->printDebug(' ' . $entity2->name . ' saved');
        } else {
            // Operator declined: nothing to relate or list for this row.
            $entity2 = null;
        }
    }

    if (!$entity2) {
        return;
    }

    // create relationship
    if ($this->entity) {
        $relationship = new Relationship();

        if (isset($row['relationship_order']) && $row['relationship_order'] != '') {
            // Explicit ordering: '1' puts $this->entity first, anything else second.
            if ($row['relationship_order'] == '1') {
                $relationship->Entity1 = $this->entity;
                $relationship->Entity2 = $entity2;
            } else {
                $relationship->Entity2 = $this->entity;
                $relationship->Entity1 = $entity2;
            }
        } elseif ($relationship_category == 'Position' || $relationship_category == 'Education') {
            // For Position/Education a Person row goes first and
            // $this->entity second; an Org row goes second.
            if ($row['primary_type'] == 'Org') {
                $relationship->Entity1 = $this->entity;
                $relationship->Entity2 = $entity2;
            } else {
                $relationship->Entity1 = $entity2;
                $relationship->Entity2 = $this->entity;
            }
        } else {
            $relationship->Entity1 = $this->entity;
            $relationship->Entity2 = $entity2;
        }

        $relationship->setCategory($relationship_category);

        $cols = array('description1', 'description2', 'start_date', 'end_date', 'goods', 'amount', 'is_board', 'is_executive', 'is_employee');
        foreach ($cols as $col) {
            if (isset($row[$col]) && $row[$col] != '') {
                try {
                    $relationship[$col] = $row[$col];
                } catch (Exception $e) {
                    $this->printDebug(" could not set {$col} for relationship, skipping");
                }
            }
        }

        // NOTE(review): $relationship has not been saved yet, so its id is
        // presumably null here; in SQL "r.id <> NULL" never evaluates to true,
        // which would make this duplicate check match nothing. Confirm against
        // the database and consider dropping the "r.id <> ?" condition.
        $q = LsDoctrineQuery::create()
            ->from('Relationship r')
            ->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ? and r.id <> ?', array($relationship->entity1_id, $relationship->entity2_id, $relationship->category_id, $relationship->id))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (relationship already found, skipping...)');
            return;
        }

        $relationship->save();
        $relationship->addReference($url, null, null, $url_name);
        $this->printDebug(" Relationship saved: {$relationship}\n");
    } elseif ($this->list) {
        $q = LsDoctrineQuery::create()
            ->from('LsListEntity le')
            ->where("le.entity_id = ? \nand le.list_id = ?", array($entity2->id, $this->list->id))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (already on list, skipping...)');
            return;
        }

        $le = new LsListEntity();
        $le->LsList = $this->list;
        $le->Entity = $entity2;
        if (isset($row['rank'])) {
            $le->rank = $row['rank'];
        }
        $le->save();
    }
}