Ejemplo n.º 1
0
 /**
  * Imports a single row describing a person and their affiliation(s).
  *
  * Every value in the row is trimmed first. Inside one database transaction
  * the method then:
  *  - parses $row['name'] into a person and fetches similar existing entities;
  *  - for each candidate whose first name is a prefix of the searched first
  *    name (or vice versa), checks the 'affiliation1', 'affiliation2', ...
  *    columns against the candidate's affiliations and, failing that, looks
  *    for the affiliation names (suffixes removed) in the candidate's
  *    extended bio;
  *  - saves a new person when no candidate is accepted;
  *  - when the person is new or was matched only through the bio, links the
  *    person to an org for 'affiliation1' (existing similar-named org, or a
  *    newly created one) with a new Position relationship.
  *
  * A summary of what was found/created is appended to $this->edits.
  *
  * @param array $row Row keyed by column name. 'name' and 'affiliation1' are
  *                   read unconditionally; 'bio', 'affiliationN',
  *                   'affiliation1_extensions', 'affiliation1_title',
  *                   'start_date', 'end_date', 'title', 'notes' and 'rank'
  *                   are optional.
  *
  * @throws Exception Any exception raised during processing is rethrown
  *                   after the transaction has been rolled back.
  */
 protected function processRow($row)
 {
     foreach ($row as &$r) {
         $r = trim($r);
     }
     // Break the reference: otherwise $r keeps aliasing the last element of
     // $row and any later write to $r would silently modify the row.
     unset($r);
     $edit = array('Search Name' => $row['name'], 'Affiliation Name' => $row['affiliation1'], 'Similar Names' => array(), 'New Person' => null, 'Existing Person' => null, 'New Org' => null, 'Existing Org' => null, 'New Relationship' => null);
     try {
         $this->db->beginTransaction();
         $person = null;
         $search_person = PersonTable::parseFlatName($row['name']);
         $similar = $search_person->getSimilarEntitiesQuery(true)->execute();
         $matched_bio = false;
         $similar_ids = array();
         foreach ($similar as $s) {
             $similar_ids[] = $s->id;
             // Only consider candidates where one first name is a prefix of the other.
             $sim_re = LsString::escapeStringForRegex($s->name_first);
             $search_re = LsString::escapeStringForRegex($search_person->name_first);
             if (preg_match('/^' . $sim_re . '/su', $search_person->name_first) == 0 && preg_match('/^' . $search_re . '/su', $s->name_first) == 0) {
                 continue;
             }
             // Walk affiliation1, affiliation2, ... until a column is missing or blank,
             // stopping at the first affiliation the candidate is already linked to.
             $ct = 1;
             $matched_affils = array();
             $unmatched_affils = array();
             while (isset($row['affiliation' . $ct]) && trim($row['affiliation' . $ct]) != '') {
                 $affil = trim($row['affiliation' . $ct]);
                 $org = $s->checkAffiliations(array($affil));
                 if ($org) {
                     $matched_affils[] = array($org, $affil);
                     $edit['Existing Org'] = $org->id;
                     break;
                 } else {
                     $unmatched_affils[] = $affil;
                 }
                 $ct++;
             }
             if (count($matched_affils)) {
                 $person = $s;
                 break;
             } else {
                 // No affiliation matched: fall back to searching the candidate's bio
                 // for any of the affiliation names.
                 $bio = $s->getExtendedBio();
                 foreach ($unmatched_affils as $affil) {
                     $affil = OrgTable::removeSuffixes($affil);
                     $this->printDebug($affil);
                     $this->printDebug($bio);
                     if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
                         $matched_bio = true;
                         break;
                     }
                 }
                 if ($matched_bio) {
                     $person = $s;
                     break;
                 } else {
                     $this->printDebug('  ' . $s->name . ' failed');
                 }
             }
         }
         $edit['Similar Names'] = array_slice($similar_ids, 0, 5);
         $no_match = false;
         if (!$person) {
             // Nobody matched: persist the parsed person as a new entity.
             if (isset($row['bio']) && trim($row['bio']) != '') {
                 $search_person->summary = $row['bio'];
             }
             $search_person->save();
             $this->printDebug('  not found, new person saved: ' . $search_person->name);
             $search_person->addReference($this->source_url, null, null, $this->source_name);
             $no_match = true;
             $edit['New Person'] = $search_person->id;
             $person = $search_person;
         } else {
             // Fill in the summary only when the existing person has none.
             if (isset($row['bio']) && trim($row['bio']) != '' && !$person->summary) {
                 $person->summary = $row['bio'];
                 $person->save();
             }
             $this->printDebug('  **person found: ' . $person->name);
             $edit['Existing Person'] = $person->id;
         }
         // A relationship is only created when the person is new or was matched via
         // the bio; an affiliation match means the person is already linked to the org.
         if ($matched_bio || $no_match) {
             $orgs = OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true);
             $max = -1;
             $affiliated_org = null;
             // Among similar-named orgs keep the one whose related-person position
             // query returns the highest count (first one wins on ties).
             foreach ($orgs as $org) {
                 $this->printDebug('    found match: ' . $org->name);
                 $position_count = $org->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)->count();
                 if ($position_count > $max) {
                     $affiliated_org = $org;
                     $edit['Existing Org'] = $affiliated_org->id;
                     $max = $position_count;
                 }
             }
             if (!$affiliated_org) {
                 $affiliated_org = new Entity();
                 $affiliated_org->addExtension('Org');
                 if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
                     $extensions = explode(',', $row['affiliation1_extensions']);
                     foreach ($extensions as $ext) {
                         $ext = trim($ext);
                         // Ignore extension names that are not known definitions.
                         if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
                             $affiliated_org->addExtension($ext);
                         }
                     }
                 }
                 $affiliated_org->name = $row['affiliation1'];
                 $affiliated_org->save();
                 $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
                 $edit['New Org'] = $affiliated_org->id;
             }
             $rel = new Relationship();
             $rel->Entity1 = $person;
             $rel->Entity2 = $affiliated_org;
             $rel->setCategory('Position');
             if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
                 $description = trim($row['affiliation1_title']);
                 $rel->description1 = $description;
                 // Director, Trustee and Chair* titles are recorded as board positions.
                 if ($description == 'Director' || $description == 'Trustee' || preg_match('/^Chair/su', $description)) {
                     $rel->is_board = 1;
                     $rel->is_employee = 0;
                 }
             }
             $rel->save();
             $rel->addReference($this->source_url, null, null, $this->source_name);
             $edit['New Relationship'] = $rel->id;
         }
         // Optional columns are only recorded in the edit summary, not on the relationship.
         if (isset($row['start_date']) && trim($row['start_date']) != '') {
             $edit['Relationship']['start_date'] = trim($row['start_date']);
         }
         if (isset($row['end_date']) && trim($row['end_date']) != '') {
             $edit['Relationship']['end_date'] = trim($row['end_date']);
         }
         if (isset($row['title']) && trim($row['title']) != '') {
             $edit['Relationship']['title'] = trim($row['title']);
         }
         if (isset($row['notes']) && trim($row['notes']) != '') {
             $edit['Relationship']['notes'] = trim($row['notes']);
         }
         if (isset($row['rank']) && $row['rank'] != '') {
             $edit['rank'] = $row['rank'];
         }
         $this->db->commit();
     } catch (Exception $e) {
         $this->db->rollback();
         throw $e;
     }
     $this->edits[] = $edit;
 }
Ejemplo n.º 2
0
 /**
  * Interactively imports one row describing an entity (Person or Org).
  *
  * The row is matched against existing similar entities, asking the operator
  * through $this->readline() whether each candidate is the same entity and,
  * if none is accepted, whether a new entity should be created. The resulting
  * entity is then either related to $this->entity (when set) or added to
  * $this->list (when set).
  *
  * Rows are skipped, with a debug message, when a required column is missing
  * or blank, when 'primary_type' is neither 'Person' nor 'Org', when the
  * relationship category is not recognised, or when the relationship / list
  * membership already exists.
  *
  * @param array $row Row keyed by column name. 'entity_name' and
  *                   'primary_type' are required; 'relationship_category' is
  *                   also required when $this->entity is set. 'url' together
  *                   with 'url_name' override the default reference source.
  *                   Other columns read when present: 'entity_extensions',
  *                   'summary', 'blurb', 'website', 'relationship_order',
  *                   'rank' and the relationship columns listed in $cols.
  *
  * @return void
  */
 public function processRow($row)
 {
     // Trim every string value up front (the result must be assigned back; a bare
     // trim() call is a no-op) so all later reads, including url/url_name, are clean.
     foreach ($row as &$r) {
         if (is_string($r)) {
             $r = trim($r);
         }
     }
     unset($r);
     // A per-row source is used only when both the URL and its name are given.
     if (isset($row['url']) && $row['url'] != '' && isset($row['url_name']) && $row['url_name'] != '') {
         $url = $row['url'];
         $url_name = $row['url_name'];
     } else {
         $url = $this->url;
         $url_name = $this->url_name;
     }
     if ($this->entity) {
         $required = array('entity_name', 'primary_type', 'relationship_category');
     } else {
         $required = array('entity_name', 'primary_type');
     }
     foreach ($required as $req) {
         if (!isset($row[$req]) || $row[$req] == '') {
             $this->printDebug('!!! > skipping row, ' . $req . ' not set');
             return;
         }
     }
     if ($row['primary_type'] != 'Person' && $row['primary_type'] != 'Org') {
         $this->printDebug('!!! > primary type not properly set, skipping row...');
         return;
     }
     if ($this->entity) {
         $relationship_category = trim($row['relationship_category']);
         $relationship_category_id = array_search($relationship_category, RelationshipCategoryTable::$categoryNames);
         // NOTE(review): a match at key 0 is rejected here as well as "not found";
         // presumably category keys start at 1 - confirm against RelationshipCategoryTable.
         if (!$relationship_category_id) {
             $this->printDebug('!!! > relationship type not properly set, skipping row...');
             return;
         }
     }
     $this->printDebug("processing: " . $row['entity_name'] . '......');
     if ($row['primary_type'] == 'Person') {
         $entity2 = PersonTable::parseFlatName($row['entity_name']);
         $similar_entities = PersonTable::getSimilarQuery2($entity2)->execute();
     } else {
         $entity2 = new Entity();
         $entity2->addExtension('Org');
         $entity2->setEntityField('name', $row['entity_name']);
         $similar_entities = OrgTable::getOrgsWithSimilarNames($entity2->name);
     }
     // Let the operator confirm or reject each candidate; 'b' stops the review early.
     $matched = false;
     foreach ($similar_entities as $similar_entity) {
         if ($similar_entity['primary_ext'] == 'Person') {
             $this->printDebug('  POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . "  Bio :: {$similar_entity->summary})");
         } else {
             $this->printDebug('  POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
         }
         $accept = $this->readline('  Is this the same entity? (y or n or b to break)');
         if ($accept == 'y') {
             $entity2 = $similar_entity;
             $matched = true;
             $this->printDebug('             [accepted]');
             break;
         } elseif ($accept == 'b') {
             break;
         }
     }
     if (!$matched) {
         if ($entity2->getPrimaryExtension() == 'Person') {
             $this->printDebug('  New person: ' . $entity2->name_first . ' ' . $entity2->name_last);
         } else {
             $this->printDebug('  New org: ' . $entity2->name);
         }
         $accept = $this->readline('    create this new entity? (y or n) ');
         if ($accept == 'y') {
             try {
                 // The extensions column is optional; avoid an undefined-index notice.
                 $extension_list = isset($row['entity_extensions']) ? $row['entity_extensions'] : '';
                 $extensions = LsString::split($extension_list, '\\s*\\,\\s*');
                 foreach ($extensions as $extension) {
                     $entity2->addExtension($extension);
                 }
                 $entity2->save();
                 $entity2->addReference($url, null, null, $url_name);
             } catch (Exception $e) {
                 $this->printDebug('   !!! problems with extensions for this row');
             }
             $fields = array('summary', 'blurb', 'website');
             foreach ($fields as $field) {
                 if (isset($row[$field])) {
                     $entity2[$field] = $row[$field];
                 }
             }
             // NOTE(review): when the try block above succeeds the entity is saved and
             // referenced a second time here - confirm addReference() does not create
             // a duplicate reference.
             $entity2->save();
             $entity2->addReference($url, null, null, $url_name);
             $this->printDebug(' ' . $entity2->name . ' saved');
         } else {
             // Operator declined: nothing to relate or list for this row.
             $entity2 = null;
         }
     }
     // create relationship
     if ($entity2) {
         if ($this->entity) {
             $relationship = new Relationship();
             if (isset($row['relationship_order']) && $row['relationship_order'] != '') {
                 // Explicit ordering: '1' puts $this->entity first, anything else second.
                 if ($row['relationship_order'] == '1') {
                     $relationship->Entity1 = $this->entity;
                     $relationship->Entity2 = $entity2;
                 } else {
                     $relationship->Entity2 = $this->entity;
                     $relationship->Entity1 = $entity2;
                 }
             } else {
                 // Default ordering: for Position/Education a Person row becomes Entity1;
                 // in every other case $this->entity is Entity1.
                 if ($relationship_category == 'Position' || $relationship_category == 'Education') {
                     if ($row['primary_type'] == 'Org') {
                         $relationship->Entity1 = $this->entity;
                         $relationship->Entity2 = $entity2;
                     } else {
                         $relationship->Entity1 = $entity2;
                         $relationship->Entity2 = $this->entity;
                     }
                 } else {
                     $relationship->Entity1 = $this->entity;
                     $relationship->Entity2 = $entity2;
                 }
             }
             $relationship->setCategory($relationship_category);
             $cols = array('description1', 'description2', 'start_date', 'end_date', 'goods', 'amount', 'is_board', 'is_executive', 'is_employee');
             foreach ($cols as $col) {
                 if (isset($row[$col]) && $row[$col] != '') {
                     try {
                         $relationship[$col] = $row[$col];
                     } catch (Exception $e) {
                         $this->printDebug("   could not set {$col} for relationship, skipping");
                     }
                 }
             }
             // Skip when a relationship with the same endpoints and category already exists.
             $q = LsDoctrineQuery::create()->from('Relationship r')->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ? and r.id <> ?', array($relationship->entity1_id, $relationship->entity2_id, $relationship->category_id, $relationship->id))->fetchOne();
             if ($q) {
                 $this->printDebug('   (relationship already found, skipping...)');
                 return;
             }
             $relationship->save();
             $relationship->addReference($url, null, null, $url_name);
             $this->printDebug(" Relationship saved: {$relationship}\n");
         } else {
             if ($this->list) {
                 $q = LsDoctrineQuery::create()->from('LsListEntity le')->where('le.entity_id = ? and le.list_id = ?', array($entity2->id, $this->list->id))->fetchOne();
                 if ($q) {
                     $this->printDebug('   (already on list, skipping...)');
                     return;
                 }
                 $le = new LsListEntity();
                 $le->LsList = $this->list;
                 $le->Entity = $entity2;
                 if (isset($row['rank'])) {
                     $le->rank = $row['rank'];
                 }
                 $le->save();
             }
         }
     }
 }