/**
 * Decide, for every candidate contribution found in $text, whether its donor
 * is the same person as $this->person, and pass accepted contributions to
 * parseRecipients().
 *
 * For each candidate the donor's first/middle/last name, state, city and zip
 * are compared with the person's and rated self::NO, self::AMBIGUOUS or
 * self::YES. Words shared between the donor's summary and the person's
 * summary, aliases and related organizations are counted as organization
 * matches. A chain of rules turns those ratings into a yes/no decision; when
 * $this->prompt == 1 the answer read through readline() replaces that decision.
 *
 * Side effects: resets $this->_entity_reference, echoes a one-line match
 * summary per candidate, writes debug output through printDebug(), and
 * appends the postal code of every accepted donor to $this->temp_postal.
 *
 * @param mixed $text Input forwarded unchanged to getContributors().
 * @return void
 */
function parseDonorData($text)
 {
     $this->_entity_reference = false;
     $contributors = $this->getContributors($text);
     $this->printDebug("Found " . count($contributors) . " possible donations");
     foreach ($contributors as $contributor) {
         $contribution = $contributor[0];
         $donor = $this->generateDonor($contribution);
         $first_name_match = self::NO;
         $last_name_match = self::NO;
         $middle_name_match = self::NO;
         $common_name = self::NO;
         $city_match = self::NO;
         $state_match = self::NO;
         $zip_match = self::NO;
         $common_city = self::NO;
         if ($this->person->name_first == $donor->name_first) {
             $first_name_match = self::YES;
         }
         // Note: this also rates two empty middle names as a match.
         if ($this->person->name_middle == $donor->name_middle) {
             $middle_name_match = self::YES;
         }
         if ($this->person->name_last == $donor->name_last) {
             $last_name_match = self::YES;
         }
         //both middle names are set but differ
         if (strlen($this->person->name_middle) && strlen($donor->name_middle) && $this->person->name_middle != $donor->name_middle) {
             // Both sides are longer than an initial and neither contains the other.
             // The second length test used to repeat the person's name instead of
             // checking the donor's.
             // NOTE(review): unrelated full middle names are rated AMBIGUOUS here,
             // not NO — confirm that this is intended.
             if (strlen($this->person->name_middle) > 1 && strlen($donor->name_middle) > 1 && !stristr($this->person->name_middle, $donor->name_middle) && !stristr($donor->name_middle, $this->person->name_middle)) {
                 $middle_name_match = self::AMBIGUOUS;
             }
             //one side is only an initial and the initials agree
             if ((strlen($this->person->name_middle) == 1 || strlen($donor->name_middle) == 1) && substr($this->person->name_middle, 0, 1) == substr($donor->name_middle, 0, 1)) {
                 $middle_name_match = self::AMBIGUOUS;
             }
         }
         // Donor names shorter than two characters can never be more than ambiguous.
         if (strlen($donor->name_first) < 2) {
             $first_name_match = self::AMBIGUOUS;
         }
         if (strlen($donor->name_middle) > 0 && strlen($donor->name_middle) < 2) {
             $middle_name_match = self::AMBIGUOUS;
         }
         if (strlen($donor->name_last) < 2) {
             $last_name_match = self::AMBIGUOUS;
         }
         if (in_array($this->person->name_last, LsLanguage::$commonLastNames) && in_array($this->person->name_first, LsLanguage::$commonFirstNames)) {
             $common_name = self::YES;
         }
         $this->printDebug("  Donor name: " . $donor->name_first . " " . $donor->name_middle . " " . $donor->name_last);
         $this->printDebug("  Donor address: " . $donor->Address[0]->State->name . ", " . LsLanguage::titleize($donor->Address[0]->city) . ", " . $donor->Address[0]->postal);
         $this->printDebug("  Donor organization: " . LsLanguage::titleize(trim($donor->summary)));
         $this->printDebug("  Person name: " . $this->person->name_first . " " . $this->person->name_middle . " " . $this->person->name_last);
         //checking address
         foreach ($this->person->Address as $address) {
             $this->printDebug("  Person Address: " . $address->State->name . ", " . $address->city . ", " . $address->postal . " ");
             if ($address->State->name == $donor->Address[0]->State->name) {
                 $state_match = self::YES;
             }
             if (LsLanguage::titleize($address->city) == LsLanguage::titleize($donor->Address[0]->city)) {
                 $city_match = self::YES;
             }
             // Same first three characters of the postal code: only ambiguous.
             if (substr($address->postal, 0, 3) == substr($donor->Address[0]->postal, 0, 3)) {
                 $zip_match = self::AMBIGUOUS;
             }
             if ($address->postal == $donor->Address[0]->postal) {
                 $zip_match = self::YES;
             }
             // A postal code already accepted for an earlier donor in this run counts
             // as a match. This used to be a comparison (==) with no effect.
             if (in_array($donor->Address[0]->postal, (array) $this->temp_postal)) {
                 $zip_match = self::YES;
             }
             if (in_array($donor->Address[0]->city, LsLanguage::$commonCities)) {
                 $common_city = self::YES;
             }
             break;
             //currently support only one address;
         }
         $orgs = $this->person->getRelatedEntitiesQuery('Org', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 1)->execute();
         // Word list passed as the third argument of every getCommonPronouns() call
         // below (generic vocabulary plus the person's own name parts).
         $ignored_words = array_merge(LsLanguage::$business, LsLanguage::$schools, LsLanguage::$grammar, LsLanguage::$states, LsLanguage::$geography, array($this->person->name_last, $this->person->name_first, $this->person->name_middle, $this->person->name_nick, 'Retired', 'Requested', 'Info', 'Employed'));
         // Build the text the donor's summary is compared against:
         // person summary + alias-derived words + related organization names.
         $bio = $this->person->summary;
         foreach ($this->person->Alias as $alias) {
             $this->printDebug("  Aliases: " . $alias->name . "...");
             $alias_words = LsLanguage::getCommonPronouns($this->person->name, $alias->name, $ignored_words);
             // The result is counted and iterated as an array further down, so join
             // it instead of concatenating it directly (which appended "Array").
             $bio .= ' ' . (is_array($alias_words) ? implode(' ', $alias_words) : $alias_words);
         }
         foreach ($orgs as $org) {
             $this->printDebug("  Person organizations: " . $org->name . "...");
             $bio .= ' ' . $org->name;
         }
         $summary_matches = LsLanguage::getCommonPronouns(LsLanguage::titleize(trim($donor->summary)), trim($bio), $ignored_words);
         $this->printDebug("  Person organizations: " . $bio);
         $organization_matches = count($summary_matches);
         echo ' ';
         echo ' Matching First: ' . self::$labels[$first_name_match] . ", ";
         echo ' Last: ' . self::$labels[$last_name_match] . ", ";
         echo ' Middle: ' . self::$labels[$middle_name_match] . ", ";
         echo ' City: ' . self::$labels[$city_match] . ", ";
         echo ' State: ' . self::$labels[$state_match] . ", ";
         echo ' Zip: ' . self::$labels[$zip_match] . ", ";
         echo 'Organization count: ' . $organization_matches;
         if ($organization_matches) {
             echo "  (" . implode(', ', $summary_matches) . ")";
         }
         echo "\n";
         $confident = false;
         /* direct hit */
         if ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match == self::YES && $city_match == self::YES && $zip_match == self::YES) {
             $this->printDebug("  CONFIDENT 1");
             $confident = true;
         } elseif ($first_name_match > self::NO && $middle_name_match > self::NO && $last_name_match == self::YES && $organization_matches > 1 && !$common_name) {
             $this->printDebug("  CONFIDENT 2 (not common name)");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match > self::NO && $organization_matches && !$common_name) {
             $this->printDebug("  CONFIDENT 3 (not common name)");
             $confident = true;
         } elseif ($first_name_match == self::AMBIGUOUS && $middle_name_match == self::YES && $last_name_match == self::YES && $state_match > self::NO && $organization_matches && !$common_name) {
             $this->printDebug("  CONFIDENT 4 (not common name)");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match > self::NO && $city_match > self::NO && $zip_match > self::NO && !$common_city) {
             $this->printDebug("  CONFIDENT 5");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match > self::NO && $city_match > self::NO && $zip_match > self::NO && !$common_city && !$common_name) {
             // NOTE(review): unreachable — every case satisfying this also satisfies CONFIDENT 5.
             $this->printDebug("  CONFIDENT 6");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match > self::NO && $city_match > self::NO && $zip_match > self::NO) {
             $this->printDebug("  CONFIDENT 7");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match > self::NO && $city_match > self::NO && $zip_match > self::NO) {
             // NOTE(review): unreachable — identical condition to CONFIDENT 7.
             $this->printDebug("  CONFIDENT 8");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $state_match > self::NO && $city_match > self::NO && $zip_match > self::NO) {
             // NOTE(review): unreachable — identical condition to CONFIDENT 7.
             $this->printDebug("  CONFIDENT 9");
             $confident = true;
         } elseif ($first_name_match == self::YES && $middle_name_match > self::NO && $last_name_match == self::YES && $zip_match > self::YES && $organization_matches) {
             // NOTE(review): $zip_match is only ever set to NO, AMBIGUOUS or YES above, so
             // "> self::YES" can only hold if one of the other two constants is larger
             // than YES; "== self::YES" or "> self::NO" was probably meant. Left as is
             // because changing it would alter which donations are auto-accepted.
             $this->printDebug("  CONFIDENT 10");
             $confident = true;
         }
         // In prompt mode the answer read from readline() replaces the automatic decision;
         // anything other than 'y' (including five invalid answers) counts as "no".
         if ($this->prompt == 1) {
             $accept = $this->readline('  Is this the same entity? (y or n)');
             $attempts = 1;
             while ($accept != 'y' && $accept != 'n' && $attempts < 5) {
                 $accept = $this->readline('  Is this the same entity? (y or n) ');
                 $attempts++;
             }
             $confident = $accept == 'y';
         }
         if ($confident) {
             $this->parseRecipients($contribution);
             // Remember the postal code so later donors with the same code match on zip.
             $this->temp_postal[] = $donor->Address[0]->postal;
         } else {
             $this->printDebug("  NO CONFIDENCE. SKIPPING...\n");
         }
     }
 }
Esempio n. 2
0
 /**
  * Compare this entity's extended bio with an arbitrary text.
  *
  * The extended bio (trimmed and titleized) and the trimmed $str are handed to
  * LsLanguage::getCommonPronouns() together with a word list made of the
  * LsLanguage business/school/grammar/state/geography vocabularies, this
  * entity's own name parts and a few filler words.
  *
  * @param string $str Text to compare against the extended bio.
  * @return mixed Whatever LsLanguage::getCommonPronouns() returns for these inputs.
  */
 public function getCommonBioPronouns($str)
 {
     $extended_bio = $this->getExtendedBio();
     $bio_text = LsLanguage::titleize(trim($extended_bio));
     $other_text = trim($str);
     $word_list = array_merge(
         LsLanguage::$business,
         LsLanguage::$schools,
         LsLanguage::$grammar,
         LsLanguage::$states,
         LsLanguage::$geography,
         array($this->name_last, $this->name_first, $this->name_middle, $this->name_nick),
         array('Retired', 'Requested', 'Info', 'Employed')
     );
     return LsLanguage::getCommonPronouns($bio_text, $other_text, $word_list);
 }
Esempio n. 3
0
 /**
  * Import education history for $person from externally found candidate profiles.
  *
  * For each candidate in $possible_persons:
  *  - candidates without education entries or without employment history are skipped;
  *  - the candidate's summary plus employer names is compared, via
  *    LsLanguage::getCommonPronouns(), with $person's summary plus the names of the
  *    orgs returned by the POSITION_CATEGORY related-entities query;
  *  - if nothing is shared the method returns false immediately (later candidates
  *    are not looked at);
  *  - otherwise every education entry is resolved to a school entity (looked up by
  *    'bw_school' alias, then by name, else created and filled from Wikipedia where
  *    possible), a degree, and an Education relationship between $person and the
  *    school, all of which are saved.
  *
  * @param Entity $person           The person in our database receiving the relationships.
  * @param mixed  $possible_persons Iterable of candidate objects; the code reads name, summary,
  *                                 education (institution, degree, year, source) and
  *                                 employment_history (company) from each.
  * @return bool false as soon as a candidate with education and employment shares nothing
  *              with $person; true otherwise.
  */
 function import(Entity $person, $possible_persons)
 {
     //loop through the people we found. usually just one.
     foreach ($possible_persons as $possible_person) {
         // NOTE(review): count() is applied to the single candidate, not to $possible_persons — confirm this is intended.
         $this->printDebug('Query returned ' . count($possible_person) . ' person named ' . $possible_person->name);
         //this person does not provide education. we skip
         if (count($possible_person->education)) {
             $this->printDebug('Education found');
         } else {
             $this->printDebug('No education history found');
             continue;
         }
         //get employment info for this possible match (summary followed by every employer name)
         $possible_person_bio = $possible_person->summary;
         if (count($possible_person->employment_history)) {
             foreach ($possible_person->employment_history as $employment) {
                 $possible_person_bio .= ' ' . $employment->company . " ";
             }
             $this->printDebug('Employment found');
         } else {
             $this->printDebug('No employment history found');
             continue;
         }
         //get employment info for the person in our database (summary followed by related org names)
         $relationship_orgs = $person->getRelatedEntitiesQuery('Org', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 1)->execute();
         $person_bio = $person->summary;
         foreach ($relationship_orgs as $org) {
             $person_bio .= ' ' . $org->name;
         }
         //lets see how many matches we get
         $matches = LsLanguage::getCommonPronouns($person_bio, trim($possible_person_bio), LsLanguage::$business);
         if (count($matches)) {
             foreach ($possible_person->education as $school) {
                 // normalise the institution name: convert to UTF-8 and replace en dashes with spaces
                 $school->institution = mb_convert_encoding($school->institution, 'UTF-8');
                 $school->institution = preg_replace('/–/isu', ' ', $school->institution);
                 $this->printDebug('Looking for the school: ' . $school->institution);
                 $current_school = EntityTable::findByAlias($school->institution, $context = 'bw_school');
                 //find school
                 if ($current_school) {
                     $this->printDebug('Found school');
                 } else {
                     // no alias hit: fall back to a case-insensitive substring match on the org name
                     $current_school = EntityTable::getByExtensionQuery(array('Org', 'School'))->addWhere('LOWER(org.name) LIKE ?', '%' . strtolower($school->institution) . "%")->fetchOne();
                     if (!$current_school) {
                         // still nothing: create a new Org/School entity
                         $new_school = new Entity();
                         $new_school->addExtension('Org');
                         $new_school->addExtension('School');
                         $new_school->name = $school->institution;
                         $wikipedia = new LsWikipedia();
                         $wikipedia->request($school->institution);
                         // only use the Wikipedia page when the request succeeds and it is not a disambiguation page
                         if ($wikipedia->execute() && !$wikipedia->isDisambiguation()) {
                             $info_box = $wikipedia->getInfoBox();
                             // student count: take the 'students' field, else sum the per-level fields
                             if (isset($info_box['students']) && preg_match('/([\\d\\,]{2,})/isu', $info_box['students']['clean'], $match)) {
                                 $new_school->students = LsNumber::clean($match[1]);
                             } else {
                                 $student_types = array('undergrad', 'postgrad', 'grad', 'doctoral');
                                 $num_students = 0;
                                 foreach ($student_types as $st) {
                                     if (isset($info_box[$st]) && preg_match('/([\\d\\,]{2,})/isu', $info_box[$st]['clean'], $match)) {
                                         $num_students += LsNumber::clean($match[1]);
                                     }
                                 }
                                 if ($num_students > 0) {
                                     $new_school->students = $num_students;
                                 }
                             }
                             if (isset($info_box['faculty']) && preg_match('/([\\d\\,]{2,})/isu', $info_box['faculty']['clean'], $match)) {
                                 $new_school->faculty = LsNumber::clean($match[1]);
                             }
                             // 'public' is tested first, so a type mentioning both words is stored as not private
                             if (isset($info_box['type'])) {
                                 if (stristr($info_box['type']['clean'], 'public')) {
                                     $new_school->is_private = 0;
                                 } else {
                                     if (stristr($info_box['type']['clean'], 'private')) {
                                         $new_school->is_private = 1;
                                     }
                                 }
                             }
                             // endowment is only read when written as "$<amount> million|billion"
                             if (isset($info_box['endowment'])) {
                                 if (preg_match('/(\\$[\\d\\,\\.\\s]+)(million|billion)/isu', $info_box['endowment']['clean'], $match)) {
                                     if (strtolower($match[2]) == 'billion') {
                                         $factor = 1000000000;
                                     } else {
                                         $factor = 1000000;
                                     }
                                     $new_school->endowment = LsNumber::formatDollarAmountAsNumber($match[1], $factor);
                                 }
                             }
                             // founding date: full date if LsDate can convert it, else the first 4-digit number
                             if (isset($info_box['established'])) {
                                 // NOTE(review): $year is never read afterwards
                                 $year = null;
                                 if ($date = LsDate::convertDate($info_box['established']['clean'])) {
                                     $new_school->start_date = $date;
                                 } else {
                                     if (preg_match('/\\b(\\d\\d\\d\\d)\\b/isu', $info_box['established']['clean'], $match)) {
                                         $new_school->start_date = $match[1];
                                     }
                                 }
                             }
                             // use the article introduction as summary, with blank lines removed, if longer than 10 bytes
                             $summary = trim($wikipedia->getIntroduction());
                             $summary = preg_replace('/\\n\\s*\\n/isu', '', $summary);
                             if (strlen($summary) > 10) {
                                 $new_school->summary = $summary;
                             }
                             $new_school->save();
                             $new_school->addReference($source = $wikipedia->getUrl(), $excerpt = null, $fields = array('summary'), $name = 'Wikipedia');
                         } else {
                             // no usable Wikipedia page: save the school with its name only
                             $new_school->save();
                         }
                         $current_school = $new_school;
                         $this->printDebug('Adding new school');
                     }
                     // record the institution name as a 'bw_school' alias of the school that was found or created
                     $alias = new Alias();
                     $alias->name = $school->institution;
                     $alias->context = 'bw_school';
                     $alias->Entity = $current_school;
                     $alias->save();
                 }
                 //find degree, creating it when the text is not known yet
                 $degree = null;
                 if (!($degree = DegreeTable::getByText($school->degree))) {
                     $degree = DegreeTable::addDegree($school->degree);
                     $this->printDebug('Adding new degree');
                 }
                 //find an existing education relationship to this school with the same degree
                 $relationship = null;
                 $relationships = $person->getRelationshipsWithQuery($current_school, RelationshipTable::EDUCATION_CATEGORY)->execute();
                 foreach ($relationships as $existing_relationship) {
                     if ($existing_relationship->degree_id == $degree->id) {
                         $relationship = $existing_relationship;
                         break;
                     }
                 }
                 if ($relationship) {
                     $this->printDebug('Relationship between person and school exists');
                 } else {
                     $relationship = new Relationship();
                     $relationship->Entity1 = $person;
                     $relationship->Entity2 = $current_school;
                     $relationship->description1 = 'student';
                     $relationship->is_current = 0;
                     if ($school->year) {
                         $relationship->end_date = $school->year;
                     }
                     $relationship->setCategory('Education');
                     $this->printDebug('Creating new relationship between person and school');
                 }
                 //save
                 $relationship->save();
                 //add degree and reference (only when the relationship has no degree yet, i.e. it was just created)
                 if ($relationship->degree_id == null) {
                     $reference_name = strstr($school->source, 'wikipedia') ? "Wikipedia" : "BusinessWeek";
                     $relationship->Degree = $degree;
                     $relationship->save();
                     $relationship->addReference($source = $school->source, $excerpt = null, $fields = array('degree_id'), $name = $reference_name, $detail = null, $date = null);
                     $this->printDebug('Adding degree and reference');
                 }
             }
         } else {
             // NOTE(review): this aborts the whole import, whereas the checks above only skip the candidate — confirm this is intended.
             $this->printDebug('No organization matches');
             return false;
         }
     }
     return true;
 }
Esempio n. 4
0
 /**
  * Decide whether two person entities describe the same individual.
  *
  * Name parts, start dates (treated as birth dates) and summaries of both
  * entities are reduced to five condition objects (lasts, firsts, middles,
  * dates, bios). The entities are considered the same when at least one of
  * eleven rule bundles over those conditions holds.
  *
  * @param Entity $p1          First person.
  * @param Entity $p2          Second person.
  * @param bool   $consistency When truthy, the two rules that accept a first name that is
  *                            merely contained in the other one (backed by bio overlap) are disabled.
  * @param bool   $reporting   When truthy, return array(result, lasts, firsts, middles, dates, bios)
  *                            instead of the bare boolean.
  * @return bool|array
  */
 static function areSame(Entity $p1, Entity $p2, $consistency = false, $reporting = false)
 {
     // Raw inputs, first person then second person.
     $first1 = $p1->name_first;
     $middle1 = $p1->name_middle;
     $last1 = $p1->name_last;
     $born1 = new LsDate($p1->start_date);
     $bio1 = $p1->summary;

     $first2 = $p2->name_first;
     $middle2 = $p2->name_middle;
     $last2 = $p2->name_last;
     $born2 = new LsDate($p2->start_date);
     $bio2 = $p2->summary;

     // Condition objects. Casting '' to object gives a stdClass whose `scalar`
     // property holds the empty string; that shape is kept because the objects
     // are handed back to the caller in reporting mode.
     $lasts = (object) '';
     $lasts->nonempty = $last1 && $last2;
     $lasts->match = $last1 == $last2;
     $lasts->noninitial = strlen($last1) > 1 && strlen($last2) > 1;
     $lasts->subset = stristr($last1, $last2) || stristr($last2, $last1);
     $lasts->compatible = !$lasts->nonempty || $lasts->subset;
     $lasts->uncommon = !in_array($last1, LsLanguage::$commonLastNames);

     $firsts = (object) '';
     $firsts->nonempty = $first1 && $first2;
     $firsts->match = $first1 == $first2;
     $firsts->noninitial = strlen($first1) > 1 && strlen($first2) > 1;
     $firsts->subset = stristr($first1, $first2) || stristr($first2, $first1);
     $firsts->compatible = !$firsts->nonempty || $firsts->subset;
     $firsts->uncommon = !in_array($first1, LsLanguage::$commonFirstNames);

     $middles = (object) '';
     $middles->nonempty = $middle1 && $middle2;
     $middles->match = $middle1 == $middle2;
     $middles->noninitial = strlen($middle1) > 1 && strlen($middle2) > 1;
     $middles->subset = stristr($middle1, $middle2) || stristr($middle2, $middle1);
     $middles->compatible = !$middles->nonempty || $middles->subset;

     $dates = (object) '';
     $dates->nonempty = !$born1->isBlank() && !$born2->isBlank();
     $dates->match = $dates->nonempty && ((string) $born1 == (string) $born2);
     $dates->compatible = self::birthDatesAreCompatible($born1, $born2);

     $bios = (object) '';
     $bios->nonempty = $bio1 && $bio2;
     $ignored_words = array_merge(array($first1, $last1, $first2, $last2), LsLanguage::$business, LsLanguage::$months, LsLanguage::$prefixes, LsLanguage::$schools, LsLanguage::$grammar);
     $bios->count = count(LsLanguage::getCommonPronouns($bio1, $bio2, $ignored_words));

     $report = array($lasts, $firsts, $middles, $dates, $bios);

     // Building blocks shared by several rules.
     $last_ok = $lasts->match && $lasts->noninitial;
     $last_rare = $last_ok && $lasts->uncommon;
     $first_full = $firsts->match && $firsts->noninitial;
     $first_given = $firsts->nonempty && $firsts->match;
     $first_partial = $firsts->nonempty && $firsts->subset;
     $middle_given = $middles->nonempty && $middles->match;
     $middle_full = $middles->match && $middles->noninitial;
     $dates_known = $dates->compatible && $dates->nonempty;
     $bio_overlap = $bios->count > 7;

     // Each entry is one bundle of conditions; any true entry means "same person".
     $rules = array(
         $last_ok && $first_full && $middle_given && $dates_known,
         $last_ok && $first_given && $middle_full && $dates_known,
         $last_rare && $first_full && $firsts->uncommon && $middles->compatible && $dates_known,
         $last_rare && $first_given && $middle_given && $dates_known,
         $last_rare && $first_partial && $middle_full && $dates_known,
         $last_rare && $first_full && $middle_full && $dates->compatible,
         $last_ok && $first_full && $middles->compatible && $dates->compatible && $bio_overlap,
         $last_ok && $first_given && $middle_given && $dates->compatible && $bio_overlap,
         $last_rare && $first_given && $middles->compatible && $dates_known && $bio_overlap,
         $last_ok && $first_partial && $middles->compatible && $dates_known && $bio_overlap && !$consistency,
         $last_ok && $first_partial && $dates->compatible && $bios->count > 15 && !$consistency,
     );
     $same = in_array(true, $rules, true);

     array_unshift($report, $same);
     return $reporting ? $report : $same;
 }