/**
 * Imports education history for a person from a list of candidate matches.
 *
 * For every candidate the method:
 *  - skips the candidate when it carries no education or no employment entries;
 *  - builds a text blob from the candidate's summary and employer names and
 *    another from $person's summary and the names of the organisations returned
 *    by the position-category relationship query;
 *  - passes both blobs to LsLanguage::getCommonPronouns(); when nothing is
 *    returned the method gives up and returns false immediately, so candidates
 *    later in the list are not examined;
 *  - otherwise imports each education entry of the candidate.
 *
 * @param Entity $person           The person already stored in our database.
 * @param mixed  $possible_persons Iterable of candidate objects; the code reads
 *                                 ->name, ->summary, ->education and
 *                                 ->employment_history from each of them.
 *
 * @return bool False as soon as a candidate with education and employment data
 *              has no organisation match, true once the loop finishes.
 */
function import(Entity $person, $possible_persons)
{
    // Loop through the people we found; usually there is just one.
    foreach ($possible_persons as $possible_person) {
        // Report the size of the result set (the original counted the single
        // candidate object, which is meaningless and fails on PHP 8).
        $this->printDebug('Query returned ' . count($possible_persons) . ' person named ' . $possible_person->name);

        // A candidate without education entries has nothing to import.
        if (!count($possible_person->education)) {
            $this->printDebug('No education history found');
            continue;
        }
        $this->printDebug('Education found');

        // Employment info for this possible match; without it the candidate
        // cannot be compared against our person, so it is skipped.
        if (!count($possible_person->employment_history)) {
            $this->printDebug('No employment history found');
            continue;
        }
        $possible_person_bio = $possible_person->summary;
        foreach ($possible_person->employment_history as $employment) {
            $possible_person_bio .= ' ' . $employment->company . " ";
        }
        $this->printDebug('Employment found');

        // Employment info for the person in our database.
        $relationship_orgs = $person->getRelatedEntitiesQuery(
            'Org',
            RelationshipTable::POSITION_CATEGORY,
            null,
            null,
            null,
            false,
            1
        )->execute();
        $person_bio = $person->summary;
        foreach ($relationship_orgs as $org) {
            $person_bio .= ' ' . $org->name;
        }

        // Let's see how many matches we get between the two bios.
        $matches = LsLanguage::getCommonPronouns($person_bio, trim($possible_person_bio), LsLanguage::$business);
        if (!count($matches)) {
            $this->printDebug('No organization matches');

            return false;
        }

        foreach ($possible_person->education as $school) {
            $this->importSchoolEntry($person, $school);
        }
    }

    return true;
}

/**
 * Imports one education entry: resolves the school and the degree, then makes
 * sure an education relationship carrying that degree exists for $person.
 *
 * Internal helper of import(); not meant to be called from elsewhere.
 *
 * @param Entity $person The person the education entry belongs to.
 * @param mixed  $school Education entry; the code reads ->institution,
 *                       ->degree, ->year and ->source, and overwrites
 *                       ->institution with its normalised form.
 *
 * @return void
 */
function importSchoolEntry(Entity $person, $school)
{
    $school->institution = mb_convert_encoding($school->institution, 'UTF-8');
    // NOTE(review): the pattern below is empty as written, so it matches at
    // every character position and a space is inserted at each of them. It
    // looks like a special character was lost from the pattern at some point;
    // confirm the intended pattern against version control before changing it.
    $school->institution = preg_replace('//isu', ' ', $school->institution);
    $this->printDebug('Looking for the school: ' . $school->institution);

    $current_school = $this->resolveSchoolEntity($school->institution);

    // Find the degree, registering it when it is not known yet.
    $degree = DegreeTable::getByText($school->degree);
    if (!$degree) {
        $degree = DegreeTable::addDegree($school->degree);
        $this->printDebug('Adding new degree');
    }

    // Find an existing education relationship that already has this degree.
    $relationship = null;
    $relationships = $person->getRelationshipsWithQuery(
        $current_school,
        RelationshipTable::EDUCATION_CATEGORY
    )->execute();
    foreach ($relationships as $existing_relationship) {
        if ($existing_relationship->degree_id == $degree->id) {
            $relationship = $existing_relationship;
            break;
        }
    }

    if ($relationship) {
        $this->printDebug('Relationship between person and school exists');
    } else {
        $relationship = new Relationship();
        $relationship->Entity1 = $person;
        $relationship->Entity2 = $current_school;
        $relationship->description1 = 'student';
        $relationship->is_current = 0;
        if ($school->year) {
            $relationship->end_date = $school->year;
        }
        $relationship->setCategory('Education');
        $this->printDebug('Creating new relationship between person and school');
    }

    $relationship->save();

    // Attach the degree and its reference only when the relationship does not
    // carry a degree yet (which is the case for a freshly created one).
    if ($relationship->degree_id == null) {
        $reference_name = strstr($school->source, 'wikipedia') ? "Wikipedia" : "BusinessWeek";
        $relationship->Degree = $degree;
        $relationship->save();
        $relationship->addReference($school->source, null, array('degree_id'), $reference_name, null, null);
        $this->printDebug('Adding degree and reference');
    }
}

/**
 * Returns the school entity for an institution name, creating it if needed.
 *
 * Lookup order: alias in the 'bw_school' context, then a case-insensitive
 * substring match on the names of Org/School entities, then creation of a new
 * entity. Whenever the alias lookup misses, the name is stored as a new
 * 'bw_school' alias of the resulting entity.
 *
 * Internal helper of import(); not meant to be called from elsewhere.
 *
 * @param string $institution Normalised institution name.
 *
 * @return Entity The matching or newly created school.
 */
function resolveSchoolEntity($institution)
{
    $current_school = EntityTable::findByAlias($institution, 'bw_school');
    if ($current_school) {
        $this->printDebug('Found school');

        return $current_school;
    }

    // Lower-case with the multibyte function: the name is UTF-8, and a
    // byte-wise strtolower() leaves non-ASCII capitals untouched, so they
    // could never equal the LOWER()-ed column value.
    $current_school = EntityTable::getByExtensionQuery(array('Org', 'School'))
        ->addWhere('LOWER(org.name) LIKE ?', '%' . mb_strtolower($institution, 'UTF-8') . "%")
        ->fetchOne();

    if (!$current_school) {
        $current_school = $this->buildSchoolFromWikipedia($institution);
        $this->printDebug('Adding new school');
    }

    $alias = new Alias();
    $alias->name = $institution;
    $alias->context = 'bw_school';
    $alias->Entity = $current_school;
    $alias->save();

    return $current_school;
}

/**
 * Creates and saves a new Org/School entity for an institution name, filling
 * in details from the Wikipedia info box when a non-disambiguation article is
 * found.
 *
 * Internal helper of import(); not meant to be called from elsewhere.
 *
 * @param string $institution Institution name, used as entity name and as the
 *                            Wikipedia request term.
 *
 * @return Entity The saved school entity.
 */
function buildSchoolFromWikipedia($institution)
{
    $new_school = new Entity();
    $new_school->addExtension('Org');
    $new_school->addExtension('School');
    $new_school->name = $institution;

    $wikipedia = new LsWikipedia();
    $wikipedia->request($institution);
    if (!$wikipedia->execute() || $wikipedia->isDisambiguation()) {
        // No usable article: store the bare entity.
        $new_school->save();

        return $new_school;
    }

    $info_box = $wikipedia->getInfoBox();
    // First run of at least two digits/commas, e.g. "12,345".
    $number_pattern = '/([\\d\\,]{2,})/isu';

    // Student count: prefer the total, otherwise add up the per-level figures.
    if (isset($info_box['students']) && preg_match($number_pattern, $info_box['students']['clean'], $match)) {
        $new_school->students = LsNumber::clean($match[1]);
    } else {
        $num_students = 0;
        foreach (array('undergrad', 'postgrad', 'grad', 'doctoral') as $st) {
            if (isset($info_box[$st]) && preg_match($number_pattern, $info_box[$st]['clean'], $match)) {
                $num_students += LsNumber::clean($match[1]);
            }
        }
        if ($num_students > 0) {
            $new_school->students = $num_students;
        }
    }

    if (isset($info_box['faculty']) && preg_match($number_pattern, $info_box['faculty']['clean'], $match)) {
        $new_school->faculty = LsNumber::clean($match[1]);
    }

    // "public" is checked first and wins when both words appear.
    if (isset($info_box['type'])) {
        if (stristr($info_box['type']['clean'], 'public')) {
            $new_school->is_private = 0;
        } elseif (stristr($info_box['type']['clean'], 'private')) {
            $new_school->is_private = 1;
        }
    }

    if (isset($info_box['endowment'])
        && preg_match('/(\\$[\\d\\,\\.\\s]+)(million|billion)/isu', $info_box['endowment']['clean'], $match)
    ) {
        $factor = (strtolower($match[2]) == 'billion') ? 1000000000 : 1000000;
        $new_school->endowment = LsNumber::formatDollarAmountAsNumber($match[1], $factor);
    }

    // Founding date: a convertible date if possible, else a bare 4-digit year.
    if (isset($info_box['established'])) {
        if ($date = LsDate::convertDate($info_box['established']['clean'])) {
            $new_school->start_date = $date;
        } elseif (preg_match('/\\b(\\d\\d\\d\\d)\\b/isu', $info_box['established']['clean'], $match)) {
            $new_school->start_date = $match[1];
        }
    }

    // Use the article introduction as summary, dropping blank-line breaks.
    $summary = trim($wikipedia->getIntroduction());
    $summary = preg_replace('/\\n\\s*\\n/isu', '', $summary);
    if (strlen($summary) > 10) {
        $new_school->summary = $summary;
    }

    $new_school->save();
    $new_school->addReference($wikipedia->getUrl(), null, array('summary'), 'Wikipedia');

    return $new_school;
}