/**
 * Scrapes one Forbes profile page and stores the person it describes.
 *
 * Parses rank, name, net worth, age, education and biography out of the page
 * at $url, finds or creates the matching business person, links every school
 * listed under "Education", then adds the person to the list and attaches the
 * portrait image.
 *
 * @param string $url Profile page URL; also used as the source of every
 *                    reference that is added.
 *
 * @return void
 */
protected function import($url)
{
    $this->printDebug($url);

    if ($this->browser->get($url)->responseIsError()) {
        echo "Couldn't get person: " . $url . "\n";

        return;
    }

    $text = $this->browser->getResponseText();

    $bio = null;
    $name = null;
    $netWorth = null;
    $birthYear = null;
    // Must be an array even when the page has no Education block, because it
    // is iterated unconditionally further down.
    $schools = array();
    $imageUrl = null;
    $rank = null;

    //get name & rank (only the 2005 and the post-2005 layouts are handled)
    if ($this->year > 2005 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/', $text, $match)) {
        $name = trim($match[2]);
        $rank = $match[1];
    }
    if ($this->year == 2005 && preg_match('/<h2>#(\\d+) ([^<]+)<\\/h2>/', $text, $match)) {
        $name = trim($match[2]);
        $rank = $match[1];
    }

    // Without a name there is nothing to match or save; stop before any
    // nameless entity gets created and added to the list.
    if ($name === null || $name === '') {
        $this->printDebug('Person name not found');

        return;
    }

    //get net worth
    if (preg_match('/Net Worth<\\/span> <span class="red">\\$([\\S]+) billion/', $text, $match)) {
        $netWorth = $match[1] * 1000000000;
    }

    //get birth year
    // NOTE(review): the age is subtracted from the current calendar year, not
    // from $this->year - confirm this is intended when importing older lists.
    if (preg_match('/>Age<\\/span> (\\d+)/', $text, $match)) {
        // Parenthesized: mixing "-" and "." without parentheses is deprecated
        // since PHP 7.4.
        $birthYear = (date("Y") - $match[1]) . "-00-00";
    }

    //get schools
    if (preg_match('/Education<\\/span>(.*)<\\/td>/isU', $text, $match)) {
        // foreach instead of while(current()): an empty segment between two
        // <br> tags is falsy and used to end the loop early.
        foreach (explode('<br>', $match[1]) as $schoolPart) {
            if (!preg_match('/^([^,]+),\\s+<b>([^<]+)<\\/b>/is', trim($schoolPart), $schoolMatch)) {
                continue;
            }

            $schoolOrg = trim($schoolMatch[1]);
            if ($schoolOrg == 'High School') {
                continue;
            }

            $schools[] = array('org' => $schoolOrg, 'degree' => trim($schoolMatch[2]));
        }
    }

    //get bio from the page, falling back to the Wikipedia introduction
    if (preg_match('#<br>[\\n\\s]<br>(.+?)<br>[\\n\\s]<br>[\\n\\s]<img#isU', $text, $match)) {
        $bio = strip_tags(trim($match[1]));
    } else {
        $wikipedia = new LsWikipedia();
        if ($wikipedia->request($name)) {
            $bio = $wikipedia->getIntroduction();
        }
    }

    //get image
    // NOTE(review): the character class excludes the digit 0 and the dot is
    // unescaped - left unchanged, confirm against real profile URLs.
    if (preg_match('#([A-Z1-9]{4}).html#', $url, $match)) {
        $imageUrl = $this->list_urls[$this->year]['img_src'] . $match[1] . ".jpg";
    }

    $this->printDebug("Rank: " . $rank);
    $this->printDebug("Name: " . $name);
    $this->printDebug("Image: " . $imageUrl);
    $this->printDebug("Net worth: " . $netWorth);
    $this->printDebug("Birth year: " . $birthYear);
    $this->printDebug("Bio: " . $bio);

    //parse name and create person object, reusing an existing business person
    //with the same first and last name
    $person = $this->generatePerson($name, $bio);
    $person_exists = $this->getBusinessPersonQuery()
        ->addWhere(
            "person.name_first = ? AND person.name_last = ?",
            array($person->name_first, $person->name_last)
        )
        ->fetchOne();

    if ($person_exists != false) {
        $this->printDebug('Person exists');
        $person = $person_exists;
    } else {
        $this->printDebug('Saving new person');
    }

    // Only fill in fields the stored person does not already have.
    $person->addExtension('BusinessPerson');
    $person->start_date = $person->start_date == null ? $birthYear : $person->start_date;
    $person->summary = $person->summary == null ? $bio : $person->summary;
    $person->net_worth = $person->net_worth == null ? $netWorth : $person->net_worth;

    //go through schools person attended
    foreach ($schools as $school) {
        //does the current school exist?
        $current_school = EntityTable::getByExtensionQuery('Org')
            ->addWhere("org.name = ?", $school['org'])
            ->fetchOne();

        if ($current_school) {
            $this->printDebug(" Found School " . $school['org']);
        } else {
            //clear cache
            Doctrine::getTable('ExtensionDefinition')->clear();

            $current_school = new Entity();
            $current_school->addExtension('Org');
            $current_school->addExtension('School');
            $current_school->name = LsLanguage::titleize($school['org']);
            $current_school->save();
            // Plain positional arguments: the former "$name = 'Forbes.com'"
            // argument overwrote the parsed person name.
            $current_school->addReference($url, null, array('name'), 'Forbes.com', null, null);
            $this->printDebug(" Adding new school: " . $school['org']);
        }

        //if there is no relationship between person and school. connect them!
        $existing = $person
            ->getRelationshipsWithQuery($current_school, RelationshipTable::EDUCATION_CATEGORY)
            ->fetchOne();

        if (!$existing) {
            $this->printDebug(" Creating Relation between " . $current_school->name . " and " . $person->name);

            $education = new Relationship();
            $education->Entity1 = $person;
            $education->Entity2 = $current_school;
            $education->setCategory('Education');
            $education->description1 = $school['degree'];
            $education->is_current = 1;
            $education->save();
            $education->addReference($url, null, array('description1'), 'Forbes.com', null, null);
        }
    }

    $person->save();
    $person->addReference(
        $url,
        null,
        array(
            'name_prefix',
            'name_first',
            'name_middle',
            'name_last',
            'name_suffix',
            'name_nick',
            'summary',
            'net_worth',
            'start_date',
        ),
        'Forbes.com',
        null,
        null
    );

    $this->saveToList($person, $rank);
    $this->attachImage($person, $imageUrl);
}
/**
 * Stores the committees and donations found for one contribution record.
 *
 * For every recipient returned by getRecipients() the matching fundraising
 * committee is looked up by name (or created), its details are refreshed, and
 * each transaction that is not yet recorded is attached as an FEC filing to
 * the donation relationship between $this->person and the committee.
 *
 * @param mixed $contribution Value handed unchanged to getRecipients().
 *
 * @return void
 */
function parseRecipients($contribution)
{
    $recipients = $this->getRecipients($contribution);
    $this->printDebug(" Number of recipients " . count($recipients));

    foreach ($recipients as $recipient) {
        // The candidate info is not used below; the call itself is kept so
        // whatever getCandidateInfo() does internally still happens.
        $this->getCandidateInfo($recipient[0]);

        $committee_info = $this->getCommitteeInfo($recipient[0]);
        $committee_name = trim($committee_info[2]);
        $committee_fec_id = trim($committee_info[1]);
        $committee_label = $committee_name . " (" . $committee_fec_id . ")";

        //CHECK FOR EXISTING COMMITTEE
        $committee_query = EntityTable::getByExtensionQuery(array('Org', 'PoliticalFundraising'));
        $current_committee = $committee_query->addWhere("org.name = ?", $committee_name)->fetchOne();

        if (!$current_committee) {
            //clear cache
            Doctrine::getTable('ExtensionDefinition')->clear();

            $current_committee = new Entity();
            $current_committee->addExtension('Org');
            $current_committee->addExtension('PoliticalFundraising');
            $current_committee->name = LsLanguage::titleize($committee_name);
            $current_committee->fec_id = $committee_fec_id;
            $current_committee->save();
            $current_committee->addReference(
                $this->fecCommitteeUrl . $committee_fec_id,
                null,
                array('name', 'fec_id'),
                'FEC Disclosure Report',
                null,
                null,
                false
            );
            $this->printDebug(" Adding new committee: " . $committee_label);
        } else {
            $this->printDebug(" Found Committee " . $committee_label);
        }

        $this->committee = $current_committee;
        $this->updateCommitteeDetails($current_committee);

        //RECORD DONATIONS
        // The filing table is consulted only once per recipient, for the first
        // transaction that has not been scraped before; that answer is reused
        // for the remaining transactions of this recipient.
        $existence_checked = false;
        $donation_exists = null;

        foreach ($this->getTransactions($recipient[0]) as $transaction) {
            list($month, $day, $year) = explode('/', $transaction[1]);
            $donation_amount = $transaction[2];
            $donation_fec_id = $transaction[4];
            $donation_date = implode('-', array($year, $month, $day));

            if ($this->hasMeta($this->person->id, $donation_fec_id) && !$this->forceScaper) {
                $this->printDebug("#{$donation_fec_id} Already scraped");
                continue;
            }

            if (!$existence_checked) {
                $donation_exists = FecFilingTable::getFecFiling($donation_fec_id);
                $existence_checked = true;
            }

            if ($donation_exists) {
                $this->printDebug(" Donation exists: TRUE");
                break;
            }

            $this->printDebug(" Donation exists: FALSE ");
            $this->printDebug(" Donation ({$donation_fec_id}): " . $donation_amount . " on " . $donation_date);
            $this->printDebug(" Creating relationship between \"" . $this->person->name_first . " " . $this->person->name_last . "\" and \"" . $current_committee->name . "\"");

            $filing = new FecFiling();
            $filing->amount = $donation_amount;
            $filing->fec_filing_id = $donation_fec_id;
            $filing->start_date = $donation_date;
            $filing->end_date = $donation_date;

            $relationship = $this->person
                ->getRelationshipsWithQuery($current_committee, RelationshipTable::DONATION_CATEGORY)
                ->fetchOne();

            if ($relationship) {
                $relationship->addFecFiling($filing);
            } else {
                $reference_fields = array('amount', 'start_date', 'end_date', 'description1');

                $relationship = new Relationship();
                $relationship->Entity1 = $this->person;
                $relationship->Entity2 = $current_committee;
                $relationship->setCategory('Donation');
                $relationship->description1 = 'Campaign Contribution';
                $relationship->is_current = 1;
                $relationship->save();
                $relationship->addFecFiling($filing);
                $relationship->addReference(
                    self::$fecImageUrl . $donation_fec_id,
                    null,
                    $reference_fields,
                    'FEC Filing',
                    null,
                    null
                );
                $filing->save();
                $relationship->addReference(
                    $this->_url,
                    null,
                    $reference_fields,
                    'FEC contribution search',
                    null
                );

                // The person gets the search page as a reference only once.
                if (!$this->_entity_reference) {
                    $this->person->addReference($this->_url, null, null, 'FEC contribution search');
                    $this->_entity_reference = true;
                }
            }

            $this->saveMeta($this->person->id, $donation_fec_id, 1);
        }
    }

    $this->printDebug("+ Adding Donation: COMPLETE\n");
}
/**
 * Imports education history for $person from candidate profiles.
 *
 * A candidate is used only when it carries both education and employment
 * data and LsLanguage::getCommonPronouns() finds terms shared between the
 * candidate's summary/employers and $person's summary/position organizations.
 * For every school of such a candidate the school entity, its 'bw_school'
 * alias, the degree and the education relationship are found or created.
 *
 * @param Entity $person           Person already stored in the database.
 * @param mixed  $possible_persons Iterable of candidate profiles; each one is
 *                                 read for name, summary, education and
 *                                 employment_history.
 *
 * @return bool False as soon as a candidate with education and employment
 *              data shares no terms with $person (remaining candidates are
 *              then not examined); true otherwise.
 */
function import(Entity $person, $possible_persons)
{
    //loop through the people we found. usually just one.
    foreach ($possible_persons as $possible_person) {
        $this->printDebug('Query returned ' . count($possible_person) . ' person named ' . $possible_person->name);

        //this person does not provide education. we skip
        if (!count($possible_person->education)) {
            $this->printDebug('No education history found');
            continue;
        }
        $this->printDebug('Education found');

        //get employment info for this possible match
        if (!count($possible_person->employment_history)) {
            $this->printDebug('No employment history found');
            continue;
        }
        $possible_person_bio = $possible_person->summary;
        foreach ($possible_person->employment_history as $employment) {
            $possible_person_bio .= ' ' . $employment->company . " ";
        }
        $this->printDebug('Employment found');

        //get employment info for the person in our database
        $relationship_orgs = $person
            ->getRelatedEntitiesQuery('Org', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 1)
            ->execute();
        $person_bio = $person->summary;
        foreach ($relationship_orgs as $org) {
            $person_bio .= ' ' . $org->name;
        }

        //lets see how many matches we get
        $matches = LsLanguage::getCommonPronouns($person_bio, trim($possible_person_bio), LsLanguage::$business);
        if (!count($matches)) {
            // NOTE(review): this aborts the whole import instead of moving on
            // to the next candidate - kept as is, confirm it is intended.
            $this->printDebug('No organization matches');

            return false;
        }

        foreach ($possible_person->education as $school) {
            // Normalise the institution name. The pattern used here before was
            // empty ('//isu'), which put a space between every character.
            // NOTE(review): the original pattern most likely held a non-ASCII
            // character that got lost; whitespace/NBSP normalisation is the
            // conservative reading - confirm against the upstream source.
            $institution = mb_convert_encoding($school->institution, 'UTF-8');
            $institution = trim(preg_replace('/[\\s\\x{00A0}]+/u', ' ', $institution));
            $school->institution = $institution;

            if ($institution === '') {
                // An empty name would LIKE-match '%%', i.e. an arbitrary school.
                $this->printDebug('Skipping education entry without an institution name');
                continue;
            }

            $this->printDebug('Looking for the school: ' . $institution);

            //find school
            $current_school = EntityTable::findByAlias($institution, 'bw_school');
            if ($current_school) {
                $this->printDebug('Found school');
            } else {
                $current_school = EntityTable::getByExtensionQuery(array('Org', 'School'))
                    ->addWhere('LOWER(org.name) LIKE ?', '%' . strtolower($institution) . "%")
                    ->fetchOne();

                if (!$current_school) {
                    $current_school = $this->createSchoolFromWikipedia($institution);
                    $this->printDebug('Adding new school');
                }

                // Remember the spelling so the next lookup hits the alias.
                $alias = new Alias();
                $alias->name = $institution;
                $alias->context = 'bw_school';
                $alias->Entity = $current_school;
                $alias->save();
            }

            //find degree
            $degree = DegreeTable::getByText($school->degree);
            if (!$degree) {
                $degree = DegreeTable::addDegree($school->degree);
                $this->printDebug('Adding new degree');
            }

            //find relationship
            $relationship = null;
            $relationships = $person
                ->getRelationshipsWithQuery($current_school, RelationshipTable::EDUCATION_CATEGORY)
                ->execute();
            foreach ($relationships as $existing_relationship) {
                if ($existing_relationship->degree_id == $degree->id) {
                    $relationship = $existing_relationship;
                    break;
                }
            }

            if ($relationship) {
                $this->printDebug('Relationship between person and school exists');
            } else {
                $relationship = new Relationship();
                $relationship->Entity1 = $person;
                $relationship->Entity2 = $current_school;
                $relationship->description1 = 'student';
                $relationship->is_current = 0;
                if ($school->year) {
                    $relationship->end_date = $school->year;
                }
                $relationship->setCategory('Education');
                $this->printDebug('Creating new relationship between person and school');
            }

            //save
            $relationship->save();

            //add degree and reference
            if ($relationship->degree_id == null) {
                $reference_name = strstr($school->source, 'wikipedia') ? "Wikipedia" : "BusinessWeek";
                $relationship->Degree = $degree;
                $relationship->save();
                $relationship->addReference($school->source, null, array('degree_id'), $reference_name, null, null);
                $this->printDebug('Adding degree and reference');
            }
        }
    }

    return true;
}

/**
 * Creates and saves a new school entity named $institution.
 *
 * When Wikipedia returns a non-disambiguation article for the name, student
 * and faculty counts, public/private status, endowment, founding date and
 * summary are copied from it and the article is added as a reference.
 *
 * @param string $institution Cleaned institution name.
 *
 * @return Entity The saved school entity.
 */
private function createSchoolFromWikipedia($institution)
{
    $new_school = new Entity();
    $new_school->addExtension('Org');
    $new_school->addExtension('School');
    $new_school->name = $institution;

    $wikipedia = new LsWikipedia();
    $wikipedia->request($institution);

    if (!$wikipedia->execute() || $wikipedia->isDisambiguation()) {
        $new_school->save();

        return $new_school;
    }

    $info_box = $wikipedia->getInfoBox();
    $count_pattern = '/([\\d\\,]{2,})/isu';

    // Prefer the overall student count; otherwise add up the per-level counts.
    if (isset($info_box['students']) && preg_match($count_pattern, $info_box['students']['clean'], $match)) {
        $new_school->students = LsNumber::clean($match[1]);
    } else {
        $num_students = 0;
        foreach (array('undergrad', 'postgrad', 'grad', 'doctoral') as $st) {
            if (isset($info_box[$st]) && preg_match($count_pattern, $info_box[$st]['clean'], $match)) {
                $num_students += LsNumber::clean($match[1]);
            }
        }
        if ($num_students > 0) {
            $new_school->students = $num_students;
        }
    }

    if (isset($info_box['faculty']) && preg_match($count_pattern, $info_box['faculty']['clean'], $match)) {
        $new_school->faculty = LsNumber::clean($match[1]);
    }

    if (isset($info_box['type'])) {
        // "public" wins when both words appear.
        if (stristr($info_box['type']['clean'], 'public')) {
            $new_school->is_private = 0;
        } elseif (stristr($info_box['type']['clean'], 'private')) {
            $new_school->is_private = 1;
        }
    }

    if (isset($info_box['endowment'])
        && preg_match('/(\\$[\\d\\,\\.\\s]+)(million|billion)/isu', $info_box['endowment']['clean'], $match)
    ) {
        $factor = strtolower($match[2]) == 'billion' ? 1000000000 : 1000000;
        $new_school->endowment = LsNumber::formatDollarAmountAsNumber($match[1], $factor);
    }

    if (isset($info_box['established'])) {
        // Full date when it can be converted, otherwise the first 4-digit year.
        if ($date = LsDate::convertDate($info_box['established']['clean'])) {
            $new_school->start_date = $date;
        } elseif (preg_match('/\\b(\\d\\d\\d\\d)\\b/isu', $info_box['established']['clean'], $match)) {
            $new_school->start_date = $match[1];
        }
    }

    $summary = trim($wikipedia->getIntroduction());
    $summary = preg_replace('/\\n\\s*\\n/isu', '', $summary);
    if (strlen($summary) > 10) {
        $new_school->summary = $summary;
    }

    $new_school->save();
    $new_school->addReference($wikipedia->getUrl(), null, array('summary'), 'Wikipedia');

    return $new_school;
}
/**
 * Imports one governor from a profile page on the governors' site.
 *
 * Reuses a similarly named person whose extended bio mentions "governor",
 * otherwise saves a new one, then fills in birth data, party, summary,
 * schools, the state's governor office with one position per pair of years,
 * spouse, phone number and portrait image.
 *
 * @param array $row Read for the keys 'url', 'name', 'state' and 'years'.
 *
 * @return void
 */
protected function importGovernor($row)
{
    $url = $this->_baseUrl . $row['url'];

    if ($this->browser->get($url)->responseIsError()) {
        return;
    }

    $text = LsHtml::replaceEntities($this->browser->getResponseText());
    $source_name = 'Governors Association';

    $name = trim(str_ireplace('Gov.', '', $row['name']));
    $this->printDebug('');
    $this->printDebug($name . ':');

    $governor = PersonTable::parseFlatName($name);
    $governor->addExtension('PoliticalCandidate');
    $governor->addExtension('ElectedRepresentative');
    $governor->is_state = 1;

    // Look for an already stored person: one first name must be a prefix of
    // the other, and the stored bio must mention a governorship.
    foreach ($governor->getSimilarEntitiesQuery(true)->execute() as $candidate) {
        $candidate_re = LsString::escapeStringForRegex($candidate->name_first);
        $governor_re = LsString::escapeStringForRegex($governor->name_first);

        $first_names_compatible = preg_match('/^' . $candidate_re . '/su', $governor->name_first)
            || preg_match('/^' . $governor_re . '/su', $candidate->name_first);
        if (!$first_names_compatible) {
            continue;
        }

        if (preg_match('/\\bgovernor(ship)?\\b/isu', $candidate->getExtendedBio())) {
            $governor = $candidate;
            $this->printDebug(' Found existing governor: ' . $candidate->name . ' ' . $candidate->id);
            break;
        }
    }

    $governor->save();
    $this->printDebug($governor->id);

    //BIRTH DATA
    if (!$governor->start_date && preg_match('/>Born\\:<\\/b>([^<]*)<br/is', $text, $born)) {
        $this->printDebug(' Birthdate: ' . $born[1]);
        $governor->start_date = trim($born[1]);
    }
    if (!$governor->birthplace && preg_match('/>Birth State\\:<\\/b>([^<]*)<br/is', $text, $birth_state)) {
        $this->printDebug(' Birthplace: ' . trim($birth_state[1]));
        $governor->birthplace = trim($birth_state[1]);
    }

    //PARTY MEMBERSHIP
    if (preg_match('/>Party\\:<\\/b>([^<]*)<br/is', $text, $party_match)) {
        $party_str = $party_match[1];
        $this->printDebug(' Party: ' . $party_str);

        // A Republican match overrides a Democrat match when both occur.
        $party = null;
        if (stristr($party_str, 'Democrat')) {
            $party = EntityTable::getByExtensionQuery('PoliticalParty')
                ->addWhere('name = ?', 'Democratic Party')
                ->fetchOne();
        }
        if (stristr($party_str, 'Republican')) {
            $party = EntityTable::getByExtensionQuery('PoliticalParty')
                ->addWhere('name = ?', 'Republican Party')
                ->fetchOne();
        }

        if ($party && !$governor->party_id) {
            $governor->Party = $party;
            $governor->is_independent = false;
            $this->printDebug(' Added membership in ' . $party);
        } elseif (stristr($party_str, 'Independent')) {
            $governor->is_independent = true;
        }
    }

    //SUMMARY: every text run of 240+ characters that does not mention Javascript
    if (!$governor->summary && preg_match_all('/>([^<]{240,})/isu', $text, $long_texts)) {
        $summary = '';
        foreach ($long_texts[1] as $paragraph) {
            if (stristr($paragraph, 'Javascript')) {
                continue;
            }
            $summary .= "\n\n" . $paragraph;
        }
        $summary = trim($summary);
        if ($summary !== '') {
            $governor->summary = $summary;
        }
    }

    $governor->save();
    $governor->addReference($url, null, $governor->getAllModifiedFields(), $source_name);

    //SCHOOLS
    if (preg_match('/>School\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $schools_match)) {
        // Names are separated by ';'; a single entry is split on ',' instead.
        $school_names = explode(';', trim($schools_match[1]));
        if (count($school_names) == 1) {
            $school_names = explode(',', $school_names[0]);
        }

        foreach ($school_names as $school_name) {
            $school_name = trim($school_name);

            $school = EntityTable::getByExtensionQuery('School')
                ->leftJoin('e.Alias a')
                ->addWhere('e.name = ? or a.name = ?', array($school_name, $school_name))
                ->fetchOne();
            if (!$school) {
                $school = new Entity();
                $school->addExtension('Org');
                $school->addExtension('School');
                $school->name = $school_name;
                $school->save();
                $this->printDebug(' Added School: ' . $school_name);
            }

            // The WHERE literal keeps its embedded line break so the query
            // text stays byte-identical to the original.
            $existing_education = RelationshipTable::getByCategoryQuery('Education')
                ->addWhere('entity1_id = ? 
and entity2_id = ?', array($governor->id, $school->id))
                ->fetchOne();
            if ($existing_education) {
                continue;
            }

            $relationship = new Relationship();
            $relationship->setCategory('Education');
            $relationship->Entity1 = $governor;
            $relationship->Entity2 = $school;
            $relationship->is_current = 0;
            $relationship->save();
            $relationship->addReference($url, null, $relationship->getAllModifiedFields(), $source_name);
            $this->printDebug(' Added education: ' . $relationship->name);
        }
    }

    //GOVERNOR OFFICE AND POSITION
    $office_name = 'Office of the Governor of ' . $row['state'];
    $office = EntityTable::getByExtensionQuery('GovernmentBody')
        ->addWhere('name = ?', $office_name)
        ->fetchOne();
    if (!$office) {
        $office = new Entity();
        $office->name = $office_name;
        $office->addExtension('Org');
        $office->addExtension('GovernmentBody');

        $state = Doctrine::getTable('AddressState')->findOneByName($row['state']);
        if ($state) {
            $office->state_id = $state->id;
        }

        $office->save();
        $office->addReference($url, null, $office->getAllModifiedFields(), $source_name);
        $this->printDebug(' Added office: ' . $office->name);
    }

    $existing_position = RelationshipTable::getByCategoryQuery('Position')
        ->addWhere(
            'entity1_id = ? and entity2_id = ? and description1 = ?',
            array($governor->id, $office->id, 'Governor')
        )
        ->fetchOne();
    if (!$existing_position) {
        // The sorted years are consumed in pairs (start, end); a trailing
        // unpaired year is the start of a term that is still running.
        $years = $row['years'];
        sort($years);
        $year_count = count($years);

        for ($i = 0; $i < $year_count; $i += 2) {
            $governorship = new Relationship();
            $governorship->setCategory('Position');
            $governorship->Entity1 = $governor;
            $governorship->Entity2 = $office;
            $governorship->description1 = 'Governor';
            $governorship->start_date = $years[$i];

            if (isset($years[$i + 1])) {
                $governorship->end_date = $years[$i + 1];
                $governorship->is_current = 0;
                // Only the last finished term turns the blurb into "Former".
                if (!$governor->blurb && !isset($years[$i + 2])) {
                    $governor->blurb = 'Former Governor of ' . $row['state'];
                }
            } else {
                $governorship->is_current = 1;
                if (!$governor->blurb) {
                    $governor->blurb = 'Governor of ' . $row['state'];
                }
            }

            $governor->save();
            $governorship->save();
            $governorship->addReference($url, null, $governorship->getAllModifiedFields(), $source_name);
            $this->printDebug(' Added governorship: ' . $governorship->name);
        }
    }

    //SPOUSE (skipped when the governor already has any Family relationship)
    if (preg_match('/>Spouse\\:<\\/b>(.*?)<br/is', $text, $spouse_match)) {
        $spouse_name = trim(LsHtml::stripTags($spouse_match[1]));
        $existing_family = RelationshipTable::getByCategoryQuery('Family')
            ->addWhere('entity1_id = ? or entity2_id = ?', array($governor->id, $governor->id))
            ->fetchOne();

        if (!$existing_family && $spouse_name !== '') {
            $spouse = PersonTable::parseFlatName($spouse_name);
            $spouse->save();
            $this->printDebug(' Added spouse: ' . $spouse->name);

            $relationship = new Relationship();
            $relationship->setCategory('Family');
            $relationship->Entity1 = $spouse;
            $relationship->Entity2 = $governor;
            $relationship->description1 = 'Spouse';
            $relationship->description2 = 'Spouse';
            $relationship->save();
            $relationship->addReference($url, null, $relationship->getAllModifiedFields(), $source_name);
            $this->printDebug(' Added spouse relationship: ' . $relationship->name);
        }
    }

    //ADDRESS import is disabled: the addresses on the page are malformed.

    //PHONE NUMBER
    if (preg_match('/>Phone\\(s\\)\\:<\\/b>([^<]*)<br/is', $text, $phone_match)) {
        $phone_number = trim($phone_match[1]);
        if (!$governor->Phone->count()) {
            $phone = $governor->addPhone($phone_number);
            $this->printDebug(' Phone: ' . $phone);
        }
    }

    //IMAGE
    if (!$governor->Image->count()
        && preg_match('/<img .*?class\\="display" src\\="([^"]*)"/is', $text, $img_match)
    ) {
        $image_url = $img_match[1];

        // A failed download only skips the image; it does not abort the import.
        try {
            $fileName = ImageTable::createFiles($image_url, $governor->name_first);
        } catch (Exception $e) {
            $fileName = null;
        }

        if ($fileName) {
            //insert image record
            $image = new Image();
            $image->filename = $fileName;
            $image->entity_id = $governor->id;
            $image->title = $governor->name;
            $image->caption = 'From Governors Association website';
            $image->is_featured = true;
            $image->is_free = false;
            $image->url = $image_url;
            $image->save();
            $this->printDebug("Imported image: " . $image->filename);
        }
    }
}
/**
 * Processes one input row: matches or creates the person, and where needed the
 * affiliated organization and the position between them.
 *
 * Everything runs inside one database transaction, which is rolled back (and
 * the exception rethrown) on failure. A summary of what was matched or created
 * is appended to $this->edits.
 *
 * @param array $row Read for 'name', 'affiliation1', 'affiliation2', ... and
 *                   the optional keys 'bio', 'affiliation1_extensions',
 *                   'affiliation1_title', 'start_date', 'end_date', 'title',
 *                   'notes' and 'rank'.
 *
 * @return void
 *
 * @throws Exception Whatever was thrown inside the transaction.
 */
protected function processRow($row)
{
    foreach (array_keys($row) as $key) {
        $row[$key] = trim($row[$key]);
    }

    $edit = array(
        'Search Name' => $row['name'],
        'Affiliation Name' => $row['affiliation1'],
        'Similar Names' => array(),
        'New Person' => null,
        'Existing Person' => null,
        'New Org' => null,
        'Existing Org' => null,
        'New Relationship' => null,
    );

    try {
        $this->db->beginTransaction();

        $person = null;
        $matched_bio = false;
        $similar_ids = array();
        $search_person = PersonTable::parseFlatName($row['name']);

        foreach ($search_person->getSimilarEntitiesQuery(true)->execute() as $candidate) {
            $similar_ids[] = $candidate->id;

            // One first name has to be a prefix of the other.
            $candidate_re = LsString::escapeStringForRegex($candidate->name_first);
            $search_re = LsString::escapeStringForRegex($search_person->name_first);
            $first_names_compatible = preg_match('/^' . $candidate_re . '/su', $search_person->name_first)
                || preg_match('/^' . $search_re . '/su', $candidate->name_first);
            if (!$first_names_compatible) {
                continue;
            }

            // Walk affiliation1, affiliation2, ... until one is already
            // attached to the candidate or the columns run out.
            $has_matched_affil = false;
            $unmatched_affils = array();
            for ($n = 1; isset($row['affiliation' . $n]) && trim($row['affiliation' . $n]) != ''; $n++) {
                $affil = trim($row['affiliation' . $n]);
                $org = $candidate->checkAffiliations(array($affil));
                if ($org) {
                    $has_matched_affil = true;
                    $edit['Existing Org'] = $org->id;
                    break;
                }
                $unmatched_affils[] = $affil;
            }

            if ($has_matched_affil) {
                $person = $candidate;
                break;
            }

            // No stored affiliation matched: look for the names in the bio.
            $bio = $candidate->getExtendedBio();
            foreach ($unmatched_affils as $affil) {
                $affil = OrgTable::removeSuffixes($affil);
                $this->printDebug($affil);
                $this->printDebug($bio);
                if (preg_match('/' . OrgTable::getNameRegex($affil) . '/su', $bio)) {
                    $matched_bio = true;
                    break;
                }
            }

            if ($matched_bio) {
                $person = $candidate;
                break;
            }

            $this->printDebug(' ' . $candidate->name . ' failed');
        }

        $edit['Similar Names'] = array_slice($similar_ids, 0, 5);

        $has_bio = isset($row['bio']) && trim($row['bio']) != '';
        $no_match = !$person;

        if ($no_match) {
            if ($has_bio) {
                $search_person->summary = $row['bio'];
            }
            $search_person->save();
            $this->printDebug(' not found, new person saved: ' . $search_person->name);
            $search_person->addReference($this->source_url, null, null, $this->source_name);
            $edit['New Person'] = $search_person->id;
            $person = $search_person;
        } else {
            if ($has_bio && !$person->summary) {
                $person->summary = $row['bio'];
                $person->save();
            }
            $this->printDebug(' **person found: ' . $person->name);
            $edit['Existing Person'] = $person->id;
        }

        // A position is only added when the person is new or was matched by
        // bio; an affiliation match means it is already attached.
        if ($matched_bio || $no_match) {
            // Among similarly named orgs, take the one with the most related people.
            $affiliated_org = null;
            $best_count = -1;
            foreach (OrgTable::getOrgsWithSimilarNames($row['affiliation1'], true) as $org) {
                $this->printDebug(' found match: ' . $org->name);
                $people_count = $org
                    ->getRelatedEntitiesQuery('Person', RelationshipTable::POSITION_CATEGORY, null, null, null, false, 2)
                    ->count();
                if ($people_count > $best_count) {
                    $affiliated_org = $org;
                    $edit['Existing Org'] = $affiliated_org->id;
                    $best_count = $people_count;
                }
            }

            if (!$affiliated_org) {
                $affiliated_org = new Entity();
                $affiliated_org->addExtension('Org');

                if (isset($row['affiliation1_extensions']) && $row['affiliation1_extensions'] != '') {
                    foreach (explode(',', $row['affiliation1_extensions']) as $ext) {
                        $ext = trim($ext);
                        if (in_array($ext, ExtensionDefinitionTable::$extensionNames)) {
                            $affiliated_org->addExtension($ext);
                        }
                    }
                }

                $affiliated_org->name = $row['affiliation1'];
                $affiliated_org->save();
                $affiliated_org->addReference($this->source_url, null, null, $this->source_name);
                $edit['New Org'] = $affiliated_org->id;
            }

            $rel = new Relationship();
            $rel->Entity1 = $person;
            $rel->Entity2 = $affiliated_org;
            $rel->setCategory('Position');

            if (isset($row['affiliation1_title']) && $row['affiliation1_title'] != '') {
                $description = trim($row['affiliation1_title']);
                $rel->description1 = $description;

                $is_board_title = $description == 'Director'
                    || $description == 'Trustee'
                    || preg_match('/^Chair/su', $description);
                if ($is_board_title) {
                    $rel->is_board = 1;
                    $rel->is_employee = 0;
                }
            }

            $rel->save();
            $rel->addReference($this->source_url, null, null, $this->source_name);
            $edit['New Relationship'] = $rel->id;
        }

        // Optional relationship details are only recorded in the edit summary.
        foreach (array('start_date', 'end_date', 'title', 'notes') as $field) {
            if (isset($row[$field]) && trim($row[$field]) != '') {
                $edit['Relationship'][$field] = trim($row[$field]);
            }
        }

        if (isset($row['rank']) && $row['rank'] != '') {
            $edit['rank'] = $row['rank'];
        }

        $this->db->commit();
    } catch (Exception $e) {
        $this->db->rollback();
        throw $e;
    }

    $this->edits[] = $edit;
}
// Declares the "layer" entity: two short text fields, a long description, a
// file field, references to the background and page entities (both defined
// outside this fragment), and a position field.
// NOTE(review): setPresentation() presumably names the fields used to display
// a record - confirm against the Entity class.
$layerEntity = new Entity($database, "layer");
$layerEntity->setPresentation("title", "foto_filename");
$layerEntity->addField("title", VARCHAR, 50);
$layerEntity->addField("subtitle", VARCHAR, 50);
$layerEntity->addField("description", TEXT);
$layerEntity->addField("foto", FILE);
$layerEntity->addReference($bgEntity, "bg_id");
$layerEntity->addReference($pageEntity, "page_id");
$layerEntity->addField("position", POSITION);
$layerEntity->connect();

// Declares the "message" entity: title, description and position.
$msgEntity = new Entity($database, "message");
$msgEntity->setPresentation("title");
$msgEntity->addField("title", VARCHAR, 255);
$msgEntity->addField("description", TEXT);
$msgEntity->addField("position", POSITION);
$msgEntity->connect();

/* FORMS + FORM CATEGORIES */
// Declares the "catmodule" entity (form category): name and position. It is
// declared before "module" because the module entity references it.
$catModuleEntity = new Entity($database, "catmodule");
$catModuleEntity->setPresentation("name");
$catModuleEntity->addField("name", VARCHAR, 50);
$catModuleEntity->addField("position", POSITION);
$catModuleEntity->connect();

/* *** */
// Declares the "module" entity (form), created with the WITH_OWNER flag: name,
// description, position, an attached file and a reference to its category.
$moduleEntity = new Entity($database, "module", WITH_OWNER);
$moduleEntity->setPresentation("name");
$moduleEntity->addField("name", VARCHAR, 100);
$moduleEntity->addField("description", TEXT);
$moduleEntity->addField("position", POSITION);
$moduleEntity->addField("file", FILE);
$moduleEntity->addReference($catModuleEntity, "category");
$moduleEntity->connect();
/**
 * Scrapes one Forbes private-company profile page and stores the company.
 *
 * The page markup differs by list year ($this->year), so most fields are
 * extracted with a year-specific regular expression. After scraping, the
 * company is matched by name against existing Org/PrivateCompany entities:
 * an existing company only gets its revenue refreshed, a new one is created
 * with employees, revenue, website, summary, phone and address. In both
 * cases a Forbes.com reference is attached and the company is added to the
 * list with its rank via saveToList().
 *
 * Industry, fax and CEO data are parsed but not persisted by this method.
 *
 * @param string $url URL of the company profile page
 * @return void Returns early (after a debug message) when the page cannot
 *              be fetched or no company name is found
 */
protected function import($url)
{
    if ($this->browser->get($url)->responseIsError()) {
        $this->printDebug("Couldn't get company: " . $url);
        return;
    }

    $text = $this->browser->getResponseText();
    $year = $this->year;

    $rank = null;
    $name = null;
    $industryName = null;
    $street1 = null;
    $street2 = null;
    $city = null;
    $state = null;
    $postal = null;
    $phone = null;
    $fax = null;
    $website = null;
    $summary = null;
    $revenue = null;
    $employees = null;
    $ceoName = null;
    $ceoBirthYear = null;

    //get rank
    if ($year > 1999 && $year < 2005 && preg_match('/ForbesListRank" content="(\\d+)"/i', $text, $match)) {
        $rank = $match[1];
    } elseif ($year < 2000 && preg_match('/td class="highlightcolor1">(\\d+)/i', $text, $match)) {
        $rank = $match[1];
    } elseif ($year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
        $rank = html_entity_decode($match[1]);
    }

    //get name (mandatory: without it the page cannot be imported)
    if ($year > 1995 && $year < 2005 && preg_match('/span class="mainlisttitle">([^<]+)<\\/span>/i', $text, $match)) {
        $name = html_entity_decode($match[1]);
    } elseif ($year > 2004 && preg_match('/<b>#(\\d+) ([^<]+)<\\/b>/i', $text, $match)) {
        $name = html_entity_decode($match[2]);
    } else {
        $this->printDebug("Company name not found");
        return;
    }

    //get industry
    if ($year > 1995 && $year < 2001 && preg_match('/<b>See more private companies in <a [^>]+>([^<]+)<\\/a><\\/b>/ism', $text, $match)) {
        $industryName = trim(html_entity_decode($match[1]));
    } elseif ($year > 2000 && $year < 2005 && preg_match('/private companies\\<\\/a> in ([^\\.]+)/ism', $text, $match)) {
        $industryName = trim(html_entity_decode($match[1]));
    } elseif ($year > 2004 && preg_match('/<b>Industry:<\\/b> <a href="[^"]+">([^<]+)<\\/a>/ism', $text, $match)) {
        $industryName = trim(html_entity_decode($match[1]));
    }

    //get address
    if ($year > 1995 && $year < 2000 && preg_match('/<td class="mainlisttxt"\\>(.+)phone/smU', $text, $match)) {
        list($street1, $street2, $city, $state, $postal) = $this->parseLegacyContactBlock($match[1]);
    } elseif ($year > 1999 && $year < 2005 && preg_match('/(view private companies under this industry|in the same industry).+<br><br>(.+)phone/is', $text, $match)) {
        // Group 1 is only the anchor phrase; the contact block itself is group 2.
        list($street1, $street2, $city, $state, $postal) = $this->parseLegacyContactBlock($match[2]);
    } elseif ($year > 2004 && preg_match('/<div class="spaced">(.+)<\\/div>/ismU', $text, $match)) {
        $contactLines = explode('<br>', $match[1]);
        $phoneFaxPattern = '/Phone\\:|Fax\\:/i';
        // Only treat the block as an address when neither of its first two lines is a phone/fax line.
        $firstIsAddress = !preg_match($phoneFaxPattern, $contactLines[0]);
        $secondIsAddress = !isset($contactLines[1]) || !preg_match($phoneFaxPattern, $contactLines[1]);
        if ($firstIsAddress && $secondIsAddress) {
            $street1 = trim($contactLines[0]);
            $cityLine = null;
            if (count($contactLines) == 4) {
                $cityLine = $contactLines[1];
            } elseif (count($contactLines) == 5) {
                $street2 = $contactLines[1];
                $cityLine = $contactLines[2];
            }
            if ($cityLine !== null && preg_match('/^(.+?) ([A-Z]{2}) (\\d{5})($|-)/sU', trim($cityLine), $cityMatch)) {
                $city = $cityMatch[1];
                $state = $cityMatch[2];
                $postal = $cityMatch[3];
            }
        }
    }

    //get phone
    if ($year > 1995 && $year < 2005 && preg_match('/phone ([\\d\\-]{12})/is', $text, $match)) {
        $phone = trim(str_replace('-', '', $match[1]));
    } elseif ($year > 2004 && preg_match('/Phone: ([\\d\\-]{12})/is', $text, $match)) {
        $phone = trim(str_replace('-', '', $match[1]));
    }

    //get fax (parsed only; it is not stored on the company)
    if ($year > 1995 && $year < 2005 && preg_match('/fax ([\\d\\-]{12})/is', $text, $match)) {
        $fax = trim(str_replace('-', '', $match[1]));
    } elseif ($year > 2004 && preg_match('/Fax: ([\\d\\-]{12})/is', $text, $match)) {
        $fax = trim(str_replace('-', '', $match[1]));
    }

    //get website
    if ($year > 1995 && $year < 2005 && preg_match('/this company\'s web site[^>]+\\>(http[^\\<]+)/is', $text, $match)) {
        $website = $match[1];
    } elseif ($year > 2004 && preg_match('/<div class="spaced">.*<\\/div>\\s+<br>\\s+<a href="(http:\\/\\/[^"]+)">/ismU', $text, $match)) {
        $website = $match[1];
    }

    //get ceo
    if ($year > 1995 && $year < 2005 && preg_match('/b>CEO: ([^<]+)<\\/b>/ism', $text, $match)) {
        $ceoName = $match[1];
    } elseif ($year > 2004 && preg_match('/CEO: ([^<]+)<\\/b> , (\\d+) <br>/ism', $text, $match)) {
        $ceoName = html_entity_decode($match[1]);
        // The page lists the CEO's age; derive the birth year from the current year.
        $ceoBirthYear = date("Y") - $match[2];
    }

    //get summary
    $rawSummary = null;
    if ($year > 1995 && $year < 2000 && preg_match_all('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
        // On these pages the summary is taken from the second matching paragraph.
        if (isset($match[1][1])) {
            $rawSummary = $match[1][1];
        }
    } elseif ($year > 1999 && $year < 2005 && preg_match('/p class="mainlisttxt">(.*)<\\/p>/ismU', $text, $match)) {
        $rawSummary = $match[1];
    } elseif ($year > 2004 && preg_match('/<blockquote class="spaced">(.*)<\\/blockquote>/ismU', $text, $match)) {
        $rawSummary = $match[1];
    }
    if ($rawSummary !== null) {
        // NOTE(review): the first search string reads as a plain space here; it may
        // originally have been a non-breaking or double space -- confirm against the repository.
        $summary = str_replace(array(' ', "\n"), array(' ', ' '), html_entity_decode(trim(strip_tags($rawSummary))));
    }

    //get revenue
    if ($year > 1995 && $year < 2000 && preg_match('/<td class="mainlisttxt">\\$([\\S]+) mil<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
        $this->printDebug($match[1]);
        // Figure is in millions: append six zeros, then strip the thousands separators.
        $revenue = str_replace(",", "", $match[1] . ",000,000");
    } elseif ($year > 1999 && $year < 2005 && preg_match('/<td class="mainlisttxt" nowrap>([^<]+)<sup>e?<\\/sup><\\/td>/ismU', $text, $match)) {
        $this->printDebug($match[1]);
        $revenue = str_replace(",", "", $match[1] . ",000,000");
    } elseif ($year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">\\$([\\S]+) bil.*<\\/td> <td class="highlight" nowrap="nowrap">[^<]+<\\/td> <td class="highlight" nowrap="nowrap">([^<]+)<\\/td>/ismU', $text, $match)) {
        $revenue = 1000000000 * $match[1];
    }

    //get employees
    if ($year > 1995 && $year < 2005 && preg_match('/mil<\\/td>.+<td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<\\/td>.+<td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
        $employees = str_replace(',', '', $match[2]);
    } elseif ($year > 1999 && $year < 2005 && preg_match('/<sup>e?<\\/sup><\\/td> <td class="mainlisttxt"( nowrap)?>(\\d[^<]+)<sup>e?<\\/sup><\\/td> <td class="mainlisttxt">[a-zA-Z]+<\\/td>/ismU', $text, $match)) {
        $employees = str_replace(',', '', $match[2]);
    } elseif ($year > 2004 && preg_match('/<td class="highlight" nowrap="nowrap">([\\d,]+)<\\/td> <td class="highlight" nowrap="nowrap">[A-Z][a-z]{2,}<\\/td>/', $text, $match)) {
        $employees = str_replace(',', '', $match[1]);
    }

    // Name with generic business words removed; used for the debug line only.
    $search_company_name = trim(implode(' ', array_diff(explode(' ', ucwords(strtolower($name))), array_merge(LsLanguage::$business, LsLanguage::$businessAbbreviations))));
    $this->printDebug("{$search_company_name} == {$name}");

    $company = EntityTable::getByExtensionQuery(array('Org', 'PrivateCompany'))
        ->addWhere("LOWER(REPLACE( org.name, '-' , '')) = ?", strtolower($name))
        ->fetchOne();

    if ($company) {
        $this->printDebug("Company exists");
        $company->revenue = $revenue;
        $company->save();
    } else {
        $this->printDebug("Creating new company {$name}");
        //clear cache
        Doctrine::getTable('ExtensionDefinition')->clear();
        $company = new Entity();
        $company->addExtension('Org');
        $company->addExtension('Business');
        $company->addExtension('PrivateCompany');
        $company->name = LsLanguage::titleize($name);
        $company->employees = strlen($employees) ? $employees : null;
        $company->revenue = strlen($revenue) ? $revenue : null;
        $company->website = strlen($website) ? $website : null;
        $company->summary = strlen($summary) ? trim($summary) : null;

        if ($phone) {
            $company->addPhone($phone);
        }

        //add address (only when both city and state were parsed)
        if ($city && $state) {
            $address = new Address();
            $address->street1 = strlen($street1) ? $street1 : null;
            $address->street2 = strlen($street2) ? $street2 : null;
            $address->city = strlen($city) ? $city : null;
            if ($stateRecord = AddressStateTable::retrieveByText($state)) {
                $address->State = $stateRecord;
            }
            $address->postal = $postal;
            $company->addAddress($address);
            $address->save();
            $address->addReference($url, null, array('city', 'country_id', 'postal', 'state_id', 'street1'), 'Forbes.com', null, null);
        }
    }

    $company->save();
    $company->addReference($url, null, array('website', 'name', 'summary', 'revenue', 'employees'), 'Forbes.com', null, null);
    $this->saveToList($company, $rank);
}

/**
 * Splits a pre-2005 contact block ("<br>"-separated lines captured up to the
 * word "phone") into street1, street2, city, state and postal code.
 *
 * @param string $contactBlock Raw HTML fragment holding the address lines
 * @return array array($street1, $street2, $city, $state, $postal); parts that are missing are null
 */
private function parseLegacyContactBlock($contactBlock)
{
    $contactLines = explode('<br>', trim($contactBlock));
    // The last fragment is whatever sat between the final <br> and "phone"; it is not an address line.
    array_pop($contactLines);

    // Three lines means street1 / street2 / city-state-zip; otherwise street1 / city-state-zip.
    $hasSecondStreetLine = count($contactLines) == 3;
    $street1 = isset($contactLines[0]) ? $contactLines[0] : null;
    $street2 = $hasSecondStreetLine ? $contactLines[1] : null;
    $cityLineIndex = $hasSecondStreetLine ? 2 : 1;

    $city = null;
    $state = null;
    $postal = null;
    if (isset($contactLines[$cityLineIndex])) {
        $cityStateZip = LsLanguage::parseCityStatePostal($contactLines[$cityLineIndex]);
        $city = $cityStateZip['city'];
        $state = $cityStateZip['state'];
        $postal = $cityStateZip['zip'];
    }

    return array($street1, $street2, $city, $state, $postal);
}
$luoghiEntity->addReference($fotoEntity, "id_foto"); $luoghiEntity->connect(); //ITINERARIO// $itinerarioEntity = new Entity($database, "itinerario"); $itinerarioEntity->setPresentation("username_users"); $itinerarioEntity->addPrimaryKey("id", INT, 255); $itinerarioEntity->addReference($usersEntity, "username_users"); $itinerarioEntity->addReference($luoghiEntity, "id_luoghi"); $itinerarioEntity->connect(); //RECENSIONI// $recensioniEntity = new Entity($database, "recensioni"); $recensioniEntity->setPresentation("titolo"); $recensioniEntity->addPrimaryKey("id", INT, 255); $recensioniEntity->addField("commento", VARCHAR, 255); $recensioniEntity->addField("titolo", VARCHAR, 255); $recensioniEntity->addReference($usersEntity, "username_users"); $recensioniEntity->addReference($luoghiEntity, "id_luoghi"); $recensioniEntity->connect(); //FOTO// /* $immaginiEntity = new Entity($database, "immagini"); $immaginiEntity->addPrimaryKey("id", INT, 255); $immaginiEntity->addField("script", VARCHAR, 255); $immaginiEntity->addField("id_galleria", INT, 255); $immaginiEntity->connect(); //////////////////////////////////////////////////////////////////////FOTO E GALLERIA USATE DI LEARNPAD//////////////////////////////////// //GALLERIA// $galleriaEntity = new Entity($database, "galleria"); $galleriaEntity->setPresentation("titolo"); $galleriaEntity->addPrimaryKey("id", INT, 255); $galleriaEntity->addField("titolo", VARCHAR, 255);
/**
 * Bulk-adds entities to the current list from an uploaded spreadsheet.
 *
 * Handles two kinds of POST:
 *  1. "reference" + "csv" parameters present: the spreadsheet is parsed, a
 *     source reference is selected or created, and every named row is
 *     searched for so that $this->matches can be shown for confirmation.
 *  2. "ref_id" parameter present: the confirmed choices ("entity_N" is an
 *     existing entity id or the string 'new') are added to the list, the
 *     caches are cleared and the user is redirected to the list.
 *
 * Validation problems are reported through $request->setError() and the
 * action returns without redirecting.
 *
 * @param object $request Current web request
 * @return void
 */
public function executeAddBulk($request)
{
    $this->checkList($request, false, false);
    $this->reference_form = new ReferenceForm();
    $this->reference_form->setSelectObject($this->list);
    $this->csv_form = new CsvUploadForm();

    if (!$request->isMethod('post')) {
        return;
    }

    if ($request->getParameter('commit') == 'Cancel') {
        $this->redirect(LsListTable::getInternalUrl($this->list));
    }

    // IF REFERENCE INFO AND FILE HAVE BEEN SUBMITTED, LOAD DATA IN
    if ($request->hasParameter('reference') && $request->hasParameter('csv')) {
        $csvParams = $request->getParameter('csv');
        $filePath = $request->getFilePath('csv[file]');
        $this->csv_form->bind($csvParams, $request->getFiles('csv'));
        $refParams = $request->getParameter('reference');
        $this->reference_form->bind($refParams);

        if (!$this->reference_form->isValid()) {
            return;
        }

        $spreadsheetArr = LsSpreadsheet::parse($filePath);
        if (!$spreadsheetArr) {
            $request->setError('csv', 'The file you uploaded could not be parsed properly.');
            return;
        }
        if (!in_array('name', $spreadsheetArr['headers'])) {
            $request->setError('csv', 'The file you uploaded could not be parsed properly because there is no "name" column.');
            return;
        }
        $names = $spreadsheetArr['rows'];

        // Validate the default type BEFORE persisting anything, so a failed
        // submission does not leave an orphaned Reference row behind.
        $this->default_type = $request->getParameter('default_type');
        if (!$this->default_type) {
            $request->setError('csv', 'You need to choose a default type.');
            return;
        }

        $existingSource = isset($refParams['existing_source']) ? $refParams['existing_source'] : null;
        if ($existingSource) {
            $this->ref_id = $existingSource;
            // Make sure the chosen source really exists; the second step looks it up again by this id.
            if (!Doctrine::getTable('Reference')->find($this->ref_id)) {
                $request->setError('reference', 'The selected source could not be found.');
                return;
            }
        } else {
            $ref = new Reference();
            $ref->object_model = 'LsList';
            $ref->object_id = $this->list->id;
            $ref->source = $refParams['source'];
            $ref->name = $refParams['name'];
            $ref->source_detail = $refParams['source_detail'];
            $ref->publication_date = $refParams['publication_date'];
            $ref->save();
            $this->ref_id = $ref->id;
        }

        $this->extensions = ExtensionDefinitionTable::getByTier(2, $this->default_type);
        $extensions_arr = array();
        foreach ($this->extensions as $ext) {
            $extensions_arr[] = $ext->name;
        }

        $this->matches = array();
        if (!is_array($names)) {
            return;
        }

        foreach ($names as $row) {
            if (!isset($row['name']) || trim($row['name']) == '') {
                continue;
            }

            $name = $row['name'];
            $terms = $name;
            $primary_ext = null;
            if ($this->default_type == 'Person') {
                $primary_ext = "Person";
                // Single-word names are searched as-is; multi-word names go through the person name search.
                if (count(preg_split('/\\s+/', $name)) > 1) {
                    $terms = PersonTable::nameSearch($name, true);
                }
            } elseif ($this->default_type == 'Org') {
                $primary_ext = "Org";
                $terms = OrgTable::nameSearch($name);
            }

            // Arguments: terms, page 1, 20 results, no list ids, aliases enabled, primary extension.
            $pager = EntityTable::getSphinxPager($terms, 1, 20, null, true, $primary_ext);

            $match = array('name' => $name);
            $match['search_results'] = $pager->execute();
            $match['blurb'] = isset($row['blurb']) ? $row['blurb'] : null;
            $match['rank'] = isset($row['rank']) ? $row['rank'] : null;
            $match['types'] = array();
            if (isset($row['types'])) {
                // Keep only the requested types that are valid extensions for the default type.
                foreach (array_map('trim', explode(',', $row['types'])) as $type) {
                    if (in_array($type, $extensions_arr)) {
                        $match['types'][] = $type;
                    }
                }
            }
            $this->matches[] = $match;
        }

        return;
    }

    if (!$request->hasParameter('ref_id')) {
        $request->setError('name', 'The name you entered is invalid');
        return;
    }

    $this->ref_id = $this->getRequestParameter('ref_id');
    $default_type = $this->getRequestParameter('default_type');
    $count = (int) $this->getRequestParameter('count');

    // The source reference is only needed for newly created entities; load it at most once.
    $ref = null;
    $refLoaded = false;

    for ($i = 0; $i < $count; $i++) {
        $entity_id = $request->getParameter('entity_' . $i);
        if (!$entity_id) {
            continue;
        }

        $selected_entity_id = null;
        if ($entity_id == 'new') {
            $name = $request->getParameter('new_name_' . $i);
            if ($default_type == 'Person') {
                $new_entity = PersonTable::parseFlatName($name);
            } else {
                $new_entity = new Entity();
                $new_entity->addExtension('Org');
                $new_entity->name = trim($name);
            }

            $types = $request->getParameter('new_extensions_' . $i);
            if (is_array($types)) {
                foreach ($types as $type) {
                    $new_entity->addExtension($type);
                }
            }

            // First save happens before the reference is attached, as in the original flow.
            $new_entity->save();
            $new_entity->blurb = $request->getParameter('new_blurb_' . $i);

            if (!$refLoaded) {
                $ref = Doctrine::getTable('Reference')->find($request->getParameter('ref_id'));
                $refLoaded = true;
            }
            if ($ref) {
                $new_entity->addReference($ref->source, null, null, $ref->name);
            }

            $new_entity->save();
            $selected_entity_id = $new_entity->id;
        } elseif ($entity_id > 0) {
            $selected_entity_id = $entity_id;
        }

        if (!$selected_entity_id) {
            continue;
        }

        // Skip entities that are already on the list.
        $q = LsDoctrineQuery::create()
            ->from('LsListEntity le')
            ->where('le.list_id = ? AND le.entity_id = ?', array($this->list['id'], $selected_entity_id));
        if (!$q->count()) {
            $ls_list_entity = new LsListEntity();
            $ls_list_entity->list_id = $this->list->id;
            $ls_list_entity->entity_id = $selected_entity_id;
            $ls_list_entity->rank = $request->getParameter('entity_' . $i . '_rank');
            $ls_list_entity->save();
            LsCache::clearEntityCacheById($selected_entity_id);
        }
    }

    $this->clearCache($this->list);
    $this->clearRailsCache($this->list->id);
    $this->redirect($this->list->getInternalUrl());
}
/**
 * Imports one LDA (lobbying disclosure) filing for the given organisation.
 *
 * Runs inside a database transaction and proceeds in stages:
 *  - determine the lobbying entity: $org itself when the client and
 *    registrant names match, otherwise an existing/new lobbyist or lobbying
 *    firm identified by the registrant id ($org is then the client);
 *  - find or create the government bodies named in the filing;
 *  - find or create lobbyists and their position relationships with the
 *    lobbying entity (unless the lobbying entity is itself a lobbyist);
 *  - find or create the lobbying issues and the LobbyFiling record;
 *  - create or update the client->lobbying-entity transaction relationship
 *    and the lobbying-entity->government-body lobbying relationships, and
 *    link them all to the filing.
 *
 * @param object $org        Entity the filing is being imported for
 * @param object $lda_filing LDA filing record to import
 * @return null Returns null after rolling back when the filing has no
 *              client, when the registrant ids conflict, or when no
 *              lobbying entity could be determined
 * @throws Exception Any exception is re-thrown after the transaction is rolled back
 */
private function importFiling($org, $lda_filing)
{
    try {
        $this->printTimeSince();
        $this->printDebug('Starting import...');
        $this->db->beginTransaction();

        // Source URL used for every reference created from this filing.
        $filing_source = self::$filing_url . $lda_filing->federal_filing_id;

        $excerpt = array();
        $date = null;
        $excerpt['Federal Filing Id'] = $lda_filing->federal_filing_id;
        $excerpt['Year'] = $lda_filing->year;
        $excerpt['Type'] = $lda_filing->LdaType->description;

        // Keep only the part of the "received" value before the first "T" and normalise slashes to dashes.
        if (preg_match('/^[^T]*/su', $lda_filing->received, $match)) {
            $date = str_replace('/', '-', $match[0]);
        }

        $lda_registrant = Doctrine::getTable('LdaRegistrant')->find($lda_filing->registrant_id);
        $excerpt['Registrant'] = $lda_registrant->name;

        if (!$lda_filing->client_id) {
            $this->db->rollback();
            return null;
        }
        $lda_client = Doctrine::getTable('LdaClient')->find($lda_filing->client_id);
        $excerpt['Client'] = $lda_client->name;

        $lobbying_entity = null;
        $lobbyist_last = null;
        $lobbyist_longest = '';

        //DETERMINE (& CREATE) LOBBYING ENTITY
        if (strtolower(OrgTable::stripNamePunctuation($lda_client->name)) == strtolower(OrgTable::stripNamePunctuation($lda_registrant->name))) {
            // Client and registrant are the same organisation: $org lobbies for itself.
            $lobbying_entity = $org;
            $client_entity = null;
            if (!$lobbying_entity->lda_registrant_id) {
                $lobbying_entity->lda_registrant_id = $lda_registrant->federal_registrant_id;
                $lobbying_entity->save();
                $lobbying_entity->addReference($filing_source, null, $lobbying_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            } elseif ($lobbying_entity->lda_registrant_id != $lda_registrant->federal_registrant_id) {
                $this->printDebug("LDA registrant ids did not match up for {$lobbying_entity->name} and {$lda_registrant->name} even though names matched {$lda_client->name}\n");
                $this->db->rollback();
                return null;
            }
            $this->printDebug($lobbying_entity->name . ' noted (same as client ' . $lda_client->name . ')');
        } else {
            $client_entity = $org;
            if ($lda_client->description) {
                $description = trim($lda_client->description);
                // Skip descriptions containing a date-like "/digits/" or "-digits-" fragment.
                if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                    // Short descriptions fill an empty blurb, long ones an empty summary.
                    if (strlen($description) < 200) {
                        if (!$org->blurb || $org->blurb == '') {
                            $org->blurb = $description;
                        }
                    } else {
                        if (!$org->summary || $org->summary == '') {
                            $org->summary = $description;
                        }
                    }
                }
            }
            $org->save();
            $this->printDebug($lda_client->name . ' is distinct from ' . $lda_registrant->name);
        }

        $lda_lobbyists = $lda_filing->LdaLobbyists;
        $excerpt['Lobbyists'] = array();
        foreach ($lda_lobbyists as $lda_lobbyist) {
            $excerpt['Lobbyists'][] = $lda_lobbyist->name;
        }
        $excerpt['Lobbyists'] = implode('; ', $excerpt['Lobbyists']);

        if (!$lobbying_entity) {
            $lobbyist_name = null;
            if (count($lda_lobbyists)) {
                // Derive the first lobbyist's last name and longest other name part.
                $lobbyist_parts = explode(',', $lda_lobbyists[0]->name);
                if (count($lobbyist_parts) > 1) {
                    // "Last, First ..." form
                    $lobbyist_last = trim($lobbyist_parts[0]);
                    $arr = LsString::split($lobbyist_parts[1]);
                    $lens = array_map('strlen', $arr);
                    arsort($lens);
                    $keys = array_keys($lens);
                    $lobbyist_longest = $arr[$keys[0]];
                    $lobbyist_name = trim($lobbyist_parts[1]) . ' ' . trim($lobbyist_parts[0]);
                } else {
                    // Flat form: strip a leading honorific, then take the last element of the length-sorted parts.
                    $lobbyist_name = preg_replace('/^(Mr|MR|MS|Dr|DR|MRS|Mrs|Ms)\\b\\.?/su', '', $lda_lobbyists[0]->name);
                    $arr = LsString::split(trim($lobbyist_name));
                    $arr = LsArray::strlenSort($arr);
                    $lobbyist_last = array_pop($arr);
                    if (count($arr)) {
                        // array_shift() takes its argument by reference, so it needs a real variable.
                        $sorted_rest = LsArray::strlenSort($arr);
                        $lobbyist_longest = array_shift($sorted_rest);
                    } else {
                        $lobbyist_longest = '';
                    }
                }
            }

            //check to see if registrant and lobbyist are same
            $registrant_is_lobbyist = false;
            if (count($lda_lobbyists) == 1) {
                if (strtoupper($lda_lobbyists[0]->name) == strtoupper($lda_registrant->name)) {
                    $registrant_is_lobbyist = true;
                } elseif ($lobbyist_last) {
                    // The registrant name must end with the lobbyist's last name and contain the
                    // longest other name part. stripos() returns false when nothing is found,
                    // which must not be mistaken for position 0.
                    $last_pos = stripos($lda_registrant->name, $lobbyist_last);
                    $expected_pos = strlen($lda_registrant->name) - strlen($lobbyist_last);
                    $registrant_is_lobbyist = $last_pos !== false
                        && $last_pos == $expected_pos
                        && stristr($lda_registrant->name, $lobbyist_longest);
                }
            }

            if ($registrant_is_lobbyist) {
                $existing_lobbyist_registrant = EntityTable::getByExtensionQuery('Lobbyist')->addWhere('lobbyist.lda_registrant_id = ?', $lda_registrant->federal_registrant_id)->execute()->getFirst();
                if ($existing_lobbyist_registrant) {
                    $lobbying_entity = $existing_lobbyist_registrant;
                    $this->printDebug('Existing lobbyist is lobbying entity: ' . $lobbying_entity->name);
                } else {
                    $lobbyist = $this->prepLobbyistName($lda_lobbyists[0]->name);
                    if ($lobbyist) {
                        $lobbyist->lda_registrant_id = $lda_registrant->federal_registrant_id;
                        $lobbyist->save();
                        $lobbyist->addReference($filing_source, null, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                        $this->printDebug('New lobbyist/lobbying entity saved: ' . $lobbyist->name);
                        $lobbying_entity = $lobbyist;
                    }
                }
            } else {
                $existing_firm = EntityTable::getByExtensionQuery('Org')->addWhere('org.lda_registrant_id = ? ', $lda_registrant->federal_registrant_id)->execute()->getFirst();
                if ($existing_firm) {
                    $modified = array();
                    $lobbying_entity = $existing_firm;
                    if ($lda_registrant->description) {
                        $description = trim($lda_registrant->description);
                        if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                            if (strlen($description) < 200) {
                                if (!$existing_firm->blurb || $existing_firm->blurb == '') {
                                    $existing_firm->blurb = $description;
                                    $modified[] = 'blurb';
                                }
                            } else {
                                if (!$existing_firm->summary || $existing_firm->summary == '') {
                                    $existing_firm->summary = $description;
                                    $modified[] = 'summary';
                                }
                            }
                        }
                    }
                    // Only add the registrant address when the firm has no address yet.
                    if ($lda_registrant->address && $lda_registrant->address != '' && count($existing_firm->Address) == 0) {
                        if ($address = $existing_firm->addAddress($lda_registrant->address)) {
                            $existing_firm->save();
                            $address->addReference($filing_source, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                        }
                    }
                    $existing_firm->save();
                    if (count($modified)) {
                        $existing_firm->addReference($filing_source, null, $modified, 'LDA Filing', null, $date, false);
                    }
                    $this->printDebug('Existing firm is lobbying entity: ' . $lobbying_entity->name);
                } else {
                    $firm = new Entity();
                    $firm->addExtension('Org');
                    $firm->addExtension('Business');
                    $firm->addExtension('LobbyingFirm');
                    $firm->name = LsLanguage::titleize(OrgTable::stripNamePunctuation($lda_registrant->name), true);
                    $firm->lda_registrant_id = $lda_registrant->federal_registrant_id;
                    if ($lda_registrant->description) {
                        $description = trim($lda_registrant->description);
                        if ($description != '' && preg_match('/[\\/\\-]\\d+[\\/\\-]/isu', $description) == 0) {
                            if (strlen($description) < 200) {
                                $firm->blurb = $description;
                            } else {
                                $firm->summary = $description;
                            }
                        }
                    }
                    if ($lda_registrant->address && $lda_registrant->address != '') {
                        if ($address = $firm->addAddress($lda_registrant->address)) {
                            $firm->save();
                            $address->addReference($filing_source, null, $address->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                        }
                    }
                    $firm->save();
                    $this->printDebug('New lobbying firm/lobbying entity saved: ' . $firm->name);
                    $firm->addReference($filing_source, null, $firm->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                    $lobbying_entity = $firm;
                }
            }
        }

        // prepLobbyistName() can fail, leaving no lobbying entity. Abort cleanly here
        // rather than hitting a fatal error further down with the transaction still open.
        if (!$lobbying_entity) {
            $this->printDebug('Could not determine lobbying entity for filing ' . $lda_filing->federal_filing_id);
            $this->db->rollback();
            return null;
        }

        //PREP GOVT ENTITIES
        $lda_govts = $lda_filing->LdaGovts;
        $govt_entities = array();
        $excerpt['Government Bodies'] = array();
        foreach ($lda_govts as $lda_govt) {
            $excerpt['Government Bodies'][] = $lda_govt->name;
            $name_arr = $this->prepGovtName($lda_govt->name);
            if (!$name_arr) {
                continue;
            }
            if ($govt_entity = EntityTable::findByAlias($lda_govt->name, 'lda_government_body')) {
                $govt_entities[] = $govt_entity;
                continue;
            }

            $govt_entity = EntityTable::getByExtensionQuery(array('Org', 'GovernmentBody'))->addWhere('name = ?', array($name_arr[0]))->fetchOne();
            $is_new_govt_entity = !$govt_entity;
            if ($is_new_govt_entity) {
                $govt_entity = new Entity();
                $govt_entity->addExtension('Org');
                $govt_entity->addExtension('GovernmentBody');
                $govt_entity->name = $name_arr[0];
                $govt_entity->name_nick = $name_arr[1];
                $govt_entity->is_federal = 1;
                $govt_entity->save();
            }

            // Record the raw LDA name as an alias for later findByAlias() lookups.
            $alias = new Alias();
            $alias->context = 'lda_government_body';
            $alias->name = $lda_govt->name;
            $alias->entity_id = $govt_entity->id;
            $alias->save();

            if ($is_new_govt_entity) {
                // NOTE(review): $excerpt is still an array at this point, while every other
                // addReference() call here receives null or the formatted string -- confirm intent.
                $govt_entity->addReference($filing_source, $excerpt, $govt_entity->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            }
            $govt_entities[] = $govt_entity;
        }
        $excerpt['Government Bodies'] = implode('; ', $excerpt['Government Bodies']);

        // Flatten the excerpt into "Key: value" lines; from here on $excerpt is a string.
        $excerpt_str = '';
        foreach ($excerpt as $k => $v) {
            $excerpt_str .= $k . ": ";
            $excerpt_str .= $v . "\n";
        }
        $excerpt = trim($excerpt_str);
        $this->printDebug($excerpt);

        $relationships = array();
        $lobbying_entity_extensions = $lobbying_entity->getExtensions();
        $lobbying_entity_is_lobbyist = in_array('Lobbyist', $lobbying_entity_extensions);

        //CREATE LOBBYIST POSITION RELATIONSHIPS
        $lobbyists = array();
        if (!$lobbying_entity_is_lobbyist) {
            // Entities already holding a position at the lobbying entity, for name matching below.
            $firm_lobbyists = array();
            if ($lobbying_entity->exists()) {
                $q = LsDoctrineQuery::create()->from('Entity e')->leftJoin('e.Relationship r ON (r.entity1_id = e.id)')->where('r.entity2_id = ? AND r.category_id = ?', array($lobbying_entity->id, RelationshipTable::POSITION_CATEGORY));
                $firm_lobbyists = $q->execute();
            }

            foreach ($lda_lobbyists as $lda_lobbyist) {
                $lobbyist = $this->prepLobbyistName($lda_lobbyist->name);
                if (!$lobbyist) {
                    continue;
                }

                $existing_lobbyist = null;
                foreach ($firm_lobbyists as $fl) {
                    if (PersonTable::areNameCompatible($fl, $lobbyist)) {
                        $existing_lobbyist = $fl;
                        break;
                    }
                }

                if ($existing_lobbyist) {
                    $lobbyists[] = $existing_lobbyist;
                    continue;
                }

                $lobbyist->save();
                $lobbyist->addReference($filing_source, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);

                $r = new Relationship();
                $r->Entity1 = $lobbyist;
                $r->Entity2 = $lobbying_entity;
                $r->setCategory('Position');
                $r->description1 = 'Lobbyist';
                $r->is_employee = 1;
                $r->save();
                // NOTE(review): this passes the lobbyist's modified fields rather than the
                // relationship's -- kept as in the original, but looks like a copy/paste slip.
                $r->addReference($filing_source, $excerpt, $lobbyist->getAllModifiedFields(), 'LDA Filing', null, $date, false);
                $lobbyists[] = $lobbyist;
            }
        }

        //PREP ISSUES
        $issues = array();
        $lda_issues = Doctrine_Query::create()->from('LdaFilingIssue f')->leftJoin('f.LdaIssue i')->where('f.filing_id = ?', $lda_filing->id)->execute();
        foreach ($lda_issues as $lda_issue) {
            $name = LsLanguage::nameize($lda_issue->LdaIssue->name);
            $issue = Doctrine::getTable('LobbyIssue')->findOneByName($name);
            if (!$issue) {
                $issue = new LobbyIssue();
                $issue->name = $name;
                $issue->save();
            }
            $issues[] = array($issue, $lda_issue->specific_issue);
        }

        //CREATE LOBBY FILING
        $lobby_filing = new LobbyFiling();
        $lobby_filing->year = $lda_filing->year;
        $lobby_filing->amount = $lda_filing->amount;
        $lobby_filing->federal_filing_id = $lda_filing->federal_filing_id;
        $period = $lda_filing->LdaPeriod->description;
        $lobby_filing->start_date = $date;
        // The period label is the text before the first "(" of the period description.
        if ($paren = strpos($period, '(')) {
            $lobby_filing->period = trim(substr($period, 0, $paren));
        } else {
            $lobby_filing->period = 'Undetermined';
        }
        $lobby_filing->report_type = LsLanguage::nameize($lda_filing->LdaType->description);

        foreach ($issues as $issue) {
            $filing_issue = new LobbyFilingLobbyIssue();
            $filing_issue->Issue = $issue[0];
            $filing_issue->Filing = $lobby_filing;
            $filing_issue->specific_issue = $issue[1];
            $filing_issue->save();
        }

        if ($lobbying_entity_is_lobbyist) {
            $lobby_filing->Lobbyist[] = $lobbying_entity;
        } else {
            foreach ($lobbyists as $lobbyist) {
                $lobby_filing->Lobbyist[] = $lobbyist;
            }
        }
        $lobby_filing->save();

        //CREATE TRANSACTION RELATIONSHIP, IF ANY
        $transaction = null;
        if ($client_entity != null) {
            $transaction = RelationshipTable::getByCategoryQuery('Transaction')->addWhere('r.entity1_id = ?', $client_entity->id)->addWhere('r.entity2_id = ?', $lobbying_entity->id)->addWhere('transaction.is_lobbying = ?', 1)->fetchOne();
            $has_amount = $lda_filing->amount && $lda_filing->amount != '';

            if ($transaction) {
                // Existing client relationship: extend the date range, accumulate the amount, count the filing.
                $transaction->updateDateRange($date, true);
                if ($has_amount) {
                    if (!$transaction->amount || $transaction->amount == '') {
                        $transaction->amount = $lda_filing->amount;
                    } else {
                        $transaction->amount += $lda_filing->amount;
                    }
                }
                $transaction->filings++;
            } else {
                $transaction = new Relationship();
                $transaction->Entity1 = $client_entity;
                $transaction->Entity2 = $lobbying_entity;
                $transaction->setCategory('Transaction');
                $transaction->description1 = 'Lobbying Client';
                $transaction->is_lobbying = 1;
                $transaction->filings = 1;
                $transaction->updateDateRange($date, true);
                if (in_array('Person', $lobbying_entity_extensions)) {
                    $transaction->description2 = 'Hired Lobbyist';
                } else {
                    $transaction->description2 = 'Lobbying Firm';
                }
                if ($has_amount) {
                    $transaction->amount = $lda_filing->amount;
                }
            }
            $transaction->save();
            $transaction->addReference($filing_source, $excerpt, $transaction->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            $relationships[] = $transaction;
        }

        //CREATE LOBBYING RELATIONSHIP
        foreach ($govt_entities as $govt_entity) {
            $lobbying_relationship = RelationshipTable::getByCategoryQuery('Lobbying')->addWhere('r.entity1_id = ?', $lobbying_entity->id)->addWhere('r.entity2_id = ?', $govt_entity->id)->fetchOne();
            if ($lobbying_relationship) {
                $lobbying_relationship->updateDateRange($date);
                $lobbying_relationship->filings++;
                $lobbying_relationship->save();
            } else {
                $lobbying_relationship = new Relationship();
                $lobbying_relationship->Entity1 = $lobbying_entity;
                $lobbying_relationship->Entity2 = $govt_entity;
                $lobbying_relationship->setCategory('Lobbying');
                // With a client transaction the lobbying is on the client's behalf; otherwise it is direct.
                if ($transaction) {
                    $lobbying_relationship->description1 = 'Lobbying (for client)';
                } else {
                    $lobbying_relationship->description1 = 'Direct Lobbying';
                }
                $lobbying_relationship->description2 = $lobbying_relationship->description1;
                $lobbying_relationship->updateDateRange($date, true);
                $lobbying_relationship->filings = 1;
                $lobbying_relationship->save();
                $lobbying_relationship->addReference($filing_source, $excerpt, $lobbying_relationship->getAllModifiedFields(), 'LDA Filing', null, $date, false);
            }
            $relationships[] = $lobbying_relationship;
        }

        // Link every relationship touched by this filing to the filing record.
        foreach ($relationships as $relationship) {
            $lobby_filing->Relationship[] = $relationship;
        }
        $lobby_filing->save();

        $this->printDebug("Import Completed\n");
        $this->db->commit();
    } catch (Exception $e) {
        $this->db->rollback();
        throw $e;
    }
}
/**
 * Interactively turns one scraped result row into an entity and its relationships.
 *
 * The operator is prompted (via $this->readline) at every step: whether to process
 * the row, whether it matches an existing similar entity, whether to create a new
 * one, whether to mine the bio for positions, and finally the row is attached to
 * $this->list and/or related to $this->org when those are configured.
 *
 * @param array $match scraped fields; 'name' is required, 'bio' and 'rank' are read when present
 *
 * @return false|null false when the operator declines to process the row, null on every other path
 */
public function parseResults($match)
{
    // Keep a "; "-delimited copy of the bio for parseBio() before the generic cleanup
    // below flattens every field with plain spaces. Defaults to '' so a row without a
    // bio can still reach parseBio() without touching an undefined variable.
    $bio_dirty = '';
    if (isset($match['bio'])) {
        $bio_dirty = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($match['bio'], "; ")));
        $bio_dirty = preg_replace('/(\\;\\s)+/is', '; ', $bio_dirty);
    }

    // Clean every field in place. Written by key rather than by reference: a dangling
    // foreach reference would be overwritten by the rank preg_match() further down.
    foreach ($match as $key => $value) {
        $match[$key] = LsHtml::replaceEntities(LsString::spacesToSpace(LsHtml::stripTags($value, " ")));
    }

    if (!isset($match['name'])) {
        return;
    }
    $name = $match['name'];
    $bio = isset($match['bio']) ? $match['bio'] : '';

    $this->printDebug("_________________________\n\nname: " . $name . "\n");
    $this->printDebug("bio: " . $bio . "\n");

    $accept = strtolower($this->readline('Process this entity? (n to skip) '));
    if ($accept == 'n' || $accept == 'no') {
        return false;
    }

    // Build the candidate entity and look up similar existing ones
    if (!$this->org_org) {
        if ($this->last_first) {
            $entity = PersonTable::parseCommaName($name);
        } else {
            $entity = PersonTable::parseFlatName($name);
        }
        $similar_entities = PersonTable::getSimilarQuery2($entity)->execute();
    } else {
        $entity = new Entity();
        $entity->addExtension('Org');
        foreach ($this->org_extensions as $ext) {
            $entity->addExtension($ext);
        }
        $entity->setEntityField('name', $name);
        $similar_entities = OrgTable::getSimilarQuery($entity)->execute();
    }

    // Let the operator pick an existing entity instead of creating a duplicate
    $matched = false;
    foreach ($similar_entities as $similar_entity) {
        if ($similar_entity['primary_ext'] == 'Person') {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Orgs :: ' . $similar_entity->getRelatedOrgsSummary() . " Bio :: {$similar_entity->summary})");
        } else {
            $this->printDebug(' POSSIBLE MATCH: ' . $similar_entity->name . ' (Summary :: ' . $similar_entity->summary . ')');
        }

        $accept = $this->promptForAnswer(
            ' Is this the same entity? (y or n)',
            ' Is this the same entity? (y or n) ',
            array('y', 'n')
        );
        if ($accept == 'y') {
            $entity = $similar_entity;
            $matched = true;
            $this->printDebug(' [accepted]');
            break;
        }
        // only reachable once the retries are exhausted with 'break' as the last answer
        if ($accept == 'break') {
            break;
        }
    }

    // No match accepted: offer to save the candidate as a new entity
    $created = false;
    if (!$matched) {
        if ($entity->getPrimaryExtension() == 'Person') {
            $this->printDebug(' New person: ' . $entity->name_first . ' ' . $entity->name_last);
        } else {
            $this->printDebug(' New org: ' . $entity->name);
        }

        $accept = $this->promptForAnswer(
            ' create this new entity? (y or n) ',
            ' create this new entity? (y or n) ',
            array('y', 'n')
        );
        if ($accept == 'y') {
            if ($entity->getPrimaryExtension() == 'Person') {
                $this->printDebug("\n Bio: {$bio} \n");
                $accept = $this->promptForAnswer(
                    ' Add this bio? (y or n) ',
                    ' add this bio? (y or n) ',
                    array('y', 'n')
                );
                if ($accept == 'y') {
                    $entity->summary = $bio;
                }
            }
            $entity->save();
            $entity->addReference($this->url, null, null, $this->url_name);
            $created = true;
            $this->printDebug(' ' . $entity->name . ' saved');
        }
    }

    // Optionally mine the bio of a person for organisations they hold positions at
    if (($matched || $created) && $entity->getPrimaryExtension() == 'Person') {
        $accept = $this->promptForAnswer(
            "Parse above bio for possible relationships? (y or n) ",
            "Parse above bio for possible relationships? (y or n) ",
            array('y', 'n')
        );
        if ($accept == 'y') {
            $this->addPositionsFromBio($entity, $bio_dirty);
        }
    }

    if ($matched || $created) {
        // Attach the entity to the configured list, recording its rank when one was scraped
        if ($this->list) {
            $q = LsDoctrineQuery::create()
                ->from('LsListEntity l')
                ->where('l.entity_id = ? and l.list_id = ?', array($entity->id, $this->list->id))
                ->fetchOne();
            if (!$q) {
                $le = new LsListEntity();
                $le->Entity = $entity;
                $le->LsList = $this->list;
                if (isset($match['rank']) && preg_match('/(\\d+)/isu', $match['rank'], $rankMatch)) {
                    $le->rank = $rankMatch[1];
                }
                $le->save();
                $this->printDebug('List membership saved');
            }
        }

        // Relate the entity to the configured org
        if ($this->org) {
            $q = LsDoctrineQuery::create()
                ->from('Relationship r')
                ->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ?', array($entity->id, $this->org->id, 1))
                ->fetchOne();
            if ($q) {
                $this->printDebug(' (relationship already found, skipping...)');
                return;
            }

            $relationship = new Relationship();
            $relationship->Entity1 = $entity;
            $relationship->Entity2 = $this->org;
            $relationship->setCategory($this->relationship_category);

            if ($this->description1) {
                $relationship->description1 = $this->description1;
            } else {
                $description = $this->readline(" what description to give this relationship ({$relationship}) ? (less than 3 chars will skip)");
                if (strlen($description) > 2) {
                    $relationship->description1 = $description;
                }
            }

            if ($this->relationship_category == 'Position') {
                $relationship->is_board = $this->is_board;
            } elseif ($this->relationship_category == 'Donation') {
                if ($this->amount) {
                    $relationship->amount = $this->amount;
                } else {
                    $amount = $this->readline(" what amount ({$relationship}) ? (less than 3 chars will skip)");
                    if (strlen($amount) > 1) {
                        $relationship->amount = $amount;
                    }
                }
            }

            $relationship->save();
            $relationship->addReference($this->url, null, null, $this->url_name);
            $this->printDebug(" Relationship saved: {$relationship}");
        }
    }
}

/**
 * Asks a question on the console until one of the accepted answers is given.
 *
 * The first read uses $prompt; up to four further reads use $retryPrompt. After
 * that the last answer is returned even if it is not one of $answers.
 *
 * @param string $prompt      text shown for the first read
 * @param string $retryPrompt text shown for each retry
 * @param array  $answers     accepted answers
 * @param bool   $lowercase   lowercase every answer before comparing/returning it
 *
 * @return mixed the last answer read
 */
private function promptForAnswer($prompt, $retryPrompt, array $answers, $lowercase = false)
{
    $answer = $this->readline($prompt);
    if ($lowercase) {
        $answer = strtolower($answer);
    }

    $attempts = 1;
    while (!in_array($answer, $answers, true) && $attempts < 5) {
        $answer = $this->readline($retryPrompt);
        if ($lowercase) {
            $answer = strtolower($answer);
        }
        $attempts++;
    }

    return $answer;
}

/**
 * Walks the names parseBio() extracts from a bio and, with operator confirmation,
 * creates Position relationships between $entity and the matching (or newly created) orgs.
 *
 * @param object $entity    the person the positions belong to
 * @param string $bio_dirty the "; "-delimited bio text handed to $entity->parseBio()
 */
private function addPositionsFromBio($entity, $bio_dirty)
{
    $names = $entity->parseBio($bio_dirty);
    $this->printDebug(" Orgs that {$entity} has a position at?");

    foreach ($names as $name) {
        $name = trim($name);
        $accept = $this->promptForAnswer(
            " > {$name} :: an org? (y or n or b to break) ",
            " {$name} :: an org? (y or n or b to break) ",
            array('y', 'n', 'b'),
            true
        );
        if ($accept == 'b') {
            break;
        }
        if ($accept != 'y') {
            continue;
        }

        $this->printDebug(' .....looking for names.....');
        $orgs = EntityTable::getByExtensionAndNameQuery('Org', $name)->limit(10)->execute();

        // Offer each existing org with that name, unless the person is already related to it
        $related_org = null;
        $exists = false;
        foreach ($orgs as $org) {
            $q = LsDoctrineQuery::create()
                ->from('Relationship r')
                ->where('entity1_id = ? and entity2_id = ?', array($entity->id, $org->id))
                ->fetchOne();
            if ($q) {
                $this->printDebug(' Position already exists, skipping...');
                $exists = true;
                break;
            }

            $prompt = " Create a position relationship between {$entity->name} and {$org->name}? (y or n) ";
            if ($this->promptForAnswer($prompt, $prompt, array('y', 'n')) == 'y') {
                $related_org = $org;
                break;
            }
        }

        // Nothing suitable found: offer to create the org
        if (!$related_org && !$exists) {
            $prompt = " couldn't find org, should this one be created: {$name} (y or n) ";
            if ($this->promptForAnswer($prompt, $prompt, array('y', 'n')) == 'y') {
                $related_org = new Entity();
                $related_org->addExtension('Org');
                // strip dots from the name, except the one in ".com"
                $related_org->name = preg_replace('/\\.(?!com)/i', '', $name);
                $extensions = $this->readline(" what extensions should this org get? (eg 'Business, LobbyingFirm, LawFirm') ");
                $extensions = preg_split('/\\,\\s*/isu', $extensions, -1, PREG_SPLIT_NO_EMPTY);
                try {
                    foreach ($extensions as $extension) {
                        $related_org->addExtension($extension);
                    }
                    $related_org->save();
                    $related_org->addReference($this->url, null, null, $this->url_name);
                } catch (Exception $e) {
                    $this->printDebug(' !!! problems with org creation, skipping');
                    $related_org = null;
                }
            }
        }

        if (!$related_org) {
            continue;
        }

        $q = LsDoctrineQuery::create()
            ->from('Relationship r')
            ->where('r.entity1_id = ? and r.entity2_id = ? and r.category_id = ?', array($entity->id, $related_org->id, 1))
            ->fetchOne();
        if ($q) {
            $this->printDebug(' (relationship already found, skipping...)');
            continue;
        }

        $relationship = new Relationship();
        $relationship->Entity1 = $entity;
        $relationship->Entity2 = $related_org;
        $relationship->setCategory('Position');

        $title = $this->readline(" Title for this position relationship? (<enter> to skip) ");
        if (strlen($title) > 2) {
            $relationship->description1 = $title;
        }

        // any answer other than y/yes/n/no leaves the flag untouched
        $current = strtolower($this->readline(" Is the relationship current? (y or n or <enter> to skip) "));
        if (in_array($current, array('y', 'yes'))) {
            $relationship->is_current = 1;
        } elseif (in_array($current, array('n', 'no'))) {
            $relationship->is_current = 0;
        }

        $board = strtolower($this->readline(" Is the relationship a board position? (y or n or <enter> to skip) "));
        if (in_array($board, array('y', 'yes'))) {
            $relationship->is_board = 1;
        } elseif (in_array($board, array('n', 'no'))) {
            $relationship->is_board = 0;
        }

        $relationship->save();
        $relationship->addReference($this->url, null, null, $this->url_name);
        $this->printDebug(" Relationship saved: {$relationship}");
    }
}
/**
 * Creates and saves a company entity with its address and phone, each referenced to $url.
 *
 * The entity gets the Org and Business extensions, plus PublicCompany (with the ticker)
 * when $ticker is given or PrivateCompany otherwise. The entity, address and phone
 * references are all named 'Fortune Magazine Online'.
 *
 * @param string $name        company name
 * @param string $ticker      stock ticker; a falsy value marks the company as private
 * @param string $website     company website
 * @param string $address_raw raw address string handed to Entity::addAddress()
 * @param string $telephone   phone number; skipped when falsy
 * @param string $revenue     revenue passed to LsNumber::formatDollarAmountAsNumber() with a factor of 1000000
 * @param string $url         source url recorded on every reference
 * @param mixed  $industry    accepted for interface compatibility; not used by this method
 *
 * @return Entity the saved company
 */
private function importCompany($name, $ticker, $website, $address_raw, $telephone, $revenue, $url, $industry)
{
    $corp = new Entity();
    $corp->addExtension('Org');
    $corp->addExtension('Business');

    if ($ticker) {
        $corp->addExtension('PublicCompany');
        $corp->ticker = $ticker;
    } else {
        $corp->addExtension('PrivateCompany');
    }

    $corp->name = $name;
    $corp->revenue = LsNumber::formatDollarAmountAsNumber($revenue, 1000000);
    $corp->website = $website;

    // Capture the modified field lists before save(), so the references can name them afterwards
    $modified = $corp->getAllModifiedFields();

    $addressModified = null;
    $address = $corp->addAddress($address_raw);
    if ($address) {
        $addressModified = $address->getAllModifiedFields();
    }

    // Initialised up front: without a telephone the original left $phone undefined
    // and still tested it after save()
    $phone = null;
    $phoneModified = null;
    if ($telephone) {
        $phone = $corp->addPhone($telephone);
        if ($phone) {
            $phoneModified = $phone->getAllModifiedFields();
        }
    }

    $corp->save();
    $corp->addReference($url, null, $modified, 'Fortune Magazine Online');

    if ($address) {
        $address->addReference($url, null, $addressModified, 'Fortune Magazine Online');
    }
    if ($phone) {
        $phone->addReference($url, null, $phoneModified, 'Fortune Magazine Online');
    }

    return $corp;
}
/**
 * Bulk-add action: collects names (scraped, uploaded, typed, parsed from the entity
 * summary, or found by a database search) and creates relationships between them
 * and $this->entity.
 *
 * The request is handled in one of four ways:
 *  - POST with commit 'Begin'/'Continue': resolve the reference, gather names according
 *    to 'add_method', load the relationship form defaults, then either hand off to the
 *    relationship toolbar ('onebyone') or build $this->matches for en-masse confirmation.
 *  - a 'page' parameter: only the paging values are stored.
 *  - POST with commit 'Submit': create the selected/new entities and their relationships.
 *  - POST with commit 'Cancel': redirect back to the entity.
 *
 * @param object $request the current web request
 */
public function executeAddBulk($request)
{
    $this->checkEntity($request, false, false);

    $this->reference_form = new ReferenceForm();
    $this->reference_form->setSelectObject($this->entity);
    $this->add_bulk_form = new AddBulkForm();

    //get possible default categories
    $this->categories = LsDoctrineQuery::create()
        ->select('c.name, c.name')
        ->from('RelationshipCategory c')
        ->orderBy('c.id')
        ->fetchAll(PDO::FETCH_KEY_PAIR);
    array_unshift($this->categories, '');

    if ($request->isMethod('post') && in_array($request->getParameter('commit'), array('Begin', 'Continue'))) {
        // Defined up front so later branches never read them while undefined
        $ref = null;
        $names = array();
        $extensions_arr = array();

        if ($request->hasParameter('ref_id')) {
            $this->ref_id = $request->getParameter('ref_id');
        } else {
            $refParams = $request->getParameter('reference');
            $this->reference_form->bind($refParams);

            $restOfParams = (array) $request->getParameterHolder();
            $restOfParams = array_shift($restOfParams);
            $this->add_bulk_form->bind($restOfParams, $request->getFiles());

            if (!$this->reference_form->isValid() || !$this->add_bulk_form->isValid()) {
                return;
            }

            // reuse an existing source when one was picked, otherwise store a new reference
            if ($this->ref_id = $refParams['existing_source']) {
                $ref = Doctrine::getTable('Reference')->find($this->ref_id);
            } else {
                $ref = new Reference();
                $ref->object_model = 'Entity';
                $ref->object_id = $this->entity->id;
                $ref->source = $refParams['source'];
                $ref->name = $refParams['name'];
                $ref->source_detail = $refParams['source_detail'];
                $ref->publication_date = $refParams['publication_date'];
                $ref->save();
            }
            $this->ref_id = $ref->id;
            $this->reference = $ref;
        }

        $verify_method = $request->getParameter('verify_method');

        if ($this->add_method = $request->getParameter('add_method')) {
            if ($this->add_method == 'scrape') {
                //scrape ref url
                //set names to confirm
                // When the request carried a ref_id, no reference object was built above;
                // load it so its source url can be fetched.
                if (!$ref) {
                    $ref = Doctrine::getTable('Reference')->find($this->ref_id);
                }
                $browser = new sfWebBrowser();
                $entity_types = $request->getParameter('entity_types');

                //FIND NAMES AT URL USING COMBO OF OPENCALAIS & LS CUSTOM HTML PARSING
                if ($ref && !$browser->get($ref->source)->responseIsError()) {
                    $text = $browser->getResponseText();
                    $this->names = LsTextAnalysis::getHtmlEntityNames($text, $entity_types);
                    $text = LsHtml::findParagraphs($text);
                    $this->text = preg_replace('/<[^b][^>]*>/is', " ", $text);
                    $this->confirm_names = true;
                    return;
                } else {
                    $request->setError('csv', 'problems finding names at that url');
                }
            } elseif ($this->add_method == 'upload') {
                $file = $this->add_bulk_form->getValue('file');
                // Check the upload itself: the original tested the generated path, which is never empty
                if (!$file) {
                    $request->setError('file', 'You need to upload a file.');
                    return;
                }

                $filename = 'uploaded_' . sha1($file->getOriginalName());
                $extension = $file->getExtension($file->getOriginalExtension());
                $filePath = sfConfig::get('sf_temp_dir') . '/' . $filename . $extension;
                $file->save($filePath);

                $spreadsheetArr = LsSpreadsheet::parse($filePath);
                if (!$spreadsheetArr) {
                    $request->setError('file', 'The file you uploaded could not be parsed properly.');
                    return;
                }

                $names = $spreadsheetArr['rows'];
                if (!in_array('name', $spreadsheetArr['headers'])) {
                    $request->setError('file', 'The file you uploaded could not be parsed properly because there is no "name" column.');
                    return;
                }

                if (in_array('summary', $spreadsheetArr['headers'])) {
                    foreach ($names as &$name) {
                        // NOTE(review): the '?' needles look like typographic quotes lost to an
                        // encoding problem; as written every question mark becomes an apostrophe.
                        // Confirm the intended characters before changing these literals.
                        $name['summary'] = str_replace(array('?', "'"), "'", $name['summary']);
                        $name['summary'] = str_replace(array('?', '?', '"'), '"', $name['summary']);
                        if (isset($name['title'])) {
                            $name['description1'] = $name['title'];
                        }
                    }
                    unset($name);
                }
            } elseif ($this->add_method == 'summary') {
                //parse summary for names
                $this->text = $this->entity->summary;
                $entity_types = $request->getParameter('entity_types');
                $this->names = LsTextAnalysis::getTextEntityNames($this->text, $entity_types);
                $this->confirm_names = true;
                return;
            } elseif ($this->add_method == 'text') {
                $manual_names = $request->getParameter('manual_names');
                if ($manual_names && $manual_names != "") {
                    $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                    $manual_names = array_map('trim', $manual_names);
                    $names = array();
                    foreach ($manual_names as $name) {
                        $names[] = array('name' => $name);
                    }
                } else {
                    $request->setError('csv', 'You did not add names properly.');
                    return;
                }
            } elseif ($this->add_method == 'db_search') {
                $this->db_search = true;
            }
        }

        //intermediate scrape page -- takes confirmed names, builds names arr
        if ($confirmed_names = $request->getParameter('confirmed_names')) {
            $restOfParams = (array) $request->getParameterHolder();
            $restOfParams = array_shift($restOfParams);
            $this->add_bulk_form->bind($restOfParams, $request->getFiles());

            if (!$this->add_bulk_form->isValid()) {
                $this->reference = Doctrine::getTable('Reference')->find($this->ref_id);
                // SECURITY NOTE(review): unserialize() on a request parameter allows object
                // injection; this should move to a safe format (e.g. JSON) together with
                // the template that produces the 'names' field.
                $this->names = unserialize(stripslashes($request->getParameter('names')));
                $this->confirm_names = true;
                return;
            }

            $names = array();
            foreach ($confirmed_names as $cn) {
                $names[] = array('name' => $cn);
            }

            $manual_names = $request->getParameter('manual_names');
            if ($manual_names && $manual_names != "") {
                $manual_names = preg_split('#[\\r\\n]+#', $manual_names);
                $manual_names = array_map('trim', $manual_names);
                foreach ($manual_names as $name) {
                    $names[] = array('name' => $name);
                }
            }
        }

        // LOAD IN RELATIONSHIP DEFAULTS
        if (isset($verify_method)) {
            $defaults = $request->getParameter('relationship');

            if ($verify_method == 'enmasse') {
                $this->default_type = $request->getParameter('default_type');
                $this->order = $request->getParameter('order');
                $category_name = $request->getParameter('relationship_category_all');
                $this->extensions = ExtensionDefinitionTable::getByTier(2, $this->default_type);
                foreach ($this->extensions as $ext) {
                    $extensions_arr[] = $ext->name;
                }
            } else {
                $category_name = $request->getParameter('relationship_category_one');
            }

            if ($category_name) {
                $this->category_name = $category_name;
                if (!($category = Doctrine::getTable('RelationshipCategory')->findOneByName($category_name))) {
                    $request->setError('csv', 'You did not select a relationship category.');
                    return;
                }

                // the category's form class supplies the field schema used to render the defaults
                $formClass = $category_name . 'Form';
                $categoryForm = new $formClass(new Relationship());
                $categoryForm->setDefaults($defaults);
                $this->form_schema = $categoryForm->getFormFieldSchema();

                if (in_array($category_name, array('Position', 'Education', 'Membership', 'Donation', 'Lobbying', 'Ownership'))) {
                    $this->field_names = array('description1', 'start_date', 'end_date', 'is_current');
                } else {
                    $this->field_names = array('description1', 'description2', 'start_date', 'end_date', 'is_current');
                }

                $extraFields = array(
                    'Position' => array('is_board', 'is_executive'),
                    'Education' => array('degree_id'),
                    'Donation' => array('amount'),
                    'Transaction' => array('amount'),
                    'Lobbying' => array('amount'),
                    'Ownership' => array('percent_stake', 'shares'),
                );
                if (isset($extraFields[$category_name])) {
                    $this->field_names = array_merge($this->field_names, $extraFields[$category_name]);
                }
            }

            $this->matches = array();

            // BOOT TO TOOLBAR OR LOOK FOR MATCHES FOR ENMASSE ADD
            if (isset($names) && count($names) > 0 || isset($this->db_search)) {
                if ($verify_method == 'onebyone') {
                    if (isset($category_name)) {
                        $defaults['category'] = $category_name;
                    }

                    $toolbar_names = array();
                    foreach ($names as $name) {
                        $toolbar_names[] = $name['name'];
                    }

                    $this->getUser()->setAttribute('toolbar_names', $toolbar_names);
                    $this->getUser()->setAttribute('toolbar_entity', $this->entity->id);
                    $this->getUser()->setAttribute('toolbar_defaults', $defaults);
                    $this->getUser()->setAttribute('toolbar_ref', $this->ref_id);
                    $this->redirect('relationship/toolbar');
                } else {
                    $this->category_name = $category_name;

                    if (isset($this->db_search)) {
                        $num = $request->getParameter('num', 10);
                        $page = $request->getParameter('page', 1);

                        // entities whose summary or blurb mentions this entity's name or any alias...
                        $q = LsDoctrineQuery::create()
                            ->from('Entity e')
                            ->where('(e.summary rlike ? or e.blurb rlike ?)', array('[[:<:]]' . $this->entity->name . '[[:>:]]', '[[:<:]]' . $this->entity->name . '[[:>:]]'));
                        foreach ($this->entity->Alias as $alias) {
                            $q->orWhere('(e.summary rlike ? or e.blurb rlike ?)', array('[[:<:]]' . $alias->name . '[[:>:]]', '[[:<:]]' . $alias->name . '[[:>:]]'));
                        }
                        $q->setHydrationMode(Doctrine::HYDRATE_ARRAY);

                        // ...and that are not already linked to it in this category
                        $cat_id = constant('RelationshipTable::' . strtoupper($category_name) . '_CATEGORY');
                        $q->whereParenWrap();
                        $q->andWhere('NOT EXISTS (SELECT DISTINCT l.relationship_id FROM Link l ' . 'WHERE l.entity1_id = e.id AND l.entity2_id = ? AND l.category_id = ?)', array($this->entity['id'], $cat_id));
                        $summary_matches = $q->execute();

                        // Build the highlight pattern once; with no aliases there is nothing to
                        // highlight (an empty alternation would match at every position).
                        $aliasPatterns = array();
                        foreach ($this->entity->Alias as $alias) {
                            $aliasPatterns[] = LsString::escapeStringForRegex($alias->name);
                        }
                        $highlightRegex = count($aliasPatterns) ? '/(' . implode("|", $aliasPatterns) . ')/is' : null;

                        foreach ($summary_matches as $summary_match) {
                            if ($highlightRegex) {
                                $summary_match['summary'] = preg_replace($highlightRegex, '<strong>$1</strong>', $summary_match['summary']);
                            }
                            $this->matches[] = array('search_results' => array($summary_match));
                        }
                    } else {
                        $nameCount = count($names);
                        for ($i = 0; $i < $nameCount; $i++) {
                            if (!isset($names[$i]['name']) || trim($names[$i]['name']) == '') {
                                continue;
                            }

                            $name = $names[$i]['name'];
                            $terms = $name;
                            if ($this->default_type == 'Person') {
                                // single-word names are searched as typed
                                $name_parts = preg_split('/\\s+/', $name);
                                if (count($name_parts) > 1) {
                                    $terms = PersonTable::nameSearch($name);
                                }
                                $primary_ext = "Person";
                            } elseif ($this->default_type == 'Org') {
                                $terms = OrgTable::nameSearch($name);
                                $primary_ext = "Org";
                            } else {
                                $primary_ext = null;
                            }

                            // arguments: terms, page, num, listIds, aliases, primary extension
                            $pager = EntityTable::getSphinxPager($terms, 1, 20, null, true, $primary_ext);
                            $match = $names[$i];
                            $match['search_results'] = $pager->execute();

                            // keep only the requested types that are valid extensions for the default type
                            if (isset($names[$i]['types'])) {
                                $types = explode(',', $names[$i]['types']);
                                $types = array_map('trim', $types);
                                $match['types'] = array();
                                foreach ($types as $type) {
                                    if (in_array($type, $extensions_arr)) {
                                        $match['types'][] = $type;
                                    }
                                }
                            }

                            $this->matches[] = $match;
                        }
                    }
                }
            }
        }
    } elseif ($page = $this->getRequestParameter('page')) {
        $this->page = $page;
        $this->num = $this->getRequestParameter('num', 50);
    } elseif ($request->isMethod('post') && $request->getParameter('commit') == 'Submit') {
        $this->ref_id = $this->getRequestParameter('ref_id');
        $relationship_category = $this->getRequestParameter('category_name');
        $order = $this->getRequestParameter('order');
        $default_type = $request->getParameter('default_type');
        $default_ref = Doctrine::getTable('Reference')->find($request->getParameter('ref_id'));
        $count = $this->getRequestParameter('count');

        for ($i = 0; $i < $count; $i++) {
            $entity_id = $request->getParameter('entity_' . $i);
            if (!$entity_id) {
                continue;
            }

            // Every row starts without a reference, so nothing leaks in from the previous
            // row and the variable is always defined before it is tested.
            $ref = null;
            $selected_entity_id = null;
            $relParams = $request->getParameter("relationship_" . $i);

            // a row-specific reference overrides the default one
            if (!empty($relParams['ref_name'])) {
                $ref = array(
                    'source' => $relParams['ref_source'],
                    'name' => $relParams['ref_name'],
                );
            }

            if ($entity_id == 'new') {
                $name = $request->getParameter('new_name_' . $i);
                if ($default_type == 'Person') {
                    $new_entity = PersonTable::parseFlatName($name);
                } else {
                    $new_entity = new Entity();
                    $new_entity->addExtension('Org');
                    $new_entity->name = trim($name);
                }
                $new_entity->save();

                $new_entity->blurb = $request->getParameter('new_blurb_' . $i);
                $new_entity->summary = $request->getParameter('new_summary_' . $i);
                if (!$ref) {
                    $ref = $default_ref;
                }
                $new_entity->addReference($ref['source'], null, null, $ref['name']);

                if ($types = $request->getParameter('new_extensions_' . $i)) {
                    foreach ($types as $type) {
                        $new_entity->addExtension($type);
                    }
                }
                $new_entity->save();
                $selected_entity_id = $new_entity->id;
            } elseif ($entity_id > 0) {
                $selected_entity_id = $entity_id;
                LsCache::clearEntityCacheById($selected_entity_id);
            }

            if (!$selected_entity_id) {
                continue;
            }

            $startDate = $relParams['start_date'];
            $endDate = $relParams['end_date'];
            // NOTE(review): the reference url is read from 'ref_source' above but 'ref_url' is
            // what gets removed here, so 'ref_source' is still passed to fromArray(); confirm
            // which key the form posts before changing this.
            unset($relParams['start_date'], $relParams['end_date'], $relParams['ref_name'], $relParams['ref_url']);

            $rel = new Relationship();
            $rel->setCategory($relationship_category);
            // 'order' decides which side of the relationship the current entity sits on
            if ($order == '1') {
                $rel->entity1_id = $this->entity['id'];
                $rel->entity2_id = $selected_entity_id;
            } else {
                $rel->entity2_id = $this->entity['id'];
                $rel->entity1_id = $selected_entity_id;
            }

            //only set dates if valid
            if ($startDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($startDate))) {
                $rel->start_date = Dateable::convertForDb($startDate);
            }
            if ($endDate && preg_match('#^\\d{4}-\\d{2}-\\d{2}$#', Dateable::convertForDb($endDate))) {
                $rel->end_date = Dateable::convertForDb($endDate);
            }

            // third argument is the hydrateCategory flag
            $rel->fromArray($relParams, null, true);

            // for database-search adds, prefer a reference already attached to the selected entity
            if ($request->hasParameter('add_method') && $request->getParameter('add_method') == 'db_search') {
                $refs = EntityTable::getSummaryReferences($selected_entity_id);
                if (count($refs)) {
                    $ref = $refs[0];
                } else {
                    $refs = EntityTable::getAllReferencesById($selected_entity_id);
                    if (count($refs)) {
                        $ref = $refs[0];
                    }
                }
            }

            if (!$ref) {
                $ref = $default_ref;
            }
            $rel->saveWithRequiredReference(array('source' => $ref['source'], 'name' => $ref['name']));
        }

        $this->clearCache($this->entity);
        $this->redirect($this->entity->getInternalUrl());
    } elseif ($request->isMethod('post') && $request->getParameter('commit') == 'Cancel') {
        $this->redirect($this->entity->getInternalUrl());
    }
}