/**
 * Normalises a scraped country row.
 *
 * Lower-cases 'country_code' and, when a non-empty 'country_name' is present,
 * adds a 'country_slug' built from that name with SlugClass::slugify().
 *
 * @param array $data Row holding at least 'country_code'; 'country_name' is optional.
 * @return array The same row with the normalised / added values.
 */
public function ClearUpVariables($data)
{
    $data['country_code'] = strtolower($data['country_code']);

    // Nothing to slugify: return the row with only the code normalised.
    if (empty($data['country_name'])) {
        return $data;
    }

    $slugger = new SlugClass();
    $data['country_slug'] = $slugger->slugify($data['country_name']);

    return $data;
}
/**
 * Normalises one scraped flight row before it is stored.
 *
 * Reads 'flight_number', 'scheduled_time', 'terminal' and 'destination' from
 * the row and derives 'hour_stamp', 'hour_stamp_flight_number',
 * 'airline_code', 'country_code', 'city', 'airport' and 'city_slug'.
 * The raw 'destination' key is removed. A Country row named 'Undefined' is
 * created when the parsed country code is not in the countries table yet.
 *
 * Every value in the returned row is passed through trim(), so non-string
 * values (the parsed date, nulls) come back as strings.
 *
 * @param array $data       Scraped row; 'scheduled_time' must match the format 'd.m. H:i'.
 * @param mixed $hour_stamp Stored as 'hour_stamp' and prefixed to the flight number
 *                          to build the unique 'hour_stamp_flight_number' column.
 * @return array The cleaned row.
 */
public function ClearUpVariables($data, $hour_stamp)
{
    $data['hour_stamp'] = $hour_stamp;
    // Unique column: hour stamp + flight number with every space removed.
    $data['hour_stamp_flight_number'] = str_replace(' ', '', $hour_stamp . $data['flight_number']);
    $data['scheduled_time'] = Carbon::createFromFormat('d.m. H:i', $data['scheduled_time']);

    // Airline code: first two characters of the flight number, lower-cased.
    // A flight number shorter than two characters yields '' instead of an
    // undefined-offset notice.
    $airlineMatch = [];
    preg_match("|^..|", $data['flight_number'], $airlineMatch);
    $data['airline_code'] = isset($airlineMatch[0]) ? strtolower($airlineMatch[0]) : '';
    if ($data['airline_code'] == "ez") {
        // easyJet flight numbers start with EZY instead of the IATA code U2.
        $data['airline_code'] = "u2";
    }

    // Terminal: first digit 1-9 found in the scraped text, null when there is none.
    $terminalMatch = [];
    preg_match("|[1-9]|", $data['terminal'], $terminalMatch);
    $data['terminal'] = isset($terminalMatch[0]) ? $terminalMatch[0] : null;

    // Destination is kept in lowercase (SQLite comparisons are case-sensitive).
    // Expected shape: "city/airport (cc)"; anything else falls back to 'xx'.
    $data['destination'] = strtolower($data['destination']);
    $destinationParts = [];
    preg_match("|(.*)\\((.*)\\)|", $data['destination'], $destinationParts);
    if (count($destinationParts) > 1) {
        $CitySlashAirport = $destinationParts[1];
        // Trim now so the country looked up / created below carries the same
        // code as the row returned at the end (which is trimmed as a whole).
        $data['country_code'] = trim($destinationParts[2]);
    } else {
        $CitySlashAirport = null;
        $data['country_code'] = 'xx';
    }
    unset($data['destination']);

    // Register unknown country codes. The previous check called count() on the
    // query builder itself, which never yields 0 (and throws a TypeError on
    // PHP 8), so the placeholder country was never created.
    if (!Country::where('country_code', $data['country_code'])->exists()) {
        $newcountry = new Country();
        $newcountry->country_code = $data['country_code'];
        $newcountry->country_name = 'Undefined';
        $newcountry->save();
    }

    // Split "city/airport"; the cast keeps preg_split() away from a null subject.
    $cityAirportParts = preg_split("|\\/|", (string) $CitySlashAirport);
    $data['city'] = $cityAirportParts[0];
    if (empty($data['city'])) {
        $data['city'] = 'empty';
    }
    $data['airport'] = count($cityAirportParts) > 1 ? $cityAirportParts[1] : null;

    $slug = new SlugClass();
    $data['city_slug'] = $slug->slugify($data['city']);

    // When the slug is not a known city, look for a dirty -> clean mapping in
    // the slug bridge table and use the clean slug instead.
    $city_slug_in_cities = City::where('city_slug', $data['city_slug'])->first();
    if (empty($city_slug_in_cities)) {
        $city_slug_bridge = Slug::where('city_slug_dirty', $data['city_slug'])->first();
        if (!empty($city_slug_bridge)) {
            $data['city_slug'] = $city_slug_bridge->city_slug_clean;
            // NOTE(review): looks like leftover debug output; kept because it is
            // observable behaviour - confirm with callers before removing.
            echo $data['city_slug'];
        }
    }

    return array_map('trim', $data);
}
/**
 * Geocodes every address that has no city yet and links it to a city row.
 *
 * For each Address whose 'city_id' is null:
 *  - its 'raw_input' is sent to GoogleMapsScraper::Geocode();
 *  - status, address components, country and city/island are pulled out of
 *    the response with parent::ExtractWithXpath();
 *  - a City is fetched or created by (city_name, lower-cased country_code),
 *    updated with the geocoder status, raw output, country name and slug,
 *    and saved;
 *  - the address gets that city's id and is saved.
 *
 * A missing country code falls back to 'xx'. When no city name was found
 * but an island name was, the island name is used as the city name.
 *
 * @return void
 */
public function Geocoding()
{
    $addressesWithoutGeolocation = Address::whereNull('city_id')->get();
    $GeoScraped = new GoogleMapsScraper();

    foreach ($addressesWithoutGeolocation as $address) {
        $gmaps_output = $GeoScraped->Geocode($address->raw_input);

        $dataArray = parent::ExtractWithXpath($gmaps_output, $this->GeoPatterns, 'text', 'first', 'xml');
        $componentsArray = parent::ExtractWithXpath($gmaps_output, $this->AddressComponentsPattern, 'html', 'multiple', 'xml');

        // Start every address from a clean slate. Previously only single keys
        // were reset, so e.g. an 'island' found for an earlier address could
        // leak into a later address that had no city information at all.
        $cityArray = ['city_name' => null];
        $countryArray = ['country_code' => null, 'country_name' => null];

        // Scan the address components; the first matching type decides which
        // pattern set is applied to that component.
        foreach ($componentsArray as $AddressComponent) {
            // Presumably wrapped so the extractor receives a single-root XML fragment.
            $wrappedComponent = '<result>' . $AddressComponent . '</result>';

            if (preg_match("|type>country<|", $AddressComponent)) {
                $countryArray = parent::ExtractWithXpath($wrappedComponent, $this->CountryPatterns, 'text', 'first', 'xml');
            } elseif (preg_match("|type>natural_feature<|", $AddressComponent)) {
                $cityArray = parent::ExtractWithXpath($wrappedComponent, $this->IslandPatterns, 'text', 'first', 'xml');
            } elseif (preg_match("|type>locality<|", $AddressComponent)) {
                $cityArray = parent::ExtractWithXpath($wrappedComponent, $this->CityPatterns, 'text', 'first', 'xml');
            }
        }

        // The extractor results replace the defaults above, so every key is
        // read defensively instead of assuming it is still present.
        $cityName = isset($cityArray['city_name']) ? $cityArray['city_name'] : null;
        if (empty($cityName) and !empty($cityArray['island'])) {
            $cityName = $cityArray['island'];
        }

        $countryCode = empty($countryArray['country_code']) ? 'xx' : $countryArray['country_code'];
        $countryName = isset($countryArray['country_name']) ? $countryArray['country_name'] : null;
        $gmapsStatus = isset($dataArray['gmaps_status']) ? $dataArray['gmaps_status'] : null;

        // Reuse the city when it already exists, otherwise create it.
        $city = City::firstOrNew(['city_name' => $cityName, 'country_code' => strtolower($countryCode)]);
        $city->gmaps_status = $gmapsStatus;
        $city->gmaps_output = $gmaps_output;
        $city->country_name = $countryName;

        $slug = new SlugClass();
        $city->city_slug = $slug->slugify($cityName);
        $city->save();

        // Store the foreign key on the address.
        $address->city_id = $city->id;
        $address->save();
    }
}
/**
 * Runs the slug maintenance steps and returns to the slug page.
 *
 * Calls SlugClass::UpdateSlugTable() and then SlugClass::UpdateFlightsTable()
 * with the incoming request, in that order, on the same SlugClass instance.
 *
 * @param Request $request Incoming request, handed to both update steps unchanged.
 * @return mixed Result of redirect('scraper/slugs').
 */
public function slugs(Request $request)
{
    $slugMaintenance = new SlugClass();

    $slugMaintenance->UpdateSlugTable($request);
    $slugMaintenance->UpdateFlightsTable($request);

    return redirect('scraper/slugs');
}