/**
 * Normalises a country record before it is stored.
 *
 * Lower-cases `country_code` and, when `country_name` is non-empty, adds a
 * `country_slug` built by SlugClass::slugify() from the country name.
 *
 * @param array $data Country fields; `country_code` and `country_name` are read.
 * @return array The input array with `country_code` lower-cased (an empty
 *               string when the key was missing or null) and, when a name is
 *               present, `country_slug` added.
 */
public function ClearUpVariables($data)
 {
     // A missing/null code is normalised to '' explicitly instead of raising an
     // undefined-index notice and handing null to strtolower() (deprecated in PHP 8.1).
     $data['country_code'] = isset($data['country_code']) ? strtolower($data['country_code']) : '';

     // The slug is only derived when there is a name to derive it from.
     if (!empty($data['country_name'])) {
         $slug = new SlugClass();
         $data['country_slug'] = $slug->slugify($data['country_name']);
     }

     return $data;
 }
 /**
  * Normalises one scraped flight row before it is stored.
  *
  * Reads `flight_number`, `scheduled_time`, `terminal` and `destination` from
  * $data and derives `hour_stamp`, `hour_stamp_flight_number`, `airline_code`,
  * `country_code`, `city`, `airport` and `city_slug`. The `destination` key is
  * removed. As a side effect, a Country row named 'Undefined' is inserted when
  * the extracted country code is not yet known.
  *
  * @param array $data       Raw flight fields as scraped.
  * @param mixed $hour_stamp Stamp stored as-is and prefixed to the flight number.
  * @return array The cleaned row; every value has been passed through trim(),
  *               so all values are strings on return.
  */
 public function ClearUpVariables($data, $hour_stamp)
 {
     $data['hour_stamp'] = $hour_stamp;
     // Hour stamp + flight number with all spaces removed (presumably the
     // "unique column" the original comment refers to).
     $data['hour_stamp_flight_number'] = str_replace(' ', '', $hour_stamp . $data['flight_number']);
     // NOTE(review): Carbon is expected to reject input not matching 'd.m. H:i' - confirm how callers handle that.
     $data['scheduled_time'] = Carbon::createFromFormat('d.m. H:i', $data['scheduled_time']);

     // Airline code: first two characters of the flight number, lower-cased.
     // Falls back to '' when the flight number is shorter than two characters
     // instead of reading an undefined match offset.
     $airlineMatch = array();
     if (preg_match("|^..|", $data['flight_number'], $airlineMatch) === 1) {
         $data['airline_code'] = strtolower($airlineMatch[0]);
     } else {
         $data['airline_code'] = '';
     }
     // easyJet flight numbers start with EZY rather than the IATA code U2 - manual correction.
     if ($data['airline_code'] == "ez") {
         $data['airline_code'] = "u2";
     }

     // Terminal: first digit 1-9 found in the raw value; null when there is none
     // (the final trim() turns that into '').
     $terminalMatch = array();
     if (preg_match("|[1-9]|", $data['terminal'], $terminalMatch) === 1) {
         $data['terminal'] = $terminalMatch[0];
     } else {
         $data['terminal'] = null;
     }

     // Destination looks like "city/airport (cc)". Everything is kept lower-case
     // because SQLite distinguishes between capitals and lower-case.
     $destination = strtolower($data['destination']);
     unset($data['destination']);
     $destinationMatch = array();
     if (preg_match("|(.*)\\((.*)\\)|", $destination, $destinationMatch) === 1) {
         $cityAndAirport = $destinationMatch[1];
         // Trimmed here so the code looked up / inserted below equals the
         // trimmed code that is finally returned.
         $data['country_code'] = trim($destinationMatch[2]);
     } else {
         // No "(cc)" part: use the placeholder code.
         $cityAndAirport = '';
         $data['country_code'] = 'xx';
     }

     // In case some strange code arrives, register it so it exists in countries.
     // exists() runs the query; counting the query builder itself never yields 0.
     if (!Country::where('country_code', $data['country_code'])->exists()) {
         $newcountry = new Country();
         $newcountry->country_code = $data['country_code'];
         $newcountry->country_name = 'Undefined';
         $newcountry->save();
     }

     // Split "city/airport"; the airport part is optional.
     $cityParts = preg_split("|\\/|", $cityAndAirport);
     $data['city'] = $cityParts[0];
     if (empty($data['city'])) {
         $data['city'] = 'empty';
     }
     $data['airport'] = count($cityParts) > 1 ? $cityParts[1] : null;

     // city_slug
     $slug = new SlugClass();
     $data['city_slug'] = $slug->slugify($data['city']);
     // If the slug is not in the cities table, try to translate it through the
     // dirty -> clean slug bridge table.
     $city_slug_in_cities = City::where('city_slug', $data['city_slug'])->first();
     if (empty($city_slug_in_cities)) {
         $city_slug_bridge = Slug::where('city_slug_dirty', $data['city_slug'])->first();
         if (!empty($city_slug_bridge)) {
             $data['city_slug'] = $city_slug_bridge->city_slug_clean;
             // NOTE(review): prints the corrected slug to the output stream; kept
             // because callers may rely on it, but it looks like leftover debugging.
             echo $data['city_slug'];
         }
     }

     // Trim every value (this also casts non-string values to string).
     $data = array_map('trim', $data);

     return $data;
 }
Пример #3
0
 /**
  * Geocodes every address that has no city yet and links it to a city row.
  *
  * For each Address whose `city_id` is null, the raw input is sent to
  * GoogleMapsScraper::Geocode(); status, country and city (or island) are
  * extracted from the response with ExtractWithXpath(), the matching City is
  * fetched or created, updated and saved, and its id is stored on the address.
  *
  * @return void
  */
 public function Geocoding()
 {
     // Addresses not yet geocoded, or geocoded without producing a city.
     $addressesWithoutGeolocation = Address::whereNull('city_id')->get();
     $GeoScraped = new GoogleMapsScraper();
     $slug = new SlugClass();

     foreach ($addressesWithoutGeolocation as $address) {
         $gmaps_output = $GeoScraped->Geocode($address->raw_input);

         // gmaps_status; defaults to null when the extraction does not yield it.
         $dataArray = parent::ExtractWithXpath($gmaps_output, $this->GeoPatterns, 'text', 'first', 'xml');
         $gmapsStatus = isset($dataArray['gmaps_status']) ? $dataArray['gmaps_status'] : null;

         // address_components as a list of fragments; an empty result means nothing to scan.
         $componentsArray = parent::ExtractWithXpath($gmaps_output, $this->AddressComponentsPattern, 'html', 'multiple', 'xml');
         if (empty($componentsArray)) {
             $componentsArray = array();
         }

         // Start from a clean slate for every address so that nothing (e.g. an
         // 'island' value) leaks over from the previously processed address.
         $cityArray = array();
         $countryArray = array();

         // Inspect every component; a later component of the same kind overrides an earlier one.
         foreach ($componentsArray as $AddressComponent) {
             $wrappedComponent = '<result>' . $AddressComponent . '</result>';
             if (preg_match("|type>country<|", $AddressComponent)) {
                 // country_code, country_name
                 $countryArray = parent::ExtractWithXpath($wrappedComponent, $this->CountryPatterns, 'text', 'first', 'xml');
             } elseif (preg_match("|type>natural_feature<|", $AddressComponent)) {
                 // island (used as city name when no city name is available)
                 $cityArray = parent::ExtractWithXpath($wrappedComponent, $this->IslandPatterns, 'text', 'first', 'xml');
             } elseif (preg_match("|type>locality<|", $AddressComponent)) {
                 // city_name
                 $cityArray = parent::ExtractWithXpath($wrappedComponent, $this->CityPatterns, 'text', 'first', 'xml');
             }
         }

         // Resolve the final values with explicit defaults, because the extraction
         // results replace the arrays wholesale and may lack any of these keys.
         $cityName = isset($cityArray['city_name']) ? $cityArray['city_name'] : null;
         if (empty($cityName) && !empty($cityArray['island'])) {
             $cityName = $cityArray['island'];
         }
         $countryCode = empty($countryArray['country_code']) ? 'xx' : $countryArray['country_code'];
         $countryName = isset($countryArray['country_name']) ? $countryArray['country_name'] : null;

         // If the city doesn't exist yet, create it; either way refresh its geocoding data.
         $city = City::firstOrNew(['city_name' => $cityName, 'country_code' => strtolower($countryCode)]);
         $city->gmaps_status = $gmapsStatus;
         $city->gmaps_output = $gmaps_output;
         $city->country_name = $countryName;
         $city->city_slug = $slug->slugify($cityName);
         $city->save();

         // Save the foreign key on the address.
         $address->city_id = $city->id;
         $address->save();
     }
 }
Пример #4
0
 /**
  * Runs the slug maintenance steps and sends the user back to the slugs page.
  *
  * Passes the request to SlugClass::UpdateSlugTable() and then to
  * SlugClass::UpdateFlightsTable(), in that order, on the same instance.
  *
  * @param Request $request Incoming request, forwarded unchanged to both steps.
  * @return mixed Whatever redirect('scraper/slugs') returns.
  */
 public function slugs(Request $request)
 {
     $slugMaintenance = new SlugClass();

     // Order matters as written: slug table first, flights table second.
     $slugMaintenance->UpdateSlugTable($request);
     $slugMaintenance->UpdateFlightsTable($request);

     return redirect('scraper/slugs');
 }