Пример #1
0
 /**
  * Fill the item's fields from an HTML fragment.
  *
  * Every field is the first match of a pattern read from the
  * "maltapark.*ForListItem" configuration keys; the title additionally has
  * its tags stripped, and price_val is the price string converted through
  * TextFormatter::currencyStringToFloat().
  *
  * @param $elementHtml HTML fragment the configured patterns are applied to
  */
 public function __construct($elementHtml)
 {
     // Looks up the pattern stored under $configKey and applies it to the fragment.
     $firstMatch = function ($configKey) use ($elementHtml) {
         return Helpers\RegExp::getFirstMatch(Config::get($configKey), $elementHtml);
     };

     $this->id = $firstMatch('maltapark.idForListItem');
     $this->img_url = $firstMatch('maltapark.imgForListItem');
     $this->title = strip_tags($firstMatch('maltapark.titleForListItem'));
     $this->price = $firstMatch('maltapark.priceForListItem');
     // Numeric companion of the raw price string kept above.
     $this->price_val = Helpers\TextFormatter::currencyStringToFloat($this->price);
     $this->date = $firstMatch('maltapark.dateForListItem');
 }
Пример #2
0
 /**
  * Fetch the page at the configured "maltapark.url" and build one Section
  * per match of the configured "maltapark.categoryRegexp" pattern.
  *
  * Capture group 1 and capture group 2 of each match are handed to the
  * Section constructor, in that order.
  *
  * @return array Section objects; an empty array when the fetched contents are falsy
  */
 static function getSectionsFromNet()
 {
     $html = Helpers\FakeBrowser::get(Config::get('maltapark.url'));
     if (!$html) {
         return [];
     }

     $found = Helpers\RegExp::getAllMatch(Config::get('maltapark.categoryRegexp'), $html);

     // NOTE(review): assumes $found is laid out per capture group
     // ($found[1], $found[2] are parallel lists) — confirm against getAllMatch().
     $total = count($found[1]);
     $result = [];
     for ($idx = 0; $idx < $total; $idx++) {
         $result[] = new Section($found[1][$idx], $found[2][$idx]);
     }

     return $result;
 }
Пример #3
0
 /**
  * Fill the item's detail fields from an HTML fragment.
  *
  * Fields are the first match of patterns read from the
  * "maltapark.*ForItemDetail" configuration keys. The image and contact
  * values are prefixed with their configured base URLs, and title and
  * description have their tags stripped.
  *
  * @param $elementHtml HTML fragment the configured patterns are applied to
  * @param $itemId      value stored as the item's id
  */
 public function __construct($elementHtml, $itemId)
 {
     // Applies a ready-made pattern to the fragment and returns its first match.
     $firstMatch = function ($pattern) use ($elementHtml) {
         return Helpers\RegExp::getFirstMatch($pattern, $elementHtml);
     };

     $this->id = $itemId;
     $this->img_url = Config::get('maltapark.detailImgUrl')
         . $firstMatch(Config::get('maltapark.imgForItemDetail'));
     $this->title = strip_tags($firstMatch(Config::get('maltapark.titleForItemDetail')));
     $this->price = $firstMatch(Config::get('maltapark.priceForItemDetail'));
     $this->price_val = Helpers\TextFormatter::currencyStringToFloat($this->price);
     $this->contact = Config::get('maltapark.contactImgUrl')
         . $firstMatch(Config::get('maltapark.contactForItemDetail'));
     $this->description = strip_tags($firstMatch(Config::get('maltapark.descriptionForItemDetail')));

     // Each entry of topDetailsLabel maps a property name to a label pattern;
     // the shared "topDetailForItemDetail" pattern is appended to the label
     // before matching, and the result is stored on the property of that name.
     foreach ($this->topDetailsLabel as $property => $labelPattern) {
         $this->{$property} = $firstMatch($labelPattern . Config::get('maltapark.topDetailForItemDetail'));
     }
 }
Пример #4
0
 /**
  * Collect e-mail addresses for every ".result_box" entry of a result page.
  *
  * The page URL is read from $this->argument[1]. Progress is echoed while
  * the method works through these phases:
  *  1. fetch the page and extract the HTML of each ".result_box" node;
  *  2. split the entries into those exposing a "?v=e" link and the rest;
  *  3. for the first group, read the address from the send-email iframe;
  *  4. for the second group, look up the company website on the entry page;
  *  5. look for a "mailto:" link on each website's contact page, then on
  *     its home page.
  *
  * Results are left on the instance: addresses are appended to
  * $this->resultMails, entry URLs without a website are stored in
  * $this->noWebSiteOnYP, and websites without any mailto link are stored
  * in $this->websiteNoEmail.
  */
 public function run()
 {
     $url = $this->argument[1];

     $items = $this->collectResultBoxes($url);
     echo "Found " . count($items) . " items";
     echo "\n";
     echo "parsing items\n";
     list($withEmail, $notEmail) = $this->splitItemsByEmailLink($items);

     echo "Fetching the email from YellowPage (total:" . count($withEmail) . ")";
     echo "\n";
     $this->fetchEmailsFromSendEmailIframe($withEmail);

     echo "\nAttempt to get other email (total:" . count($notEmail) . ")\n";
     list($websites, $notWebsite) = $this->resolveCompanyWebsites($notEmail);
     $this->noWebSiteOnYP = $notWebsite;

     echo "\nChecking in the company websites (total:" . count($websites) . ")\n";
     $this->websiteNoEmail = $this->collectEmailsFromWebsites($websites);
     //now we should crawl those websiteNoEmail entries from scratch
 }

 /**
  * Fetch $url and return the HTML of every ".result_box" node on it.
  *
  * @param $url address of the result page
  * @return array HTML fragments, one per matched node
  */
 private function collectResultBoxes($url)
 {
     $content = FakeBrowser::get($url);
     $crawler = new Crawler(null, $url);
     $crawler->addContent($content, "text/html");

     $items = [];
     foreach ($crawler->filter('.result_box') as $node) {
         $items[] = Helpers\Dom::getHtml($node);
     }
     return $items;
 }

 /**
  * Sort entry fragments into "has a ?v=e link" and "has only a plain link".
  *
  * @param array $items HTML fragments of the result entries
  * @return array two lists: [URLs ending in "?v=e", plain entry URLs]
  */
 private function splitItemsByEmailLink(array $items)
 {
     $withEmail = [];
     $notEmail = [];
     foreach ($items as $index => $item) {
         echo ($index + 1) . " ";
         $emailUrl = RegExp::getFirstMatch('/;" href="(.+?)\\?v=e/', $item);
         if ($emailUrl !== null) {
             // The pattern stops right before the query string, so restore it.
             $emailUrl .= "?v=e";
             $withEmail[] = $emailUrl;
             echo $emailUrl;
             echo " had email";
         } else {
             $notEmailUrl = RegExp::getFirstMatch('/;" href="(.+?)".+><h2>/', $item);
             // Only queue entries whose link was actually found; a null here
             // would otherwise be fetched as a URL in the website phase.
             if ($notEmailUrl !== null) {
                 $notEmail[] = $notEmailUrl;
             }
             echo $notEmailUrl;
             echo " had no email";
         }
         echo "\n";
     }
     return [$withEmail, $notEmail];
 }

 /**
  * Visit each "?v=e" page, follow its send-email link to the iframe and
  * append the address found there to $this->resultMails.
  *
  * @param array $emailUrls URLs produced by splitItemsByEmailLink()
  */
 private function fetchEmailsFromSendEmailIframe(array $emailUrls)
 {
     foreach ($emailUrls as $index => $emailUrl) {
         echo ($index + 1) . ". ";
         $content = FakeBrowser::get($emailUrl);
         $idMail = RegExp::getFirstMatch('/href=".+send-email.html\\?(.+?)"/', $content);
         if (null === $idMail) {
             echo "Error on" . $emailUrl . "\n";
             continue;
         }

         // The captured query string is reused verbatim on the iframe endpoint.
         $ypEmailUrl = 'https://www.yellow.com.mt/development-tools/send-email-iframe.html?' . $idMail;
         $iframe = FakeBrowser::get($ypEmailUrl);
         $email = RegExp::getFirstMatch('/company_email" value="(.+?)"/', $iframe);
         if ($email !== null) {
             echo " found email: {$email}\n";
             $this->resultMails[] = $email;
         } else {
             echo " error on iframe\n";
         }
     }
 }

 /**
  * Fetch each entry page and extract the company website behind its
  * "redirector.php?yelref=" link.
  *
  * @param array $entryUrls plain entry URLs produced by splitItemsByEmailLink()
  * @return array two lists: [website URLs, entry URLs without a website link]
  */
 private function resolveCompanyWebsites(array $entryUrls)
 {
     $websites = [];
     $notWebsite = [];
     foreach ($entryUrls as $index => $entryUrl) {
         echo ($index + 1) . ". ";
         $entryPage = FakeBrowser::get($entryUrl);
         $websiteUrl = RegExp::getFirstMatch('/redirector.php\\?yelref=[^"](.+?)"/', $entryPage);
         if ($websiteUrl !== null) {
             // The pattern's [^"] swallows the first character of the target
             // before the capture starts; it is put back as "h" here
             // (presumably the "h" of "http" — confirm against real pages).
             $websiteUrl = "h" . $websiteUrl;
             echo "found webSite: {$websiteUrl}\n";
             $websites[] = $websiteUrl;
         } else {
             $notWebsite[] = $entryUrl;
             echo "{$entryUrl} does not have a website apparently\n";
         }
     }
     return [$websites, $notWebsite];
 }

 /**
  * Look for a "mailto:" address on each website: first on the page returned
  * by searchContactPage(), then on the home page. Found addresses are
  * appended to $this->resultMails.
  *
  * @param array $websites website URLs produced by resolveCompanyWebsites()
  * @return array websites for which no contact page or no mailto link was found
  */
 private function collectEmailsFromWebsites(array $websites)
 {
     $notMailOnIndex = [];
     foreach ($websites as $index => $website) {
         echo ($index + 1) . ". ";
         $contactPage = $this->searchContactPage($website);
         if (empty($contactPage)) {
             $notMailOnIndex[] = $website;
             continue;
         }

         echo " contact page not Empty for {$website}, searching for email\n";
         $email = RegExp::getFirstMatch('/mailto:(.+?)"/', $contactPage);
         if (empty($email)) {
             // Second chance: the home page itself may carry the mailto link.
             echo "\t no mailto found\n";
             echo "\t trying from homePage\n";
             $email = RegExp::getFirstMatch('/mailto:(.+?)"/', FakeBrowser::get($website));
         }

         if (!empty($email)) {
             echo "\t found {$email}\n";
             $this->resultMails[] = $email;
         } else {
             echo "\t no mailto found\n";
             $notMailOnIndex[] = $website;
         }
     }
     return $notMailOnIndex;
 }