Example #1
0
 /**
  * Execute the console command.
  *
  * @return mixed
  */
 public function handle()
 {
     // Scrapes three campus publications and stores every article whose
     // title is not yet present in Posts. The status check is informational
     // only: scraping proceeds whatever the response code was.

     // --- Claremont Independent: every article on the campus-news page ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://claremontindependent.com/category/campus-news/');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     $crawler->filter('div.site-content article')->each(function ($node) {
         $url = $node->filter('a.post-thumbnail')->attr('href');
         $imgUrl = $node->filter('a.post-thumbnail img')->attr('src');
         $title = $node->filter('header h1')->text();
         // Only the first 10 characters of the datetime attribute are kept.
         $date = substr($node->filter('header div.entry-meta span.entry-date time')->attr('datetime'), 0, 10);
         $author = $node->filter('header div.entry-meta span.byline span.author a')->text();
         // Non-ASCII characters become HTML entities, then the result is cut to 500 bytes.
         $description = substr(mb_convert_encoding($node->filter('div.entry-content p')->text(), "HTML-ENTITIES", "UTF-8"), 0, 500);
         $this->storePostIfNew(getArticleId($date), $author, $title, $description, $imgUrl, $url, 'Claremont Independent');
     });

     // --- The Golden Antlers: articles of the first posts container only ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://www.thegoldenantlers.com/');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     // each() hands the callback the node's position as second argument, so
     // the index check below is what limits processing to the first match.
     $crawler->filter('div.bl_posts_information')->each(function ($node, $index) {
         if ($index != 0) {
             return;
         }
         $node->filter('article')->each(function ($article) {
             // The author is derived from the category slug ("jane-doe" -> "Jane Doe").
             $author = ucwords(str_replace('-', ' ', $article->attr('data-categoryslug')));
             $url = $article->filter('div.post-image a')->attr('href');
             $imgUrl = $article->filter('div.post-image img')->attr('src');
             $date = $article->filter('div.post-image div.meta-info div.info-date time')->attr('datetime');
             $date = substr(createDateAntlers($date), 0, 10);
             $title = $article->filter('div.post-body h3')->text();
             // Entity-encode, drop &nbsp;, normalise whitespace, strip the teaser link text.
             $description = str_replace("CONTINUE READING", "", trimWhiteSpace(str_replace("&nbsp;", "", mb_convert_encoding($article->filter('div.post-body div.post-content p')->text(), "HTML-ENTITIES", "UTF-8"))));
             $this->storePostIfNew(getArticleId($date), $author, $title, $description, $imgUrl, $url, 'The Golden Antlers');
         });
     });

     // --- The Forum: first post of the front-page grid only ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://cmcforum.com/');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     $crawler->filter('div.td-big-grid-wrapper div.td-big-grid-post')->each(function ($node, $index) {
         if ($index != 0) {
             return;
         }
         $url = $node->filter('div.td-module-thumb a')->attr('href');
         $imgUrl = $node->filter('a img')->attr('src');
         $title = $node->filter('div.td-big-grid-meta h3 a')->text();
         $date = substr($node->filter('div.td-big-grid-meta div.td-module-meta-info div.td-post-date time')->attr('datetime'), 0, 10);
         $author = $node->filter('div.td-big-grid-meta div.td-module-meta-info div.td-post-author-name a')->text();
         // The grid carries no excerpt, so the article page itself is fetched
         // and its first content paragraph is used as the description.
         $client2 = new Client();
         $crawler2 = $client2->request('GET', $url);
         $description = $crawler2->filter('div.td-post-content p')->text();
         $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
         $this->storePostIfNew($id, $author, $title, $description, $imgUrl, $url, 'The Forum');
     });
 }

 /**
  * Store a scraped article unless a post with the same title already exists.
  *
  * Echoes "<title> already exists" or "stored <title>!" accordingly.
  *
  * @param mixed  $articleId   value written to the post's article_id
  * @param string $author      author name
  * @param string $title       article title, also used as the duplicate key
  * @param string $description article excerpt
  * @param string $imgUrl      thumbnail URL
  * @param string $url         article URL
  * @param string $source      publication name
  *
  * @return void
  */
 private function storePostIfNew($articleId, $author, $title, $description, $imgUrl, $url, $source)
 {
     if (Posts::where('title', '=', $title)->exists()) {
         echo $title . " already exists";
         return;
     }
     $post = new Posts();
     $post->article_id = $articleId;
     $post->author = $author;
     $post->title = $title;
     $post->description = $description;
     $post->imgUrl = $imgUrl;
     $post->url = $url;
     $post->source = $source;
     $post->save();
     echo "stored " . $title . "!";
 }
 /**
  * Execute the console command.
  *
  * @return mixed
  */
 public function handle()
 {
     // Scrapes the Athenaeum fall-2015 calendar table and stores one
     // AthSpeakers record per table row that has a description element.
     $client = new Client();
     $crawler = $client->request('GET', 'https://www.cmc.edu/athenaeum/fall-2015-calendar');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     $crawler->filter('table tr')->each(function ($node) {
         $cells = $node->children();
         // First cell: the date, plus a time for lunch events.
         $date = trimWhiteSpace($cells->eq(0)->text());
         // Rows mentioning 'Oct. 14' go through a dedicated parser; both
         // helpers are used below as returning [year, month name, day].
         if (strpos($date, 'Oct. 14') !== false) {
             $date = getAthDate2($date);
         } else {
             $date = getDateAthSpeakers($date);
         }
         $year = $date[0];
         $month = date_parse($date[1]);
         $month = $month['month'];
         $day = trimWhiteSpace($date[2]);
         $when = 'Dinner';
         if (!is_numeric($day)) {
             // A non-numeric day is treated as a lunch entry: keep only
             // digits/commas/periods, then drop the last four characters
             // (presumably the "1130" of the time - verify against the page).
             $when = 'Lunch @ 11:30';
             $day = substr(preg_replace("/[^0-9,.]/", "", $day), 0, -4);
         }
         if (strlen($day) < 2) {
             // Zero-pad single-digit days.
             $day = "0" . $day;
         }
         $date = $year . "-" . $month . "-" . $day;
         echo $date;
         $date = Carbon::createFromFormat('Y-m-d', $date)->toDateString();

         // Second cell: child 0 is used as the title, child 1 as the description.
         $details = $cells->eq(1)->children();
         $body = $details->eq(1);
         if ($body->count() > 0) {
             echo "not empty" . "<br>";
             $description = mb_convert_encoding(trimWhiteSpace($body->text()), "HTML-ENTITIES", "UTF-8");
             $title = mb_convert_encoding(trimWhiteSpace($details->eq(0)->text()), "HTML-ENTITIES", "UTF-8");
             // The speaker is the <strong> text of the description, when present.
             $speaker = 'N/A';
             $strong = $body->filter('strong');
             if ($strong->count() > 0) {
                 $speaker = mb_convert_encoding(trimWhiteSpace($strong->text()), "HTML-ENTITIES", "UTF-8");
             }
             $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
             // Duplicate check on speaker AND title: the speaker alone is not
             // unique, since every event without a <strong> speaker shares the
             // 'N/A' placeholder and only the first would ever be stored.
             $alreadyStored = AthSpeakers::where('speaker', '=', $speaker)
                 ->where('title', '=', $title)
                 ->exists();
             if ($alreadyStored) {
                 echo $speaker . " already exists";
             } else {
                 $event = new AthSpeakers();
                 $event->article_id = $id;
                 $event->speaker = $speaker;
                 $event->description = $description;
                 $event->title = $title;
                 $event->event_time = $when;
                 $event->save();
                 echo "stored " . $speaker . "!";
             }
         }
     });
 }
Example #3
0
 /**
  * Execute the console command.
  *
  * @return mixed
  */
 public function handle()
 {
     // Scrapes the composite month calendars of CMS and Pomona-Pitzer
     // athletics and stores every game not yet present in Sports.

     // --- CMS Athletics ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://www.cmsathletics.org/composite');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     // The calendar heading is split on a space into month name and year.
     $heading = explode(' ', $crawler->filter('table.calendar tr td.calendar-month')->text());
     $year = trimWhiteSpace($heading[1]);
     $month = date_parse(trimWhiteSpace($heading[0]));
     $month = $month['month'];
     // each() passes the row position as the second callback argument.
     $crawler->filter('table.calendar')->filter('tr')->each(function ($node, $rowIndex) use ($month, $year) {
         $node->filter('td div.calendar-event')->each(function ($event) use ($month, $year, $rowIndex) {
             echo $rowIndex;
             $day = trimWhiteSpace($event->parents()->filter('div.calendar-date')->text());
             $date = $this->resolveCalendarDate($year, $month, $day, $rowIndex, 3);
             $team = $event->filter('div.calendar-sport')->text();
             // The event text is "<team> <opponent> <time>": drop the team
             // prefix, then treat the last 8 characters as the time.
             $info = substr(trimWhiteSpace(preg_replace("/\\s+/", " ", $event->text())), strlen($team) + 1);
             $time = $this->toClockTime(trimWhiteSpace(substr($info, -8)));
             $opponent = trimWhiteSpace(substr($info, 0, -8));
             echo $date;
             $this->storeGameIfNew('CMS', $team, $opponent, $date, $date . " " . $time);
         });
     });

     // --- Pomona-Pitzer ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://www.pe.pomona.edu/composite');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     $heading = explode(' ', $crawler->filter('table tr.cal-nav td[colspan="5"]')->text());
     $year = trimWhiteSpace($heading[1]);
     $month = date_parse(trimWhiteSpace($heading[0]));
     $month = $month['month'];
     $crawler->filter('table')->filter('tr')->each(function ($node, $rowIndex) use ($month, $year) {
         $node->filter('td.cal-day div.cal-event')->each(function ($event) use ($month, $year, $rowIndex) {
             $day = trimWhiteSpace($event->parents()->filter('div.cal-date')->text());
             $sport = $event->filter('div.cal-sport');
             $team = trimWhiteSpace($sport->text());
             // Sibling layout used here: sport, then opponent, then time.
             $time = $sport->nextAll()->first()->nextAll()->first()->text();
             // The date is built (with a zero-padded month) before it is used
             // for either branch, so the article lookup and time_start always
             // get a well-formed Y-m-d value.
             $date = $this->resolveCalendarDate($year, $month, $day, $rowIndex, 2);
             if ($time == 'TBA') {
                 $datetime = $date . " " . "00:00:00";
             } else {
                 $datetime = $date . " " . $this->toClockTime($time);
             }
             $opponent = trimWhiteSpace($sport->nextAll()->first()->text());
             $this->storeGameIfNew('PP', $team, $opponent, $date, $datetime);
         });
     });
 }

 /**
  * Build the zero-padded Y-m-d date of a calendar cell.
  *
  * A month grid also shows days of the neighbouring months: a day below 10
  * in a row after $lastOwnRow counts as next month, a day above 20 in the
  * first row as previous month. The year is adjusted when that crosses a
  * year boundary.
  *
  * @param string $year       year shown in the calendar heading
  * @param mixed  $month      month number shown in the calendar heading
  * @param string $day        day number shown in the cell
  * @param int    $rowIndex   position of the cell's table row
  * @param int    $lastOwnRow rows after this index may hold next-month days
  *
  * @return string
  */
 private function resolveCalendarDate($year, $month, $day, $rowIndex, $lastOwnRow)
 {
     if ($day < 10 && $rowIndex > $lastOwnRow) {
         $month++;
     }
     if ($day > 20 && $rowIndex < 1) {
         $month--;
     }
     if ($month > 12) {
         $month = 1;
         $year = (int) $year + 1;
     } elseif ($month === 0) {
         $month = 12;
         $year = (int) $year - 1;
     }
     return $year . "-" . str_pad($month, 2, "0", STR_PAD_LEFT) . "-" . str_pad($day, 2, "0", STR_PAD_LEFT);
 }

 /**
  * Convert a "<h:mm> <AM|PM>" string into a 24-hour "HH:MM:SS" string.
  *
  * @param string $text time text as scraped from the page
  *
  * @return string
  */
 private function toClockTime($text)
 {
     $parts = explode(" ", $text);
     $timestamp = strtotime($parts[0] . ":00" . " " . $parts[1]);
     // Only the time-of-day part of the resulting date-time string is kept.
     return substr(Carbon::createFromTimeStamp($timestamp)->toDateTimeString(), -8);
 }

 /**
  * Store a game unless the same team/opponent pairing already exists.
  *
  * Echoes "<team> <opponent> already exists" or "<team> <opponent> stored!".
  *
  * @param string $college  college code written to the record
  * @param string $team     team name
  * @param string $opponent opponent description
  * @param string $date     Y-m-d date used to look up the article id
  * @param string $datetime start date-time written to time_start
  *
  * @return void
  */
 private function storeGameIfNew($college, $team, $opponent, $date, $datetime)
 {
     $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
     if (Sports::where('team', '=', $team)->where('opponent', '=', $opponent)->exists()) {
         echo $team . " " . $opponent . " already exists";
         return;
     }
     $game = new Sports();
     $game->article_id = $id;
     $game->college = $college;
     $game->team = $team;
     $game->opponent = $opponent;
     $game->time_start = $datetime;
     $game->save();
     echo $team . " " . $opponent . " stored!";
 }
Example #4
0
 /**
  * Execute the console command.
  *
  * @return mixed
  */
 public function handle()
 {
     // Scrapes the CMC events page and the news-releases page and stores
     // every entry whose title is not yet present in EventAndNews.

     // --- Events ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://www.cmc.edu/news/events');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     $crawler->filter('div.article')->each(function ($node) {
         $title = trimWhiteSpace($node->filter('h4')->text());
         $url = "http://www.cmc.edu" . $node->filter('h4 a')->attr('href');
         // Collect the `content` attribute of every span in document order;
         // spans without one yield an empty value and are filtered out. The
         // first remaining value is the start time, the second (if any) the
         // end time.
         $times = $node->filter('p span')->each(function ($span) {
             return $span->attr('content');
         });
         $times = array_values(array_filter($times));
         if (!isset($times[0])) {
             // Without a start time there is no date to store or look up.
             echo $title . " has no start time, skipped";
             return;
         }
         // Replace the "T" separator with a space and drop the last six
         // characters (presumably the UTC offset - verify against the page).
         $time1 = substr(str_replace("T", " ", $times[0]), 0, -6);
         $date = substr($time1, 0, 10);
         $time2 = null;
         if (isset($times[1])) {
             $time2 = substr(str_replace("T", " ", $times[1]), 0, -6);
         }
         echo "<br>";
         $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
         if (EventAndNews::where('title', '=', $title)->exists()) {
             echo $title . " already exists";
         } else {
             $event = new EventAndNews();
             $event->article_id = $id;
             $event->title = $title;
             $event->url = $url;
             $event->time1 = $time1;
             if (isset($time2)) {
                 $event->time2 = $time2;
             }
             $event->type = 'event';
             $event->save();
             echo "stored!";
         }
     });

     // --- News releases ---
     $client = new Client();
     $crawler = $client->request('GET', 'http://www.cmc.edu/news/news-releases');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     $crawler->filter('div.view-content > div')->each(function ($node) {
         $title = $node->filter('h4 a')->text();
         $url = "http://www.cmc.edu" . $node->filter('h4 a')->attr('href');
         // The image is optional; it is only read when a linked image exists.
         $imgUrl = null;
         if ($node->filter('a img')->count()) {
             $imgUrl = $node->filter('img')->attr('src');
         }
         $date = createDate($node->filter('p')->text());
         $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
         if (EventAndNews::where('title', '=', $title)->exists()) {
             echo $title . " exists";
             echo "<br>";
         } else {
             $event = new EventAndNews();
             $event->article_id = $id;
             $event->title = $title;
             $event->url = $url;
             if (isset($imgUrl)) {
                 $event->imgUrl = $imgUrl;
             }
             $event->time1 = $date;
             $event->type = 'news';
             $event->save();
             echo "stored: " . $title;
         }
     });
 }
Example #5
0
 /**
  * Execute the console command.
  *
  * @return mixed
  */
 public function handle()
 {
     // Scrapes the current Athenaeum menu page and stores one AthFood record
     // per menu whose first dish is not yet present.
     $client = new Client();
     $crawler = $client->request('GET', 'http://www1.claremontmckenna.edu/mmca/cur_menu.php');
     $status_code = $client->getResponse()->getStatus();
     if ($status_code == 200) {
         echo '200 OK<br>';
     }
     // Only the last table on the page is processed; its cells alternate
     // between a date cell (even position) and a menu cell (odd position).
     $crawler->filter('table')->last()->each(function ($node) {
         $cells = $node->filter('tr')->filter('td');

         // Even cells: the part after the first comma is split on spaces
         // into month name and day.
         $times = $cells->each(function ($cell, $index) {
             if ($index % 2 == 0) {
                 $parts = explode(',', $cell->text());
                 $parts = explode(' ', $parts[1]);
                 $month = date_parse($parts[0]);
                 $month = $month['month'];
                 $day = $parts[1];
                 if (strlen($day) < 2) {
                     // Zero-pad single-digit days.
                     $day = "0" . $day;
                 }
                 // NOTE(review): the year is hard-coded, so menus scraped in
                 // any other year get a 2015 date - confirm whether intended.
                 $year = 2015;
                 return Carbon::createFromFormat('Y-m-d', $year . "-" . $month . "-" . $day)->toDateString();
             }
         });

         // Odd cells: the last child's last child holds the dishes separated
         // by <br>. They are joined with "-" and split again, so a dish name
         // containing a literal hyphen is split as well.
         // NOTE(review): the first child block was previously parsed too but
         // its result was never used; only this block is stored.
         $values = $cells->each(function ($cell, $index) {
             if ($index % 2 == 1) {
                 $main = trimWhiteSpace($cell->children()->last()->children()->last()->html());
                 $main = trim(str_replace("--", "-", str_replace("<br>", '-', $main)), "-");
                 return explode('-', $main);
             }
         });

         // Drop the empty results of the skipped cells and reindex from 0.
         $values = array_values(array_filter($values));
         $times = array_values(array_filter($times));

         foreach ($values as $i => $foods) {
             // $foods is the list of dish names. It must stay an array:
             // writing offsets into an empty string stores single characters
             // on PHP 7.1+.
             $date = isset($times[$i]) ? $times[$i] : null;
             $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
             $ath_id = DB::table('ath')->where('article_id', $id)->where('event_time', 'N/A')->value('ath_id');
             if (AthFood::where('food_1', '=', $foods[0])->exists()) {
                 echo $foods[0] . " already exists";
                 continue;
             }
             $menu = new AthFood();
             $menu->article_id = $id;
             $menu->ath_id = $ath_id;
             $menu->food_1 = $foods[0];
             // food_2 .. food_5 are filled only when that many dishes exist.
             for ($k = 1; $k < 5; $k++) {
                 if (isset($foods[$k])) {
                     $menu->{'food_' . ($k + 1)} = $foods[$k];
                 }
             }
             $menu->save();
             echo "stored!";
         }
     });
 }