/**
 * Execute the console command.
 *
 * Crawls three campus news sites (Claremont Independent, The Golden Antlers
 * and the CMC Forum front page) and stores each scraped article as a Posts
 * row, unless a post with the same title is already stored.
 *
 * @return mixed
 */
public function handle()
{
    // Shared persistence step: skip when the title is already known,
    // otherwise write a new Posts row. Progress is reported via echo.
    $savePost = function ($articleId, $author, $title, $description, $imgUrl, $url, $source) {
        if (Posts::where('title', '=', $title)->exists()) {
            echo $title . " already exists";

            return;
        }

        $post = new Posts();
        $post->article_id = $articleId;
        $post->author = $author;
        $post->title = $title;
        $post->description = $description;
        $post->imgUrl = $imgUrl;
        $post->url = $url;
        $post->source = $source;
        $post->save();
        echo "stored " . $title . "!";
    };

    // ---- Claremont Independent: every article on the campus-news listing ----
    $independentClient = new Client();
    $independentPage = $independentClient->request('GET', 'http://claremontindependent.com/category/campus-news/');
    if ($independentClient->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $independentPage->filter('div.site-content article')->each(function ($article) use ($savePost) {
        $link = $article->filter('a.post-thumbnail')->attr('href');
        $image = $article->filter('a.post-thumbnail img')->attr('src');
        $headline = $article->filter('header h1')->text();

        // Only the first 10 characters (the Y-m-d part) of the datetime attribute are kept.
        $published = $article->filter('header div.entry-meta span.entry-date time')->attr('datetime');
        $published = substr($published, 0, 10);

        $writer = $article->filter('header div.entry-meta span.byline span.author a')->text();

        // The teaser is entity-encoded first and then cut to 500 bytes.
        $teaser = mb_convert_encoding($article->filter('div.entry-content p')->text(), "HTML-ENTITIES", "UTF-8");
        $teaser = substr($teaser, 0, 500);

        $articleId = getArticleId($published);

        $savePost($articleId, $writer, $headline, $teaser, $image, $link, 'Claremont Independent');
    });

    // ---- The Golden Antlers: articles inside the first posts container only ----
    $antlersClient = new Client();
    $antlersPage = $antlersClient->request('GET', 'http://www.thegoldenantlers.com/');
    if ($antlersClient->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $antlersPage->filter('div.bl_posts_information')->each(function ($section, $position) use ($savePost) {
        // $position is the index supplied by each(); later containers are ignored.
        if ($position != 0) {
            return;
        }

        $section->filter('article')->each(function ($article) use ($savePost) {
            // The category slug doubles as the author name ("jane-doe" -> "Jane Doe").
            $writer = ucwords(str_replace('-', ' ', $article->attr('data-categoryslug')));
            $link = $article->filter('div.post-image a')->attr('href');
            $image = $article->filter('div.post-image img')->attr('src');

            $published = $article->filter('div.post-image div.meta-info div.info-date time')->attr('datetime');
            $published = substr(createDateAntlers($published), 0, 10);

            $headline = $article->filter('div.post-body h3')->text();

            // NOTE(review): the first str_replace needle is reproduced exactly as it
            // appears in the source (a single space); it may originally have been a
            // non-breaking-space entity - confirm against version control.
            $teaser = mb_convert_encoding($article->filter('div.post-body div.post-content p')->text(), "HTML-ENTITIES", "UTF-8");
            $teaser = str_replace(" ", "", $teaser);
            $teaser = trimWhiteSpace($teaser);
            $teaser = str_replace("CONTINUE READING", "", $teaser);

            $articleId = getArticleId($published);

            $savePost($articleId, $writer, $headline, $teaser, $image, $link, 'The Golden Antlers');
        });
    });

    // ---- CMC Forum: only the first big-grid post on the front page ----
    $forumClient = new Client();
    $forumPage = $forumClient->request('GET', 'http://cmcforum.com/');
    if ($forumClient->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $forumPage->filter('div.td-big-grid-wrapper div.td-big-grid-post')->each(function ($tile, $position) use ($savePost) {
        if ($position != 0) {
            return;
        }

        $link = $tile->filter('div.td-module-thumb a')->attr('href');
        $image = $tile->filter('a img')->attr('src');
        $headline = $tile->filter('div.td-big-grid-meta h3 a')->text();

        $published = $tile->filter('div.td-big-grid-meta div.td-module-meta-info div.td-post-date time')->attr('datetime');
        $published = substr($published, 0, 10);

        $writer = $tile->filter('div.td-big-grid-meta div.td-module-meta-info div.td-post-author-name a')->text();

        // The description is not on the front page, so the article itself is fetched.
        $articleClient = new Client();
        $articlePage = $articleClient->request('GET', $link);
        $teaser = $articlePage->filter('div.td-post-content p')->text();

        $articleId = DB::table('email_articles')->where('post_date', $published)->value('article_id');

        $savePost($articleId, $writer, $headline, $teaser, $image, $link, 'The Forum');
    });
}
/**
 * Execute the console command.
 *
 * Walks every row of the Athenaeum fall 2015 calendar table, derives the
 * event date and meal ("Dinner" or "Lunch @ 11:30") from the first cell, and
 * stores an AthSpeakers row for each row that carries an event description,
 * unless that speaker is already stored.
 *
 * @return mixed
 */
public function handle()
{
    $client = new Client();
    $calendar = $client->request('GET', 'https://www.cmc.edu/athenaeum/fall-2015-calendar');
    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $calendar->filter('table tr')->each(function ($row) {
        $cells = $row->children();

        // First cell: date text, optionally followed by a lunch time.
        $rawDate = trimWhiteSpace($cells->eq(0)->text());
        if (strpos($rawDate, 'Oct. 14') !== false) {
            // This one entry is routed through its own parser.
            $parts = getAthDate2($rawDate);
        } else {
            $parts = getDateAthSpeakers($rawDate);
        }

        $year = $parts[0];
        $parsedMonth = date_parse($parts[1]);
        $month = $parsedMonth['month'];
        $day = trimWhiteSpace($parts[2]);

        // A non-numeric day means the lunch time is still glued to it:
        // strip everything but digits/commas/dots, then drop the last 4 characters.
        $when = 'Dinner';
        if (!is_numeric($day)) {
            $when = 'Lunch @ 11:30';
            $day = substr(preg_replace("/[^0-9,.]/", "", $day), 0, -4);
        }
        if (strlen($day) < 2) {
            $day = "0" . $day;
        }

        $assembled = $year . "-" . $month . "-" . $day;
        echo $assembled;
        $date = Carbon::createFromFormat('Y-m-d', $assembled)->toDateString();

        // Second cell: first child is used as the title, second as the description.
        $eventParts = $cells->eq(1)->children();
        $body = $eventParts->eq(1);
        if ($body->count() < 1) {
            return;
        }

        echo "not empty" . "<br>";
        $description = mb_convert_encoding(trimWhiteSpace($body->text()), "HTML-ENTITIES", "UTF-8");
        $title = mb_convert_encoding(trimWhiteSpace($eventParts->eq(0)->text()), "HTML-ENTITIES", "UTF-8");

        // The speaker is the <strong> text inside the description, when present.
        $speaker = 'N/A';
        $strong = $body->filter('strong');
        if ($strong->count() > 0) {
            $speaker = mb_convert_encoding(trimWhiteSpace($strong->text()), "HTML-ENTITIES", "UTF-8");
        }

        $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');

        // NOTE(review): de-duplication is by speaker only, so once one 'N/A'
        // event is stored every later speakerless event is skipped - confirm intended.
        if (AthSpeakers::where('speaker', '=', $speaker)->exists()) {
            echo $speaker . " already exists";

            return;
        }

        $event = new AthSpeakers();
        $event->article_id = $id;
        $event->speaker = $speaker;
        $event->description = $description;
        $event->title = $title;
        $event->event_time = $when;
        $event->save();
        echo "stored " . $speaker . "!";
    });
}
/**
 * Execute the console command.
 *
 * Scrapes the CMS and Pomona-Pitzer composite athletics calendars and stores
 * one Sports row per calendar event, unless a row with the same team and
 * opponent already exists.
 *
 * Fixes over the previous version:
 *  - dates are always zero-padded Y-m-d (the Pomona branch used to build the
 *    date before padding the month, so post_date lookups used e.g. 2015-9-05);
 *  - cells that spill into the previous/next month now roll the year over
 *    instead of producing month 0 or 13.
 *
 * @return mixed
 */
public function handle()
{
    // Builds the Y-m-d date of a calendar cell. The month grid also shows the
    // tail of the previous month in its first row (index < 1, day > 20) and the
    // head of the next month in its last rows (index > $spillAfterRow, day < 10).
    $resolveDate = function ($year, $month, $day, $rowIndex, $spillAfterRow) {
        $year = (int) $year;
        $month = (int) $month;
        $day = (int) $day;

        if ($day < 10 && $rowIndex > $spillAfterRow) {
            $month++;
            if ($month > 12) {
                $month = 1;
                $year++;
            }
        }
        if ($day > 20 && $rowIndex < 1) {
            $month--;
            if ($month < 1) {
                $month = 12;
                $year--;
            }
        }

        return sprintf('%04d-%02d-%02d', $year, $month, $day);
    };

    // Converts a clock string such as "7:00 PM" into "19:00:00".
    $toClock = function ($text) {
        $pieces = explode(" ", $text);
        $timestamp = strtotime($pieces[0] . ":00" . " " . $pieces[1]);

        return substr(Carbon::createFromTimeStamp($timestamp)->toDateTimeString(), -8);
    };

    // Looks up the article for the date and stores the game if it is new.
    // NOTE(review): de-duplication ignores the date, so a second fixture
    // against the same opponent is never stored - confirm intended.
    $storeGame = function ($college, $team, $opponent, $date, $datetime) {
        $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');

        if (Sports::where('team', '=', $team)->where('opponent', '=', $opponent)->exists()) {
            echo $team . " " . $opponent . " already exists";

            return;
        }

        $game = new Sports();
        $game->article_id = $id;
        $game->college = $college;
        $game->team = $team;
        $game->opponent = $opponent;
        $game->time_start = $datetime;
        $game->save();
        echo $team . " " . $opponent . " stored!";
    };

    // ---- CMS ----
    $client = new Client();
    $crawler = $client->request('GET', 'http://www.cmsathletics.org/composite');
    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    // Calendar header is "<Month> <Year>".
    $header = explode(' ', $crawler->filter('table.calendar tr td.calendar-month')->text());
    $year = trimWhiteSpace($header[1]);
    $parsedMonth = date_parse(trimWhiteSpace($header[0]));
    $month = $parsedMonth['month'];

    $crawler->filter('table.calendar')->filter('tr')->each(function ($row, $rowIndex) use ($month, $year, $resolveDate, $toClock, $storeGame) {
        $row->filter('td div.calendar-event')->each(function ($event) use ($month, $year, $rowIndex, $resolveDate, $toClock, $storeGame) {
            echo $rowIndex;

            $day = trimWhiteSpace($event->parents()->filter('div.calendar-date')->text());
            $date = $resolveDate($year, $month, $day, $rowIndex, 3);

            $team = $event->filter('div.calendar-sport')->text();

            // Event text is "<team> <opponent> <h:mm AM>": drop the team prefix,
            // then split the trailing 8 characters (the time) from the opponent.
            $info = substr(trimWhiteSpace(preg_replace("/\\s+/", " ", $event->text())), strlen($team) + 1);
            $time = $toClock(trimWhiteSpace(substr($info, -8)));
            $opponent = trimWhiteSpace(substr($info, 0, -8));

            echo $date;
            $datetime = $date . " " . $time;

            $storeGame('CMS', $team, $opponent, $date, $datetime);
        });
    });

    // ---- Pomona-Pitzer ----
    $client = new Client();
    $crawler = $client->request('GET', 'http://www.pe.pomona.edu/composite');
    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $header = explode(' ', $crawler->filter('table tr.cal-nav td[colspan="5"]')->text());
    $year = trimWhiteSpace($header[1]);
    $parsedMonth = date_parse(trimWhiteSpace($header[0]));
    $month = $parsedMonth['month'];

    $crawler->filter('table')->filter('tr')->each(function ($row, $rowIndex) use ($month, $year, $resolveDate, $toClock, $storeGame) {
        $row->filter('td.cal-day div.cal-event')->each(function ($event) use ($month, $year, $rowIndex, $resolveDate, $toClock, $storeGame) {
            $day = trimWhiteSpace($event->parents()->filter('div.cal-date')->text());
            $date = $resolveDate($year, $month, $day, $rowIndex, 2);

            $sport = $event->filter('div.cal-sport');
            $team = trimWhiteSpace($sport->text());

            // Siblings after the sport label: first the opponent, then the time.
            $time = $sport->nextAll()->first()->nextAll()->first()->text();
            if ($time == 'TBA') {
                // Unknown start time is stored as midnight.
                $datetime = $date . " " . "00:00:00";
            } else {
                $datetime = $date . " " . $toClock($time);
            }

            $opponent = trimWhiteSpace($sport->nextAll()->first()->text());

            $storeGame('PP', $team, $opponent, $date, $datetime);
        });
    });
}
/**
 * Execute the console command.
 *
 * Scrapes the CMC events listing and the CMC news-releases listing and
 * stores each entry as an EventAndNews row (type 'event' or 'news'), unless
 * a row with the same title already exists.
 *
 * Fixes over the previous version:
 *  - the span callback used `$count = 0 && ...` / `$count = 1 && ...`
 *    (assignment, not comparison) and only worked by accident; it now simply
 *    collects every non-empty `content` attribute in document order;
 *  - an event without any timestamp is skipped instead of reading an
 *    undefined array index;
 *  - $time2 is initialised inside the closure that uses it.
 *
 * @return mixed
 */
public function handle()
{
    // ---- Events ----
    $client = new Client();
    $crawler = $client->request('GET', 'http://www.cmc.edu/news/events');
    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $crawler->filter('div.article')->each(function ($node) {
        $title = trimWhiteSpace($node->filter('h4')->text());
        $url = "http://www.cmc.edu" . $node->filter('h4 a')->attr('href');

        // Timestamps live in the `content` attribute of the <span>s; spans
        // without one yield null/'' and are filtered out.
        $times = array_values(array_filter(
            $node->filter('p span')->each(function ($span) {
                return $span->attr('content');
            })
        ));

        if (!isset($times[0])) {
            echo $title . " has no start time, skipped";

            return;
        }

        // "2015-10-01T18:00:00-07:00" -> "2015-10-01 18:00:00"
        // (replace the T, drop the 6-character UTC offset).
        $time1 = substr(str_replace("T", " ", $times[0]), 0, -6);
        $date = substr($time1, 0, 10);

        $time2 = null;
        if (isset($times[1])) {
            $time2 = substr(str_replace("T", " ", $times[1]), 0, -6);
        }

        echo "<br>";
        $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');

        if (EventAndNews::where('title', '=', $title)->exists()) {
            echo $title . " already exists";

            return;
        }

        $event = new EventAndNews();
        $event->article_id = $id;
        $event->title = $title;
        $event->url = $url;
        $event->time1 = $time1;
        if ($time2 !== null) {
            $event->time2 = $time2;
        }
        $event->type = 'event';
        $event->save();
        echo "stored!";
    });

    // ---- News releases ----
    $client = new Client();
    $crawler = $client->request('GET', 'http://www.cmc.edu/news/news-releases');
    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $crawler->filter('div.view-content > div')->each(function ($node) {
        $title = $node->filter('h4 a')->text();
        $url = "http://www.cmc.edu" . $node->filter('h4 a')->attr('href');

        // The thumbnail is optional.
        $imgUrl = null;
        if ($node->filter('a img')->count()) {
            $imgUrl = $node->filter('img')->attr('src');
        }

        $date = createDate($node->filter('p')->text());
        $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');

        if (EventAndNews::where('title', '=', $title)->exists()) {
            echo $title . " exists";
            echo "<br>";

            return;
        }

        $event = new EventAndNews();
        $event->article_id = $id;
        $event->title = $title;
        $event->url = $url;
        if ($imgUrl !== null) {
            $event->imgUrl = $imgUrl;
        }
        $event->time1 = $date;
        $event->type = 'news';
        $event->save();
        echo "stored: " . $title;
    });
}
/**
 * Execute the console command.
 *
 * Scrapes the current Athenaeum menu page. The last table on the page
 * alternates date cells (even index) and menu cells (odd index); each
 * date/menu pair is stored as an AthFood row with up to five dishes, unless
 * a row with the same first dish already exists.
 *
 * Fixes over the previous version:
 *  - the dish list was initialised as "" and then written by index, which on
 *    PHP 7.1+ is a string-offset write that keeps only the first character
 *    of every dish; it is now a real array;
 *  - a menu without a matching date is skipped instead of reading an
 *    undefined index;
 *  - unused counters and an unused intermediate dish list were removed.
 *
 * @return mixed
 */
public function handle()
{
    $client = new Client();
    $crawler = $client->request('GET', 'http://www1.claremontmckenna.edu/mmca/cur_menu.php');
    if ($client->getResponse()->getStatus() == 200) {
        echo '200 OK<br>';
    }

    $crawler->filter('table')->last()->each(function ($node) {
        $cells = $node->filter('tr')->filter('td');

        // Even cells hold the date text; text after the first comma is split
        // on spaces into month name and day.
        $times = $cells->each(function ($cell, $index) {
            if ($index % 2 != 0) {
                return null;
            }

            $parts = explode(',', $cell->text());
            $parts = explode(' ', $parts[1]);
            $parsedMonth = date_parse($parts[0]);
            $month = $parsedMonth['month'];
            $day = $parts[1];
            if (strlen($day) < 2) {
                $day = "0" . $day;
            }
            // NOTE(review): the year is hard-coded; the page does not appear
            // to be read for it here - confirm before running in another year.
            $year = 2015;

            return Carbon::createFromFormat('Y-m-d', $year . "-" . $month . "-" . $day)->toDateString();
        });

        // Odd cells hold the menu; dishes are separated by <br> in the last
        // child of the cell's last child.
        $values = $cells->each(function ($cell, $index) {
            if ($index % 2 != 1) {
                return null;
            }

            $main = trimWhiteSpace($cell->children()->last()->children()->last()->html());
            $main = trim(str_replace("--", "-", str_replace("<br>", '-', $main)), "-");

            return explode('-', $main);
        });

        // Drop the null placeholders and re-index so dates and menus pair up.
        $values = array_values(array_filter($values));
        $times = array_values(array_filter($times));

        foreach ($values as $i => $foods) {
            if (!isset($times[$i])) {
                echo "no date for menu " . $i . ", skipped";
                continue;
            }

            $date = $times[$i];
            $id = DB::table('email_articles')->where('post_date', $date)->value('article_id');
            $ath_id = DB::table('ath')->where('article_id', $id)->where('event_time', 'N/A')->value('ath_id');

            // NOTE(review): de-duplication is by first dish only, so the same
            // first dish on another day is treated as already stored - confirm.
            if (AthFood::where('food_1', '=', $foods[0])->exists()) {
                echo $foods[0] . " already exists";
                continue;
            }

            $menu = new AthFood();
            $menu->article_id = $id;
            $menu->ath_id = $ath_id;
            $menu->food_1 = $foods[0];
            // food_2 .. food_5 are optional.
            for ($slot = 1; $slot <= 4; $slot++) {
                if (isset($foods[$slot])) {
                    $menu->{'food_' . ($slot + 1)} = $foods[$slot];
                }
            }
            $menu->save();
            echo "stored!";
        }
    });
}