/**
 * Builds a fresh, unpersisted Offering from the supplied dates.
 *
 * The status is always set to Offering::START_DATES_KNOWN.
 *
 * @param mixed $created  value handed to Offering::setCreated()
 * @param mixed $starDate value handed to Offering::setStartDate()
 * @param mixed $endDate  value handed to Offering::setEndDate()
 *
 * @return Offering the populated offering
 */
private function getOffering($created, $starDate, $endDate)
 {
     $newOffering = new Offering();

     // Timestamps supplied by the caller
     $newOffering->setCreated($created);
     $newOffering->setStartDate($starDate);
     $newOffering->setEndDate($endDate);

     // Fixed status for every offering built here
     $newOffering->setStatus(Offering::START_DATES_KNOWN);

     return $newOffering;
 }
// Example no. 2
 /**
  * Scrapes the initiative's course pages and creates the courses and
  * offerings that are not in the database yet.
  *
  * Pages without an 'h2.offering_dates_date' element are skipped (treated as
  * self paced). Courses and offerings that already exist are never updated.
  * New entities are only built when doCreate() returns true and are only
  * written to the database when doModify() returns true.
  *
  * @return array the Offering objects built during this run; an offering is
  *               included even when it was not persisted (doModify() false,
  *               or the persist attempt failed)
  */
 public function scrape()
 {
     $em = $this->getManager();
     // Offerings built during this run
     $offerings = array();
     $this->out("Scraping " . $this->initiative->getName());
     // Step 1: Getting a list of course URLs
     $this->out("Getting a list of course pages");
     $urls = $this->getListOfCoursePages();
     $urlsCount = count($urls);
     // Step 2: Go through each page and collect the course details
     $this->out("Number of courses found: {$urlsCount}");
     $this->out("Gathering details about each course");
     $courseDetails = array();
     foreach ($urls as $url) {
         if (!$url) {
             continue;
         }
         $courseDetail = array();
         // file_get_contents() returns false on failure; do not feed that to the parser
         $html = file_get_contents(self::BASE_URL . $url);
         if ($html === false) {
             $this->out("Could not download " . self::BASE_URL . $url);
             continue;
         }
         $this->domParser->load($html);
         // Ignore self paced
         if (!$this->domParser->find('h2.offering_dates_date', 0)) {
             // Release the parsed document on this path too, as the normal path does
             $this->domParser->clear();
             continue;
         }
         // Get name and shortName from a title shaped like "Name (ShortName)"
         // NOTE(review): when the title contains no '(' strpos() returns false and
         // the substr() calls below cut off the last character of the name; the
         // special case that follows appears to compensate for one such title.
         // Left as is because stored short names may depend on it - confirm.
         $nameString = $this->domParser->find('h1.page-title', 0)->plaintext;
         $openBracketPosition = strpos($nameString, '(');
         $closeBracketPosition = strpos($nameString, ')');
         $courseDetail['name'] = substr($nameString, 0, $openBracketPosition - 1);
         $courseDetail['shortName'] = substr($nameString, $openBracketPosition + 1, $closeBracketPosition - $openBracketPosition - 1);
         if ($courseDetail['name'] == 'Introduction to Nursing in Healthcar') {
             $courseDetail['name'] = 'Introduction to Nursing in Healthcare';
             $courseDetail['shortName'] = 'IntroNur';
         }
         // Get the video id from the url
         // eg. www.youtube.com/embed/Bw8HkjGQb3U?wmode=opaque&rel=0&showinfo=0
         // The id is read from a fixed offset up to the first '?'
         $youtubeIdPosition = 31;
         $video = 'http://' . $this->domParser->find('iframe.media-youtube-player', 0)->src;
         $questionMarkPosition = strpos($video, '?');
         $courseDetail['video'] = 'http://www.youtube.com/watch?v=' . substr($video, $youtubeIdPosition, $questionMarkPosition - $youtubeIdPosition);
         $instructors = trim($this->domParser->find('div[id=subject-teacher-tagline]', 0)->plaintext);
         // Remove the leading 'by '
         $instructors = substr($instructors, 3);
         $courseDetail['instructors'] = explode(' & ', $instructors);
         $courseDetail['desc'] = $this->domParser->find('div.offering_body', 0)->plaintext;
         // First date element is used as the start date, second as the end date
         $courseDetail['start_date'] = $this->domParser->find('h2.offering_dates_date', 0)->plaintext;
         $courseDetail['end_date'] = $this->domParser->find('h2.offering_dates_date', 1)->plaintext;
         $courseDetail['url'] = $url;
         // NOTE(review): raw dump that bypasses out(); looks like debug output - confirm before removing
         print_r($courseDetail);
         $courseDetails[] = $courseDetail;
         $this->domParser->clear();
     }
     $this->out(count($courseDetails) . ' course pages found');
     // Default stream: every new course is filed under 'business'
     $stream = $this->dbHelper->getStreamBySlug('business');
     $this->out("Default stream is " . $stream->getName());
     foreach ($courseDetails as $courseDetail) {
         /**
          * Taking a shortcut here. Check if a course is created or not. If it isn't create the
          * course,offering, etc. Updates are ignored
          * TODO: Not take a shortcut
          */
         // Build a course object
         $course = new Course();
         $courseShortName = 'open2study_' . $courseDetail['shortName'];
         $course->setShortName($courseShortName);
         $course->setInitiative($this->initiative);
         $course->setName($courseDetail['name']);
         $course->setDescription($courseDetail['desc']);
         // Default to Business
         $course->setStream($stream);
         $course->setVideoIntro($courseDetail['video']);
         $course->setUrl(self::BASE_URL . $courseDetail['url']);
         $dbCourse = $this->dbHelper->getCourseByShortName($courseShortName);
         if (!$dbCourse) {
             if ($this->doCreate()) {
                 // New course
                 $this->out("NEW COURSE - " . $course->getName());
                 if ($this->doModify()) {
                     foreach ($courseDetail['instructors'] as $instructor) {
                         $course->addInstructor($this->dbHelper->createInstructorIfNotExists($instructor));
                     }
                     $em->persist($course);
                     $em->flush();
                 }
             }
         } else {
             // Course already known: attach the offering to the stored entity
             $course = $dbCourse;
         }
         // Check if offering exists
         $shortName = $this->getOfferingShortName($courseDetail);
         $offering = $this->dbHelper->getOfferingByShortName($shortName);
         if ($offering) {
             continue;
         }
         // Check if create offering is on. No offering exists at this point, so
         // there is nothing to report; appending here would add a null entry.
         if (!$this->doCreate()) {
             continue;
         }
         // createFromFormat() returns false when the text does not match the format
         $startDate = \DateTime::createFromFormat("d/m/Y", $courseDetail['start_date']);
         $endDate = \DateTime::createFromFormat("d/m/Y", $courseDetail['end_date']);
         if ($startDate === false || $endDate === false) {
             $this->out("OFFERING {$courseDetail['name']} skipped - could not parse dates");
             continue;
         }
         $offering = new Offering();
         $offering->setCourse($course);
         $offering->setStartDate($startDate);
         $offering->setEndDate($endDate);
         $offering->setStatus(Offering::START_DATES_KNOWN);
         $offering->setLength(4);
         $offering->setShortName($shortName);
         $offering->setUrl(self::BASE_URL . $courseDetail['url']);
         $offering->setVideoIntro($courseDetail['video']);
         $offering->setSearchDesc($courseDetail['desc']);
         $offering->setCreated(new \DateTime());
         if ($this->doModify()) {
             try {
                 $em->persist($offering);
                 $em->flush();
                 $this->out("OFFERING {$courseDetail['name']} created");
             } catch (\Exception $e) {
                 $this->out("OFFERING {$courseDetail['name']} creation FAILED");
                 // Surface the reason instead of discarding it
                 $this->out($e->getMessage());
             }
         }
         $offerings[] = $offering;
     }
     return $offerings;
 }