コード例 #1
0
 /**
  * Scrapes every course page of the initiative and creates the courses and
  * offerings that are not in the database yet.
  *
  * Existing courses and offerings are never updated. Courses and offerings are
  * only persisted when doModify() returns true, and offerings are only built
  * when doCreate() returns true.
  *
  * @return array Offering objects built during this run
  */
 public function scrape()
 {
     $em = $this->getManager();
     // Offerings built during this run
     $offerings = array();
     $this->out("Scraping " . $this->initiative->getName());

     // Step 1: Getting a list of course URLs
     $this->out("Getting a list of course pages");
     $urls = $this->getListOfCoursePages();
     $urlsCount = count($urls);

     // Step 2: Go through the pages and collect the details of each course
     $this->out("Number of courses found: {$urlsCount}");
     $this->out("Gathering details about each course");
     $courseDetails = array();
     foreach ($urls as $url) {
         if (!$url) {
             continue;
         }
         $html = file_get_contents(self::BASE_URL . $url);
         if ($html === false) {
             // A failed download must not be parsed as if it were an empty page
             $this->out("Could not download course page " . self::BASE_URL . $url);
             continue;
         }
         $this->domParser->load($html);
         $courseDetail = $this->parseLoadedCoursePage($url);
         $this->domParser->clear();
         if ($courseDetail !== null) {
             $courseDetails[] = $courseDetail;
         }
     }
     $this->out(count($courseDetails) . ' course pages found');

     // Default stream
     $stream = $this->dbHelper->getStreamBySlug('business');
     $this->out("Default stream is " . $stream->getName());

     // Step 3: Create the courses and offerings that do not exist yet
     foreach ($courseDetails as $courseDetail) {
         /**
          * Taking a shortcut here. Check if a course is created or not. If it isn't create the
          * course, offering, etc. Updates are ignored
          * TODO: Not take a shortcut
          */
         $courseShortName = 'open2study_' . $courseDetail['shortName'];
         $course = $this->dbHelper->getCourseByShortName($courseShortName);
         if (!$course) {
             // Build a course object
             $course = new Course();
             $course->setShortName($courseShortName);
             $course->setInitiative($this->initiative);
             $course->setName($courseDetail['name']);
             $course->setDescription($courseDetail['desc']);
             // Default to Business
             $course->setStream($stream);
             $course->setVideoIntro($courseDetail['video']);
             $course->setUrl(self::BASE_URL . $courseDetail['url']);

             if ($this->doCreate()) {
                 // New course
                 $this->out("NEW COURSE - " . $course->getName());
                 if ($this->doModify()) {
                     foreach ($courseDetail['instructors'] as $instructor) {
                         $course->addInstructor($this->dbHelper->createInstructorIfNotExists($instructor));
                     }
                     $em->persist($course);
                     $em->flush();
                 }
             }
         }

         // Check if offering exists
         $shortName = $this->getOfferingShortName($courseDetail);
         if ($this->dbHelper->getOfferingByShortName($shortName)) {
             continue;
         }

         // Check if create offering is on. Nothing is built in that case, so
         // nothing is reported (the lookup above returned no offering).
         if (!$this->doCreate()) {
             continue;
         }

         // createFromFormat() returns false on malformed input; never hand that to the entity
         $startDate = \DateTime::createFromFormat("d/m/Y", $courseDetail['start_date']);
         $endDate = \DateTime::createFromFormat("d/m/Y", $courseDetail['end_date']);
         if ($startDate === false || $endDate === false) {
             $this->out("OFFERING {$courseDetail['name']} skipped - could not parse its dates");
             continue;
         }

         $offering = new Offering();
         $offering->setCourse($course);
         $offering->setStartDate($startDate);
         $offering->setEndDate($endDate);
         $offering->setStatus(Offering::START_DATES_KNOWN);
         $offering->setLength(4);
         $offering->setShortName($shortName);
         $offering->setUrl(self::BASE_URL . $courseDetail['url']);
         $offering->setVideoIntro($courseDetail['video']);
         $offering->setSearchDesc($courseDetail['desc']);
         $offering->setCreated(new \DateTime());
         if ($this->doModify()) {
             try {
                 $em->persist($offering);
                 $em->flush();
                 $this->out("OFFERING {$courseDetail['name']} created");
             } catch (\Exception $e) {
                 $this->out("OFFERING {$courseDetail['name']} creation FAILED");
                 $this->out($e->getMessage());
                 // A failed offering was neither created nor updated
                 continue;
             }
         }
         $offerings[] = $offering;
     }
     return $offerings;
 }

 /**
  * Extracts the course details from the page currently loaded into the DOM parser.
  *
  * @param string $url Course page path, relative to BASE_URL
  * @return array|null Details keyed by name, shortName, video, instructors, desc,
  *                    start_date, end_date and url; null when the page is skipped
  */
 private function parseLoadedCoursePage($url)
 {
     // Ignore self paced: pages without a dated offering heading are skipped
     $startDateNode = $this->domParser->find('h2.offering_dates_date', 0);
     if (!$startDateNode) {
         return null;
     }
     $endDateNode = $this->domParser->find('h2.offering_dates_date', 1);

     $titleNode = $this->domParser->find('h1.page-title', 0);
     if (!$titleNode) {
         $this->out("Skipping {$url} - no page title found");
         return null;
     }

     $courseDetail = array();

     // Get name and shortName from a title shaped like "Course Name (ShortName)"
     $nameString = trim($titleNode->plaintext);
     $openBracketPosition = strpos($nameString, '(');
     $closeBracketPosition = ($openBracketPosition === false)
         ? false
         : strpos($nameString, ')', $openBracketPosition);
     if ($openBracketPosition !== false && $closeBracketPosition !== false) {
         $courseDetail['name'] = rtrim(substr($nameString, 0, $openBracketPosition));
         $courseDetail['shortName'] = substr(
             $nameString,
             $openBracketPosition + 1,
             $closeBracketPosition - $openBracketPosition - 1
         );
     } elseif ($nameString === 'Introduction to Nursing in Healthcare') {
         // This title carries no "(ShortName)" part; keep its established short name
         $courseDetail['name'] = $nameString;
         $courseDetail['shortName'] = 'IntroNur';
     } else {
         // Without a short name the course cannot be identified in the database
         $this->out("Skipping {$url} - no short name in title '{$nameString}'");
         return null;
     }

     // Get the video id from the iframe url
     // eg. //www.youtube.com/embed/Bw8HkjGQb3U?wmode=opaque&rel=0&showinfo=0
     $courseDetail['video'] = '';
     $iframeNode = $this->domParser->find('iframe.media-youtube-player', 0);
     if ($iframeNode && preg_match('~/embed/([^?&/#]+)~', (string) $iframeNode->src, $matches)) {
         $courseDetail['video'] = 'http://www.youtube.com/watch?v=' . $matches[1];
     }

     // Instructors come as "by First Person & Second Person"
     $courseDetail['instructors'] = array();
     $taglineNode = $this->domParser->find('div[id=subject-teacher-tagline]', 0);
     if ($taglineNode) {
         // Remove the 'by'
         $tagline = (string) substr(trim($taglineNode->plaintext), 3);
         if ($tagline !== '') {
             $courseDetail['instructors'] = explode(' & ', $tagline);
         }
     }

     $descriptionNode = $this->domParser->find('div.offering_body', 0);
     $courseDetail['desc'] = $descriptionNode ? $descriptionNode->plaintext : null;
     $courseDetail['start_date'] = trim($startDateNode->plaintext);
     $courseDetail['end_date'] = $endDateNode ? trim($endDateNode->plaintext) : '';
     $courseDetail['url'] = $url;

     return $courseDetail;
 }
コード例 #2
0
ファイル: Scraper.php プロジェクト: vlsyu/class-central
 /**
  * Builds a Course entity from a decoded Coursera on-demand API response.
  *
  * Besides the basic fields it attaches the partner institutions and the
  * instructors, then downloads the course material (for the syllabus) and the
  * default schedule (for the length in weeks). The course itself is not
  * persisted here.
  *
  * @param array $data Decoded API response; $data['elements'][0] must describe the
  *                    course (slug, name, description, id are read from it)
  * @return Course
  */
 private function getOnDemandCourse($data = array())
 {
     $dbLanguageMap = $this->dbHelper->getLanguageMap();
     $element = $data['elements'][0];

     $course = new Course();
     $course->setShortName(substr('coursera_' . $element['slug'], 0, 49));
     $course->setInitiative($this->initiative);
     $course->setName($element['name']);
     $course->setDescription($element['description']);
     $course->setLongDescription(nl2br($element['description']));
     // Default to Computer Science
     $course->setStream($this->dbHelper->getStreamBySlug('cs'));
     $course->setUrl('https://www.coursera.org/learn/' . $element['slug']);

     // Resolve the language step by step so an unknown code is reported
     // instead of raising an undefined index error
     $languageCode = isset($element['primaryLanguageCodes'][0]) ? $element['primaryLanguageCodes'][0] : null;
     $lang = ($languageCode !== null && isset(self::$languageMap[$languageCode]))
         ? self::$languageMap[$languageCode]
         : null;
     if ($lang !== null && isset($dbLanguageMap[$lang])) {
         $course->setLanguage($dbLanguageMap[$lang]);
     } else {
         $this->out("Language not found " . $languageCode);
     }

     $course->setCertificate(false);
     $course->setVerifiedCertificate(
         isset($element['isVerificationEnabled']) ? $element['isVerificationEnabled'] : false
     );

     // Add the university
     $partners = isset($data['linked']['partners.v1']) ? $data['linked']['partners.v1'] : array();
     foreach ($partners as $university) {
         $ins = new Institution();
         $ins->setName($university['name']);
         $ins->setIsUniversity(true);
         $ins->setSlug($university['shortName']);
         $course->addInstitution($this->dbHelper->createInstitutionIfNotExists($ins));
     }

     // Add the instructors, preferring the full name when the API provides one
     $instructors = isset($data['linked']['instructors.v1']) ? $data['linked']['instructors.v1'] : array();
     foreach ($instructors as $courseraInstructor) {
         if (!empty($courseraInstructor['fullName'])) {
             $insName = $courseraInstructor['fullName'];
         } else {
             $firstName = isset($courseraInstructor['firstName']) ? $courseraInstructor['firstName'] : '';
             $lastName = isset($courseraInstructor['lastName']) ? $courseraInstructor['lastName'] : '';
             $insName = trim($firstName . ' ' . $lastName);
         }
         if ($insName === '') {
             // No usable name: do not create a nameless instructor
             continue;
         }
         $course->addInstructor($this->dbHelper->createInstructorIfNotExists($insName));
     }

     // Get Course Details like Syllabus and length
     $courseDetailsJson = file_get_contents(sprintf(self::ONDEMAND_OPENCOURSE_API, $element['slug']));
     $courseDetails = ($courseDetailsJson === false) ? null : json_decode($courseDetailsJson, true);
     if (!empty($courseDetails)) {
         $materialItems = (isset($courseDetails['courseMaterial']['elements'])
             && is_array($courseDetails['courseMaterial']['elements']))
             ? $courseDetails['courseMaterial']['elements']
             : array();
         $syllabus = '';
         foreach ($materialItems as $item) {
             $itemName = isset($item['name']) ? $item['name'] : '';
             $itemDescription = isset($item['description']) ? $item['description'] : '';
             $syllabus .= "<b>{$itemName}</b><br/>{$itemDescription}<br/><br/>";
         }
         $course->setSyllabus($syllabus);
     }

     // Calculate the length of the course as the sum of the schedule periods
     $scheduleJson = file_get_contents(sprintf(self::ONDEMAND_COURSE_SCHEDULE, $element['id']));
     $schedule = ($scheduleJson === false) ? null : json_decode($scheduleJson, true);
     if (!empty($schedule)) {
         $periods = (isset($schedule['elements'][0]['defaultSchedule']['periods'])
             && is_array($schedule['elements'][0]['defaultSchedule']['periods']))
             ? $schedule['elements'][0]['defaultSchedule']['periods']
             : array();
         $length = 0;
         foreach ($periods as $period) {
             if (isset($period['numberOfWeeks'])) {
                 $length += $period['numberOfWeeks'];
             }
         }
         if ($length > 0) {
             $course->setLength($length);
         }
     }

     return $course;
 }
コード例 #3
0
 /**
  * Builds a Course entity from a decoded Coursera on-demand API response.
  *
  * Sets the basic fields and attaches the partner institutions and the
  * instructors listed in the response. The course itself is not persisted here.
  *
  * @param array $data Decoded API response; $data['elements'][0] must describe the
  *                    course (slug, name and description are read from it)
  * @return Course
  */
 private function getOnDemandCourse($data = array())
 {
     $dbLanguageMap = $this->dbHelper->getLanguageMap();
     $element = $data['elements'][0];

     $course = new Course();
     $course->setShortName(substr('coursera_' . $element['slug'], 0, 49));
     $course->setInitiative($this->initiative);
     $course->setName($element['name']);
     $course->setDescription($element['description']);
     $course->setLongDescription(nl2br($element['description']));
     // Default to Computer Science
     $course->setStream($this->dbHelper->getStreamBySlug('cs'));
     $course->setUrl('https://www.coursera.org/learn/' . $element['slug']);

     // Resolve the language step by step so an unknown code is reported
     // instead of raising an undefined index error
     $languageCode = isset($element['primaryLanguageCodes'][0]) ? $element['primaryLanguageCodes'][0] : null;
     $lang = ($languageCode !== null && isset(self::$languageMap[$languageCode]))
         ? self::$languageMap[$languageCode]
         : null;
     if ($lang !== null && isset($dbLanguageMap[$lang])) {
         $course->setLanguage($dbLanguageMap[$lang]);
     } else {
         $this->out("Language not found " . $languageCode);
     }

     $course->setCertificate(false);
     $course->setVerifiedCertificate(
         isset($element['isVerificationEnabled']) ? $element['isVerificationEnabled'] : false
     );

     // Add the university
     $partners = isset($data['linked']['partners.v1']) ? $data['linked']['partners.v1'] : array();
     foreach ($partners as $university) {
         $ins = new Institution();
         $ins->setName($university['name']);
         $ins->setIsUniversity(true);
         $ins->setSlug($university['shortName']);
         $course->addInstitution($this->dbHelper->createInstitutionIfNotExists($ins));
     }

     // Add the instructors, preferring the full name when the API provides one
     $instructors = isset($data['linked']['instructors.v1']) ? $data['linked']['instructors.v1'] : array();
     foreach ($instructors as $courseraInstructor) {
         if (!empty($courseraInstructor['fullName'])) {
             $insName = $courseraInstructor['fullName'];
         } else {
             $firstName = isset($courseraInstructor['firstName']) ? $courseraInstructor['firstName'] : '';
             $lastName = isset($courseraInstructor['lastName']) ? $courseraInstructor['lastName'] : '';
             $insName = trim($firstName . ' ' . $lastName);
         }
         if ($insName === '') {
             // No usable name: do not create a nameless instructor
             continue;
         }
         $course->addInstructor($this->dbHelper->createInstructorIfNotExists($insName));
     }

     return $course;
 }