/**
 * Build a doctrine Course entity out of a csv row.
 *
 * Columns read from the row:
 *   0 - course name
 *   1 - description
 *   2 - start date (any format accepted by \DateTime)
 *   3 - end date (any format accepted by \DateTime)
 *   4 - intro video url
 *   5 - course url (also passed to getCourseId() to derive the short name)
 *
 * The language is always set to the 'Arabic' entry of the language map and the
 * stream to the one with the slug 'humanities'.
 *
 * @param array $row Numerically indexed csv row
 * @return Course
 * @throws \Exception When a date column cannot be parsed by \DateTime
 */
public function getCourseEntity($row)
{
    $course = new Course();
    $course->setName($row[0]);
    $course->setDescription($row[1]);

    // Upgrade only a leading "http://" scheme. A plain str_replace('http', 'https', ...)
    // also rewrites urls that are already secure ("https://" became "httpss://") and
    // any later "http" inside the url.
    $course->setVideoIntro(preg_replace('#^http://#i', 'https://', $row[4]));

    $course->setUrl($row[5]);
    $course->setShortName($this->getCourseId($row[5]));
    $course->setInitiative($this->initiative);

    // Set the language to arabic
    $langMap = $this->dbHelper->getLanguageMap();
    $course->setLanguage($langMap['Arabic']);

    // Set the default stream as humanities
    $defaultStream = $this->dbHelper->getStreamBySlug('humanities');
    $course->setStream($defaultStream);

    // Course length in weeks: whole days between start and end, rounded up
    $start = new \DateTime($row[2]);
    $end = new \DateTime($row[3]);
    $length = ceil($start->diff($end)->days / 7);
    $course->setLength($length);

    return $course;
}
/**
 * Given an array built from the edX csv, returns a Course entity.
 *
 * Keys read from the record: 'course-code', 'title', 'description',
 * 'course-video-youtube', 'link', 'course-verified', 'course-start' and
 * 'course-end'. The whole record is also handed to getShortName().
 *
 * The stream defaults to the one with the slug 'cs' and the language to the
 * 'English' entry of the language map.
 *
 * @param array $c Record keyed by csv column name
 * @return Course
 * @throws \Exception When a non-empty date value cannot be parsed by \DateTime
 */
private function getCourseEntity($c = array())
{
    $defaultStream = $this->dbHelper->getStreamBySlug('cs');
    $langMap = $this->dbHelper->getLanguageMap();
    $defaultLanguage = $langMap['English'];

    $course = new Course();
    $course->setShortName($this->getShortName($c));
    $course->setInitiative($this->initiative);
    $course->setName($c['course-code'] . ': ' . $c['title']);
    $course->setDescription($c['description']);
    $course->setLongDescription(nl2br($c['description']));
    $course->setLanguage($defaultLanguage);
    $course->setStream($defaultStream); // Default to Computer Science
    $course->setVideoIntro($c['course-video-youtube']);
    $course->setUrl($c['link']);
    $course->setCertificate(false);
    $course->setVerifiedCertificate($c['course-verified']);

    // Calculate length in weeks. Both dates are required: an empty start date
    // would make \DateTime fall back to the current time and produce a length
    // that depends on when the scraper happens to run.
    $length = null;
    if (!empty($c['course-start']) && !empty($c['course-end'])) {
        $start = new \DateTime($c['course-start']);
        $end = new \DateTime($c['course-end']);
        $length = ceil($start->diff($end)->days / 7);
    }
    $course->setLength($length);

    return $course;
}
/**
 * Syncs the Coursera catalog with the database.
 *
 * When $this->isCredential is set the work is delegated to scrapeCredentials()
 * and nothing is returned.
 *
 * Otherwise two passes are made:
 *  1. On demand courses: every 'v2.ondemand' element of the courses.v1 api is
 *     loaded, matched against the database (by short name, then by name) and
 *     created or updated. For courses that already exist the offerings are then
 *     synced: one self paced offering when the course has no sessions, one
 *     offering per session otherwise. A course created in this run gets its
 *     offerings on a later run.
 *  2. Session based courses: every entry of getCoursesArray() is turned into a
 *     Course plus one Offering per non-zero-status session and created or
 *     updated field by field.
 *
 * Writes are gated by doCreate() / doUpdate() / doModify().
 *
 * @return array|null Offerings created or updated by the session based pass
 *                    (the on demand pass does not add to this list); null in
 *                    credential mode
 */
public function scrape()
{
    if ($this->isCredential) {
        $this->scrapeCredentials();
        return;
    }

    $defaultStream = $this->dbHelper->getStreamBySlug('cs');
    $dbLanguageMap = $this->dbHelper->getLanguageMap();
    $em = $this->getManager();
    $kuber = $this->container->get('kuber'); // File Api. NOTE(review): not used below - confirm it can go
    $offerings = array();

    /*************************************
     * On Demand Courses
     *************************************/
    $url = 'https://www.coursera.org/api/courses.v1';
    $allCourses = json_decode(file_get_contents($url), true);
    if (!is_array($allCourses) || empty($allCourses['elements'])) {
        // A failed request or undecodable body must not abort the session based pass
        $this->out("On demand course list is empty or could not be loaded : " . $url);
        $allCourses = array('elements' => array());
    }

    foreach ($allCourses['elements'] as $element) {
        if ($element['courseType'] != 'v2.ondemand') {
            continue;
        }

        $onDemandCourse = json_decode(
            file_get_contents(sprintf(self::ONDEMAND_COURSE_URL, $element['slug'])),
            true
        );

        // Skip courses that are not flagged as real. empty() also covers a failed
        // request, where there is nothing to index into.
        if (empty($onDemandCourse['elements'][0]['isReal'])) {
            continue;
        }

        $promoPhoto = !empty($onDemandCourse['elements'][0]['promoPhoto'])
            ? $onDemandCourse['elements'][0]['promoPhoto']
            : null;

        $c = $this->getOnDemandCourse($onDemandCourse);

        // Find the stored course: short name first, then the name within this initiative
        $dbCourse = null;
        $dbCourseFromSlug = $this->dbHelper->getCourseByShortName($c->getShortName());
        if ($dbCourseFromSlug) {
            $dbCourse = $dbCourseFromSlug;
        } else {
            $dbCourseFromName = $this->findCourseByName($c->getName(), $this->initiative);
            if ($dbCourseFromName) {
                $dbCourse = $dbCourseFromName;
            }
        }

        if (empty($dbCourse)) {
            // Create the course
            if ($this->doCreate()) {
                $this->out("NEW COURSE - " . $c->getName());
                if ($this->doModify()) {
                    $em->persist($c);
                    $em->flush();

                    if ($promoPhoto) {
                        $this->uploadImageIfNecessary($promoPhoto, $c);
                    }

                    // Send an update to Slack
                    $this->dbHelper->sendNewCourseToSlack($c, $this->initiative);
                }
            }

            // Offerings of a new course are only synced once the course is stored
            continue;
        }

        // Update the course details
        $changedFields = $this->dbHelper->changedFields($this->onDemandCourseFields, $c, $dbCourse);
        if (!empty($changedFields) && $this->doUpdate()) {
            $this->out("UPDATE COURSE - " . $dbCourse->getName());
            $this->outputChangedFields($changedFields);
            if ($this->doModify()) {
                $em->persist($dbCourse);
                $em->flush();

                // Same guard as for new courses: no photo, nothing to upload
                if ($promoPhoto) {
                    $this->uploadImageIfNecessary($promoPhoto, $dbCourse);
                }
            }
        }

        // Whether the next offering is already open.
        // NOTE(review): nothing reads this flag any more - confirm it can be dropped.
        $selfPaced = ($dbCourse->getNextOffering()->getStatus() == Offering::COURSE_OPEN);

        // Update the sessions.
        $courseId = $onDemandCourse['elements'][0]['id'];
        $sessionsJson = file_get_contents(sprintf(self::ONDEMAND_SESSION_IDS, $courseId));
        $sessionDetails = ($sessionsJson === false) ? null : json_decode($sessionsJson, true);
        if (!is_array($sessionDetails)) {
            // A failed lookup is not the same as "no sessions": treating it as such
            // would turn a scheduled course into a self paced one.
            $this->out("Could not load sessions for course : " . $dbCourse->getName());
            continue;
        }

        if (empty($sessionDetails['elements'])) {
            // No sessions: represent the course with a single offering that
            // shares the short name of the course
            $offering = new Offering();
            $offering->setShortName($dbCourse->getShortName());
            $offering->setUrl($dbCourse->getUrl());
            $offering->setCourse($dbCourse);

            if (isset($onDemandCourse['elements'][0]['plannedLaunchDate'])) {
                try {
                    // Self paced Not Started - But will Start in the future
                    $this->out("SELF PACED FUTURE COURSE : " . $dbCourse->getName());
                    $startDate = new \DateTime($onDemandCourse['elements'][0]['plannedLaunchDate']);
                    $endDate = new \DateTime($onDemandCourse['elements'][0]['plannedLaunchDate']);
                    $endDate->add(new \DateInterval("P30D"));
                    $offering->setStatus(Offering::START_DATES_KNOWN);
                } catch (\Exception $e) {
                    // Launch date cannot be parsed: skip this course
                    continue;
                }
            } else {
                // Self paced course that can be accessed right now
                $this->out("SELF PACED COURSE : " . $dbCourse->getName());
                $startDate = new \DateTime();
                $offering->setStatus(Offering::COURSE_OPEN);
                $endDate = new \DateTime();
                $endDate->add(new \DateInterval("P30D"));

                if ($dbCourse->getNextOffering()->getStatus() == Offering::COURSE_OPEN) {
                    // Already self paced nothing to be done here
                    continue;
                }
            }

            $offering->setStartDate($startDate);
            $offering->setEndDate($endDate);

            // Check if offering exists
            $dbOffering = $this->dbHelper->getOfferingByShortName($dbCourse->getShortName());
            if ($dbOffering) {
                // Check if the dates and other details are right
                $this->offeringChangedFields($offering, $dbOffering);
            } elseif ($this->doCreate()) {
                // Save and Create the offering
                $this->out("NEW OFFERING - " . $offering->getName());
                if ($this->doModify()) {
                    $em->persist($offering);
                    $em->flush();
                    $this->dbHelper->sendNewOfferingToSlack($offering);
                }
            }
        } else {
            // Regularly Scheduled Course
            $this->out("Regularly Scheduled Course : " . $dbCourse->getName());

            // A course with future announced date becomes current and has sessions:
            // its placeholder offering (same short name as the course) is reused
            // for the first session instead of creating a new offering.
            $dbOffering = null;
            foreach ($dbCourse->getOfferings() as $o) {
                if ($o->getShortName() == $dbCourse->getShortName()) {
                    $dbOffering = $o;
                    break;
                }
            }

            foreach ($sessionDetails['elements'] as $session) {
                $sessionId = $session['id'];
                $offeringShortName = 'coursera_' . $sessionId;

                // Create an offering
                $offering = new Offering();
                $offering->setShortName($offeringShortName);
                $offering->setUrl($dbCourse->getUrl());
                $offering->setCourse($dbCourse);
                $offering->setStatus(Offering::START_DATES_KNOWN);

                // Timestamps are divided by 1000 before being used as unix
                // timestamps (presumably the api reports milliseconds)
                $startDate = new \DateTime('@' . intval($session['startedAt'] / 1000));
                $endDate = new \DateTime('@' . intval($session['endedAt'] / 1000));
                $startDate->setTimezone(new \DateTimeZone('America/Los_Angeles'));
                $endDate->setTimezone(new \DateTimeZone('America/Los_Angeles'));
                $offering->setStartDate($startDate);
                $offering->setEndDate($endDate);

                // Check if offering exists
                if (!$dbOffering) {
                    $dbOffering = $this->dbHelper->getOfferingByShortName($offeringShortName);
                }

                if ($dbOffering) {
                    // Check if the dates and other details are right
                    $this->offeringChangedFields($offering, $dbOffering);
                } elseif ($this->doCreate()) {
                    $this->out("NEW OFFERING - " . $offering->getName());
                    if ($this->doModify()) {
                        $em->persist($offering);
                        $em->flush();
                        $this->dbHelper->sendNewOfferingToSlack($offering);
                    }
                }

                // The placeholder may only be matched with the first session
                $dbOffering = null;
            }
        }
    }

    /*************************************
     * Session Based Courses
     *************************************/
    $courseraCourses = $this->getCoursesArray();
    foreach ($courseraCourses as $courseraCourse) {
        $selfServingId = $courseraCourse['self_service_course_id'];
        $courseraCourseId = $courseraCourse['id'];
        $courseraCourseShortName = $courseraCourse['short_name'];
        $courseShortName = 'coursera_' . $courseraCourseShortName;
        $courseUrl = $this->getCourseLink($courseraCourse);
        $courseLang = isset(self::$languageMap[$courseraCourse['language']])
            ? self::$languageMap[$courseraCourse['language']]
            : null;
        $catalogDetails = $this->getDetailsFromCourseraCatalog($courseraCourseId);

        // Create a course object
        $course = new Course();
        $course->setShortName($courseShortName);
        $course->setInitiative($this->initiative);
        $course->setName($courseraCourse['name']);
        $course->setDescription($courseraCourse['short_description']);
        $course->setLongDescription($catalogDetails['aboutTheCourse']);
        $course->setSyllabus($catalogDetails['courseSyllabus']);
        $course->setStream($defaultStream); // Default to Computer Science
        $course->setVideoIntro($this->getVideoUrl($courseraCourse));
        $course->setUrl($courseUrl);
        if (isset($dbLanguageMap[$courseLang])) {
            $course->setLanguage($dbLanguageMap[$courseLang]);
        } else {
            $this->out("Language not found " . $courseraCourse['language']);
        }

        // Get the workload
        if (!empty($catalogDetails['estimatedClassWorkload'])
            && ($workload = $this->getWorkLoad($catalogDetails['estimatedClassWorkload']))
        ) {
            $course->setWorkloadMin($workload[0]);
            $course->setWorkloadMax($workload[1]);
        }

        // Get the certificate information
        $sid = $this->getLatestSessionId($catalogDetails);
        if ($sid) {
            $sDetails = $this->getDetailsFromSessionCatalog($sid);
            $course->setCertificate($sDetails['eligibleForCertificates']);
            $course->setVerifiedCertificate($sDetails['eligibleForSignatureTrack']);
        }

        // Add the university
        foreach ($courseraCourse['universities'] as $university) {
            $ins = new Institution();
            $ins->setName($university['name']);
            $ins->setIsUniversity(true);
            $ins->setSlug($university['short_name']);
            $course->addInstitution($this->dbHelper->createInstitutionIfNotExists($ins));
        }

        // Add categories to search description
        $searchDesc = array();
        foreach ($courseraCourse['categories'] as $category) {
            $searchDesc[] = $category['name'];
        }
        $course->setSearchDesc(implode(' ', $searchDesc));

        // Filter out of the offerings to remove those with no status and then
        // get the length of the newest offering
        $courseraOfferings = array_filter($courseraCourse['courses'], function ($offering) {
            return !($offering['status'] == 0);
        });
        if (!empty($courseraOfferings)) {
            $newestOffering = end($courseraOfferings);
            $course->setLength($this->getOfferingLength($newestOffering['duration_string']));
            reset($courseraOfferings);
        }

        $courseImage = $courseraCourse['large_icon'];

        $dbCourse = $this->dbHelper->getCourseByShortName($courseShortName);
        if (!$dbCourse) {
            if ($this->doCreate()) {
                // New course
                $this->out("NEW COURSE - " . $course->getName());
                if ($this->doModify()) {
                    // Get the instructors using the coursera instructor api
                    $courseraInstructors = $this->getInstructorsArray($courseraCourseShortName);
                    foreach ($courseraInstructors as $courseraInstructor) {
                        $insName = $courseraInstructor['first_name'] . ' ' . $courseraInstructor['last_name'];
                        $course->addInstructor($this->dbHelper->createInstructorIfNotExists($insName));
                    }

                    $em->persist($course);
                    $em->flush();

                    $this->dbHelper->sendNewCourseToSlack($course, $this->initiative);

                    // Upload the image
                    if ($courseImage) {
                        $this->uploadImageIfNecessary($courseImage, $course);
                    }
                }
            }
        } else {
            // Check if any fields are modified
            $courseModified = false;
            $changedFields = array(); // To keep track of fields that have changed
            foreach ($this->courseFields as $field) {
                $getter = 'get' . $field;
                $setter = 'set' . $field;
                if ($course->{$getter}() != $dbCourse->{$getter}()) {
                    $courseModified = true;

                    // Add the changed field to the changedFields array
                    $changed = array();
                    $changed['field'] = $field;
                    $changed['old'] = $dbCourse->{$getter}();
                    $changed['new'] = $course->{$getter}();
                    $changedFields[] = $changed;

                    $dbCourse->{$setter}($course->{$getter}());
                }
            }

            if ($this->doUpdate()) {
                // Upload the image
                if ($courseImage) {
                    $this->uploadImageIfNecessary($courseImage, $dbCourse);
                }
            }

            if ($courseModified && $this->doUpdate()) {
                // Course has been modified
                $this->out("UPDATE COURSE - " . $dbCourse->getName());
                $this->outputChangedFields($changedFields);
                if ($this->doModify()) {
                    $em->persist($dbCourse);
                    $em->flush();
                }
            }

            $course = $dbCourse;
        }

        // Done with course. Now create offerings
        foreach ($courseraOfferings as $courseraOffering) {
            // Create a offering object and set its parameters
            $offering = new Offering();
            $offeringShortName = $courseraCourseShortName . '_' . $courseraCourseId . '_' . $courseraOffering['id'];
            $offering->setShortName($offeringShortName);
            $offering->setCourse($course);
            $offering->setUrl($courseUrl);

            // Figure out the dates and status. Whatever getDates() returns
            // overrides the defaults chosen here.
            $details = array();
            $details['status'] = Offering::START_DATES_UNKNOWN;
            if ($selfServingId == $courseraOffering['id']) {
                $details['status'] = Offering::COURSE_OPEN;
            }
            $details = array_merge(
                $details,
                $this->getDates($courseraOffering, $this->getOfferingLength($courseraOffering['duration_string']))
            );

            $offering->setStartDate(new \DateTime($details['start_date']));
            $offering->setStatus($details['status']);
            if (isset($details['end_date'])) {
                $offering->setEndDate(new \DateTime($details['end_date']));
            }

            $dbOffering = $this->dbHelper->getOfferingByShortName($offeringShortName);
            if (!$dbOffering) {
                if ($this->doCreate()) {
                    $this->out("NEW OFFERING - " . $offering->getName());
                    if ($this->doModify()) {
                        $em->persist($offering);
                        $em->flush();
                    }
                    // NOTE(review): unlike the on demand pass, Slack is notified even
                    // when doModify() is false - confirm this is intended
                    $this->dbHelper->sendNewOfferingToSlack($offering);
                    $offerings[] = $offering;
                }
            } else {
                // old offering. Check if has been modified or not
                $offeringModified = false;
                $changedFields = array();
                foreach ($this->offeringFields as $field) {
                    $getter = 'get' . $field;
                    $setter = 'set' . $field;
                    if ($offering->{$getter}() != $dbOffering->{$getter}()) {
                        $offeringModified = true;

                        // Add the changed field to the changedFields array
                        $changed = array();
                        $changed['field'] = $field;
                        $changed['old'] = $dbOffering->{$getter}();
                        $changed['new'] = $offering->{$getter}();
                        $changedFields[] = $changed;

                        $dbOffering->{$setter}($offering->{$getter}());
                    }
                }

                if ($offeringModified && $this->doUpdate()) {
                    // Offering has been modified
                    $this->out("UPDATE OFFERING - " . $dbOffering->getName());
                    $this->outputChangedFields($changedFields);
                    if ($this->doModify()) {
                        $em->persist($dbOffering);
                        $em->flush();
                    }
                    $offerings[] = $dbOffering;
                }
            }
        }
    }

    return $offerings;
}
/**
 * Maps one Udacity catalog record onto a fresh Course entity.
 *
 * Keys read from the record: 'slug', 'title', 'short_summary', 'homepage',
 * 'syllabus', 'expected_duration', 'expected_duration_unit', 'summary',
 * 'expected_learning' and, when present, 'teaser_video' => 'youtube_url'.
 *
 * Fixed values: stream with the slug 'cs', the 'English' language entry, no
 * certificate and a workload of 6 for both minimum and maximum.
 *
 * @param array $udacityCourse Course record as an associative array
 * @return Course
 */
private function getCourseEntity($udacityCourse = array())
{
    $csStream = $this->dbHelper->getStreamBySlug('cs');
    $languages = $this->dbHelper->getLanguageMap();
    $english = $languages['English'];

    $entity = new Course();

    // Short names are capped at 50 characters
    $shortName = substr('udacity_' . $udacityCourse['slug'], 0, 50);
    $entity->setShortName($shortName);
    $entity->setInitiative($this->initiative);
    $entity->setName($udacityCourse['title']);
    $entity->setDescription($udacityCourse['short_summary']);
    $entity->setLanguage($english);
    $entity->setStream($csStream); // Computer Science is the default stream
    $entity->setCertificate(false);
    $entity->setUrl($udacityCourse['homepage']);
    $entity->setSyllabus(nl2br($udacityCourse['syllabus']));
    $entity->setWorkloadMin(6);
    $entity->setWorkloadMax(6);

    // Length in weeks; a month counts as four weeks, any other unit leaves it unset
    $duration = $udacityCourse['expected_duration'];
    $weeks = null;
    if ($udacityCourse['expected_duration_unit'] == 'months') {
        $weeks = $duration * 4;
    } elseif ($udacityCourse['expected_duration_unit'] == 'weeks') {
        $weeks = $duration;
    }
    $entity->setLength($weeks);

    // Long description: summary followed by the "why take this course" section
    $longDescription = $udacityCourse['summary']
        . '<br/><br/><b>Why Take This Course?</b><br/>'
        . $udacityCourse['expected_learning'];
    $entity->setLongDescription(nl2br($longDescription));

    // The intro video is optional
    if (empty($udacityCourse['teaser_video']['youtube_url'])) {
        return $entity;
    }

    $entity->setVideoIntro($udacityCourse['teaser_video']['youtube_url']);

    return $entity;
}
/**
 * Scrapes the course pages below self::BASE_URL and creates the courses and
 * offerings that are not stored yet.
 *
 * Step 1 collects the details of every course page that shows offering dates
 * (pages without them are treated as self paced and ignored). Step 2 creates a
 * Course for every unknown short name and an Offering for every unknown
 * offering short name. Existing courses and offerings are never updated.
 *
 * Writes are gated by doCreate() / doModify().
 *
 * @return array Offerings built by this run (persisted only when doModify() allows it)
 */
public function scrape()
{
    $em = $this->getManager();

    // Array of offerings created or updated
    $offerings = array();

    $this->out("Scraping " . $this->initiative->getName());

    // Step 1: Getting a list of course URLs
    $this->out("Getting a list of course pages");
    $urls = $this->getListOfCoursePages();
    $urlsCount = count($urls);

    // Step 2: Go through the page and create/update offering
    $this->out("Number of courses found: {$urlsCount}");
    $this->out("Gathering details about each course");

    $courseDetails = array();
    foreach ($urls as $url) {
        if (!$url) {
            continue;
        }

        $courseDetail = array();
        $this->domParser->load(file_get_contents(self::BASE_URL . $url));

        // Ignore self paced
        if (!$this->domParser->find('h2.offering_dates_date', 0)) {
            continue;
        }

        // Get Name and shortName. The page title is expected to look like
        // "Course name (ShortName)".
        $nameString = $this->domParser->find('h1.page-title', 0)->plaintext;
        $openBracketPosition = strpos($nameString, '(');
        $closeBracketPosition = strpos($nameString, ')');
        $courseDetail['name'] = substr($nameString, 0, $openBracketPosition - 1);
        $courseDetail['shortName'] = substr(
            $nameString,
            $openBracketPosition + 1,
            $closeBracketPosition - $openBracketPosition - 1
        );

        // Hand-written correction for one title that the parsing above truncates
        // (presumably because that page title has no brackets - confirm before
        // changing the parsing, this special case depends on its current output)
        if ($courseDetail['name'] == 'Introduction to Nursing in Healthcar') {
            $courseDetail['name'] = 'Introduction to Nursing in Healthcare';
            $courseDetail['shortName'] = 'IntroNur';
        }

        // Get the video id from the url
        // eg. www.youtube.com/embed/Bw8HkjGQb3U?wmode=opaque&rel=0&showinfo=0
        $youtubeIdPosition = 31; // Offset of the id within 'http://' . src
        $video = 'http://' . $this->domParser->find('iframe.media-youtube-player', 0)->src;
        $questionMarkPosition = strpos($video, '?');
        $courseDetail['video'] = 'http://www.youtube.com/watch?v='
            . substr($video, $youtubeIdPosition, $questionMarkPosition - $youtubeIdPosition);

        $instructors = trim($this->domParser->find('div[id=subject-teacher-tagline]', 0)->plaintext);
        // Remove the 'by'
        $instructors = substr($instructors, 3);
        $courseDetail['instructors'] = explode(' & ', $instructors);

        $courseDetail['desc'] = $this->domParser->find('div.offering_body', 0)->plaintext;
        $courseDetail['start_date'] = $this->domParser->find('h2.offering_dates_date', 0)->plaintext;
        $courseDetail['end_date'] = $this->domParser->find('h2.offering_dates_date', 1)->plaintext;
        $courseDetail['url'] = $url;

        $courseDetails[] = $courseDetail;
        $this->domParser->clear();
    }

    $this->out(count($courseDetails) . ' course pages found');

    // Default stream
    $stream = $this->dbHelper->getStreamBySlug('business');
    $this->out("Default stream is " . $stream->getName());

    foreach ($courseDetails as $courseDetail) {
        /**
         * Taking a shortcut here. Check if a course is created or not. If it isn't create the
         * course,offering, etc. Updates are ignored
         * TODO: Not take a shortcut
         */

        // Build a course object
        $course = new Course();
        $courseShortName = 'open2study_' . $courseDetail['shortName'];
        $course->setShortName($courseShortName);
        $course->setInitiative($this->initiative);
        $course->setName($courseDetail['name']);
        $course->setDescription($courseDetail['desc']);
        $course->setStream($stream); // Default to Business
        $course->setVideoIntro($courseDetail['video']);
        $course->setUrl(self::BASE_URL . $courseDetail['url']);

        $dbCourse = $this->dbHelper->getCourseByShortName($courseShortName);
        if (!$dbCourse) {
            if ($this->doCreate()) {
                // New course
                $this->out("NEW COURSE - " . $course->getName());
                if ($this->doModify()) {
                    foreach ($courseDetail['instructors'] as $instructor) {
                        $course->addInstructor($this->dbHelper->createInstructorIfNotExists($instructor));
                    }
                    $em->persist($course);
                    $em->flush();
                }
            }
        } else {
            $course = $dbCourse;
        }

        // Check if offering exists
        $shortName = $this->getOfferingShortName($courseDetail);
        $offering = $this->dbHelper->getOfferingByShortName($shortName);
        if ($offering) {
            continue;
        }

        // Check if create offering is on. There is no stored offering at this
        // point, so there is nothing to add to the result either.
        if (!$this->doCreate()) {
            continue;
        }

        $offering = new Offering();
        $offering->setCourse($course);
        $offering->setStartDate(\DateTime::createFromFormat("d/m/Y", $courseDetail['start_date']));
        $offering->setEndDate(\DateTime::createFromFormat("d/m/Y", $courseDetail['end_date']));
        $offering->setStatus(Offering::START_DATES_KNOWN);
        $offering->setLength(4);
        $offering->setShortName($shortName);
        $offering->setUrl(self::BASE_URL . $courseDetail['url']);
        $offering->setVideoIntro($courseDetail['video']);
        $offering->setSearchDesc($courseDetail['desc']);
        $offering->setCreated(new \DateTime());

        if ($this->doModify()) {
            try {
                $em->persist($offering);
                $em->flush();
                $this->out("OFFERING {$courseDetail['name']} created");
            } catch (\Exception $e) {
                // Best effort: report the failure and carry on with the next course
                $this->out("OFFERING {$courseDetail['name']} creation FAILED");
            }
        }

        $offerings[] = $offering;
    }

    return $offerings;
}