/**
 * Saves a photo description into the comments file.
 *
 * The description is kept as the first line of the comments file, wrapped in
 * "<b>…</b><br><br>". If the first line already holds a description (starts
 * with "<b>"), it is replaced; otherwise the new description is prepended and
 * the existing comment lines are preserved.
 *
 * @param string $opis Description text to store.
 * @return bool True if the description was written successfully.
 */
function zapiszOpisZdjecia($opis) {
    $zawartoscPliku = file($this->nazwaPlikuKomentarzy);
    if ($zawartoscPliku === false) {
        // Missing/unreadable file: treat as empty so the description can still be written.
        $zawartoscPliku = array();
    }
    $liniaOpisu = "<b>$opis</b><br><br>\n";
    if (isset($zawartoscPliku[0]) && strtolower(substr($zawartoscPliku[0], 0, 3)) == "<b>") {
        // The photo already has a description; replace it with the new one.
        $zawartoscPliku[0] = $liniaOpisu;
    } else {
        // No description yet; prepend it ahead of the existing comments.
        array_unshift($zawartoscPliku, $liniaOpisu);
    }
    // fopen() returns false (not NULL) on failure; the original loose "NULL ==" check
    // only worked because null == false under loose comparison.
    $uchwytPliku = fopen($this->nazwaPlikuKomentarzy, "w");
    if ($uchwytPliku === false) {
        Logger::logError("Blad w DaneKomentarzyZPlikow::zapiszOpisZdjecia. Nie mozna otworzyc pliku '" . $this->nazwaPlikuKomentarzy . "'.");
        return false;
    }
    fwrite($uchwytPliku, join("", $zawartoscPliku));
    fclose($uchwytPliku);
    return true;
}
/**
 * If link is an image (Twitpic/Twitgoo/Yfrog/Flickr/Picplz/Instagram for now), insert direct path to
 * thumb as expanded url.
 * @TODO Move image thumbnail processng to Expand URLs plugin.
 * @param Logger $logger
 * @param str $tweet
 * @param Array $urls Optional pre-extracted URLs; extracted from $tweet['post_text'] when null.
 */
public static function processTweetURLs($logger, $tweet, $urls = null) {
    $link_dao = DAOFactory::getDAO('LinkDAO');
    if (!$urls) {
        $urls = Post::extractURLs($tweet['post_text']);
    }
    foreach ($urls as $u) {
        $logger->logInfo("processing url: {$u}", __METHOD__ . ',' . __LINE__);
        $is_image = 0;
        $title = '';
        $eurl = '';
        if (substr($u, 0, strlen('http://twitpic.com/')) == 'http://twitpic.com/') {
            $eurl = 'http://twitpic.com/show/thumb/' . substr($u, strlen('http://twitpic.com/'));
            $is_image = 1;
        } elseif (substr($u, 0, strlen('http://yfrog.com/')) == 'http://yfrog.com/') {
            $eurl = $u . '.th.jpg';
            $is_image = 1;
        } elseif (substr($u, 0, strlen('http://twitgoo.com/')) == 'http://twitgoo.com/') {
            $eurl = 'http://twitgoo.com/show/thumb/' . substr($u, strlen('http://twitgoo.com/'));
            $is_image = 1;
        } elseif (substr($u, 0, strlen('http://picplz.com/')) == 'http://picplz.com/') {
            $eurl = $u . '/thumb/';
            $is_image = 1;
        } elseif (substr($u, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
            $is_image = 1;
        } elseif (substr($u, 0, strlen('http://instagr.am/')) == 'http://instagr.am/') {
            // see: http://instagr.am/developer/embedding/ for reference
            // appending 'media/' redirects to the actual jpg; avoid inserting a
            // double slash when the URL already ends with '/'
            if ($u[strlen($u) - 1] == '/') {
                $eurl = $u . 'media/';
            } else {
                $eurl = $u . '/media/';
            }
            $logger->logDebug("expanded instagram URL to: " . $eurl, __METHOD__ . ',' . __LINE__);
            $is_image = 1;
        }
        if ($link_dao->insert($u, $eurl, $title, $tweet['post_id'], 'twitter', $is_image)) {
            $logger->logSuccess("Inserted " . $u . " (" . $eurl . ", " . $is_image . "), into links table",
                __METHOD__ . ',' . __LINE__);
        } else {
            $logger->logError("Did NOT insert " . $u . " (" . $eurl . ") into links table",
                __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * For each API call left, grab oldest follow relationship, check if it exists, and update table.
 *
 * Loops until either no stale follow remains or the Twitter API call limit is
 * exceeded. For each stale follow, the show_friendship endpoint is queried and
 * BOTH directions of the relationship are refreshed: confirmed directions get
 * their last-seen date updated, broken directions are deactivated. A 403+163
 * ("Source User Not Found") or a 404 response deactivates the pair entirely.
 */
public function cleanUpFollows() {
    $follow_dao = DAOFactory::getDAO('FollowDAO');
    $continue_fetching = true;
    while ($continue_fetching) {
        // Least recently verified follow on record for Twitter.
        $oldfollow = $follow_dao->getOldestFollow('twitter');
        if ($oldfollow != null) {
            $endpoint = $this->api->endpoints['show_friendship'];
            $args = array();
            // source = followee, target = follower, as stored in the follow row.
            $args["source_id"] = $oldfollow["followee_id"];
            $args["target_id"] = $oldfollow["follower_id"];
            try {
                $this->logger->logInfo("Checking stale follow last seen " . $oldfollow["last_seen"],
                    __METHOD__ . ',' . __LINE__);
                list($http_status, $payload) = $this->api->apiRequest($endpoint, $args);
                if ($http_status == 200) {
                    $friendship = $this->api->parseJSONRelationship($payload);
                    // NOTE(review): flags are compared against the string 'true' — presumably
                    // parseJSONRelationship returns strings, not booleans; confirm there.
                    if ($friendship['source_follows_target'] == 'true') {
                        $this->logger->logInfo("Updating follow last seen date: " . $args["source_id"] .
                            " follows " . $args["target_id"], __METHOD__ . ',' . __LINE__);
                        $follow_dao->update($oldfollow["followee_id"], $oldfollow["follower_id"], 'twitter');
                    } else {
                        $this->logger->logInfo("Deactivating follow: " . $args["source_id"] .
                            " does not follow " . $args["target_id"], __METHOD__ . ',' . __LINE__);
                        $follow_dao->deactivate($oldfollow["followee_id"], $oldfollow["follower_id"], 'twitter');
                    }
                    // Also refresh the reverse direction of the relationship.
                    if ($friendship['target_follows_source'] == 'true') {
                        $this->logger->logInfo("Updating follow last seen date: " . $args["target_id"] .
                            " follows " . $args["source_id"], __METHOD__ . ',' . __LINE__);
                        $follow_dao->update($oldfollow["follower_id"], $oldfollow["followee_id"], 'twitter');
                    } else {
                        $this->logger->logInfo("Deactivating follow: " . $args["target_id"] .
                            " does not follow " . $args["source_id"], __METHOD__ . ',' . __LINE__);
                        $follow_dao->deactivate($oldfollow["follower_id"], $oldfollow["followee_id"], 'twitter');
                    }
                } else {
                    $this->logger->logError("Got non-200 response for " . $endpoint->getShortPath(), __METHOD__ . ',' .
                        __LINE__);
                    $error_code = $this->api->parseJSONErrorCodeAPI($payload);
                    if ($http_status == 403 && $error_code['error'] == 163) {
                        $this->logger->logError("Marking follow inactive due to 403 Source User Not Found " .
                            "error response with API 163 error", __METHOD__ . ',' . __LINE__);
                        // deactivate in both directions
                        $follow_dao->deactivate($oldfollow["followee_id"], $oldfollow["follower_id"], 'twitter');
                        $follow_dao->deactivate($oldfollow["follower_id"], $oldfollow["followee_id"], 'twitter');
                    }
                    if ($http_status == 404) {
                        $this->logger->logError("Marking follow inactive due to 404 response", __METHOD__ . ',' . __LINE__);
                        // deactivate in both directions
                        $follow_dao->deactivate($oldfollow["followee_id"], $oldfollow["follower_id"], 'twitter');
                        $follow_dao->deactivate($oldfollow["follower_id"], $oldfollow["followee_id"], 'twitter');
                    }
                }
            } catch (APICallLimitExceededException $e) {
                // Out of API calls for this run; stop cleanly and resume next crawl.
                $this->logger->logInfo($e->getMessage(), __METHOD__ . ',' . __LINE__);
                break;
            }
        } else {
            // No stale follows left to verify.
            $continue_fetching = false;
        }
    }
}
/**
 * Returns an array of objects describing all photo galleries.
 *
 * Each object in the array carries the same fields as the database table
 * "galeriaZdjec". An empty result is logged as an error but still returned.
 */
function pobierzWszystkieGalerie() {
    $zrodloDanych = FabrykaObiektowGaleriiZdjec::daneGaleriiZdjec();
    $galerie = $zrodloDanych->pobierzWszystkieGalerie();
    if (count($galerie) === 0) {
        Logger::logError("Blad w metodzie ZarzadcaGaleriiZdjec::pobierzWszystkieGalerie. Nie znaleziono zadnej galerii zdjec.");
    }
    return $galerie;
}
/**
 * Save expanded version of all unexpanded URLs to data store, as well as intermediary short links.
 *
 * Each candidate link is followed through chained shorteners (e.g. t.co) until
 * it stops changing, comes back empty, or EXPANSION_CAP hops are exceeded;
 * every intermediate hop is recorded as a short link. Flickr short links get
 * thumbnail handling via expandFlickrThumbnail when an API key is supplied.
 *
 * @param string|null $flickr_api_key Flickr API key; thumbnail expansion is skipped when null.
 */
public function expandOriginalURLs($flickr_api_key = null) {
    $links_to_expand = $this->link_dao->getLinksToExpand($this->link_limit);
    $this->logger->logUserInfo(count($links_to_expand) . " links to expand. Please wait. Working...",
        __METHOD__ . ',' . __LINE__);
    $total_expanded = 0;
    $total_errors = 0;
    $has_expanded_flickr_link = false;
    foreach ($links_to_expand as $index => $link) {
        if (Utils::validateURL($link->url)) {
            $endless_loop_prevention_counter = 0;
            $this->logger->logInfo("Expanding " . ($total_expanded + 1) . " of " . count($links_to_expand) .
                " (" . $link->url . ")", __METHOD__ . ',' . __LINE__);
            //make sure shortened short links--like t.co--get fully expanded
            $fully_expanded = false;
            $short_link = $link->url;
            while (!$fully_expanded) {
                //begin Flickr thumbnail processing
                if (isset($flickr_api_key) && substr($short_link, 0, strlen('http://flic.kr/')) == 'http://flic.kr/') {
                    self::expandFlickrThumbnail($flickr_api_key, $short_link, $link->url);
                    $has_expanded_flickr_link = true;
                    $fully_expanded = true;
                }
                //end Flickr thumbnail processing
                // NOTE(review): the Flickr branch above does not skip this expansion call —
                // expandURL still runs once for the Flickr link in the same iteration;
                // confirm this extra lookup is intended.
                $expanded_url = URLExpander::expandURL($short_link, $link->url, $index, count($links_to_expand),
                    $this->link_dao, $this->logger);
                // Stop when expansion converges, fails, or the hop cap is hit;
                // otherwise record the intermediate short link and keep following.
                if ($expanded_url == $short_link || $expanded_url == '' ||
                    $endless_loop_prevention_counter > self::EXPANSION_CAP) {
                    $fully_expanded = true;
                } else {
                    $this->short_link_dao->insert($link->id, $short_link);
                }
                $short_link = $expanded_url;
                $endless_loop_prevention_counter++;
            }
            // Flickr links were already saved by expandFlickrThumbnail; skip the generic save.
            if (!$has_expanded_flickr_link) {
                if ($expanded_url != '') {
                    $image_src = URLProcessor::getImageSource($expanded_url);
                    $this->link_dao->saveExpandedUrl($link->url, $expanded_url, '', $image_src);
                    $total_expanded = $total_expanded + 1;
                } else {
                    $this->logger->logError($link->url . " not a valid URL - relocates to nowhere", __METHOD__ . ',' .
                        __LINE__);
                    $this->link_dao->saveExpansionError($link->url, "Invalid URL - relocates to nowhere");
                    $total_errors = $total_errors + 1;
                }
            }
        } else {
            $total_errors = $total_errors + 1;
            $this->logger->logError($link->url . " not a valid URL", __METHOD__ . ',' . __LINE__);
            $this->link_dao->saveExpansionError($link->url, "Invalid URL");
        }
        // Reset the per-link Flickr flag before processing the next link.
        $has_expanded_flickr_link = false;
    }
    $this->logger->logUserSuccess($total_expanded . " URLs successfully expanded (" . $total_errors . " errors).",
        __METHOD__ . ',' . __LINE__);
}
/**
 * If link is an image (Twitpic/Twitgoo/Yfrog/Flickr for now), insert direct path to thumb as expanded url.
 * @TODO Move image thumbnail processng to Expand URLs plugin.
 * @param Logger $logger
 * @param str $tweet
 * @param Array $urls Optional pre-extracted URLs; extracted from the tweet text when null.
 */
public static function processTweetURLs($logger, $tweet, $urls = null) {
    $link_dao = DAOFactory::getDAO('LinkDAO');
    if (!$urls) {
        $urls = Post::extractURLs($tweet['post_text']);
    }
    foreach ($urls as $u) {
        $logger->logInfo("processing url: {$u}", __METHOD__ . ',' . __LINE__);
        // Defaults: not an image, no title, no expanded URL.
        $is_image = 0;
        $title = '';
        $eurl = '';
        if (strpos($u, 'http://twitpic.com/') === 0) {
            // Twitpic exposes thumbnails under /show/thumb/<id>.
            $eurl = 'http://twitpic.com/show/thumb/' . substr($u, strlen('http://twitpic.com/'));
            $is_image = 1;
        } elseif (strpos($u, 'http://yfrog.com/') === 0) {
            // Yfrog thumbnail is the original URL with a .th.jpg suffix.
            $eurl = $u . '.th.jpg';
            $is_image = 1;
        } elseif (strpos($u, 'http://twitgoo.com/') === 0) {
            // Twitgoo mirrors Twitpic's /show/thumb/<id> scheme.
            $eurl = 'http://twitgoo.com/show/thumb/' . substr($u, strlen('http://twitgoo.com/'));
            $is_image = 1;
        } elseif (strpos($u, 'http://picplz.com/') === 0) {
            $eurl = $u . '/thumb/';
            $is_image = 1;
        } elseif (strpos($u, 'http://flic.kr/') === 0) {
            // Flickr short links are flagged as images without a direct thumb URL.
            $is_image = 1;
        } elseif (strpos($u, 'http://instagr.am/') === 0) {
            // see: http://instagr.am/developer/embedding/ for reference
            // appending 'media/' redirects to the actual jpg; avoid producing a
            // double slash when the URL already ends with '/'
            $ends_with_slash = ($u[strlen($u) - 1] == '/');
            $eurl = $ends_with_slash ? $u . 'media/' : $u . '/media/';
            $logger->logDebug("expanded instagram URL to: " . $eurl, __METHOD__ . ',' . __LINE__);
            $is_image = 1;
        }
        $inserted = $link_dao->insert($u, $eurl, $title, $tweet['post_id'], 'twitter', $is_image);
        if ($inserted) {
            $logger->logSuccess("Inserted " . $u . " (" . $eurl . ", " . $is_image . "), into links table",
                __METHOD__ . ',' . __LINE__);
        } else {
            $logger->logError("Did NOT insert " . $u . " (" . $eurl . ") into links table",
                __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Check the validity of G+'s OAuth token by requestig the instance user's details.
 * Fetch details from Google+ API for the current instance user and insert into the datastore.
 * @param str $client_id
 * @param str $client_secret
 * @param str $access_token
 * @param str $refresh_token
 * @param str $owner_id
 * @return User|null The stored user object, or null if the details could not be fetched.
 */
public function initializeInstanceUser($client_id, $client_secret, $access_token, $refresh_token, $owner_id) {
    $network = 'google+';
    $user_dao = DAOFactory::getDAO('UserDAO');
    $user_object = null;
    // Get owner user details and save them to DB
    $fields = array('fields' => 'displayName,id,image,tagline');
    $user_details = $this->api_accessor->apiRequest('people/me', $this->access_token, $fields);
    if (isset($user_details->error->code) && $user_details->error->code == '401') {
        //Token has expired, fetch and save a new one
        $tokens = self::getOAuthTokens($client_id, $client_secret, $refresh_token, 'refresh_token');
        if (isset($tokens->error) || !isset($tokens->access_token)) {
            $error_msg = "Oops! Something went wrong while obtaining OAuth tokens.<br>Google says \"";
            if (isset($tokens->error)) {
                $error_msg .= $tokens->error;
            } else {
                $error_msg .= Utils::varDumpToString($tokens);
            }
            $error_msg .= ".\" Please double-check your settings and try again.";
            $this->logger->logError($error_msg, __METHOD__ . ',' . __LINE__);
        } else {
            $owner_instance_dao = DAOFactory::getDAO('OwnerInstanceDAO');
            // Persist the NEW access token: the old $access_token just failed with a 401,
            // so storing it again would leave an expired token in the datastore.
            $owner_instance_dao->updateTokens($owner_id, $this->instance->id, $tokens->access_token, $refresh_token);
            $this->access_token = $tokens->access_token;
            //try again
            $user_details = $this->api_accessor->apiRequest('people/me', $this->access_token, $fields);
        }
    }
    if (isset($user_details)) {
        $user_details->network = $network;
        $user = $this->parseUserDetails($user_details);
    }
    if (isset($user)) {
        $user_object = new User($user, 'Owner initialization');
        $user_dao->updateUser($user_object);
    }
    if (isset($user_object)) {
        $this->logger->logSuccess("Successfully fetched " .
            $user_object->username . " " . $user_object->network . "'s details from Google+",
            __METHOD__ . ',' . __LINE__);
    } else {
        $this->logger->logInfo("Error fetching user details from the Google+ API, response was " .
            Utils::varDumpToString($user_details), __METHOD__ . ',' . __LINE__);
    }
    return $user_object;
}
/**
 * Returns a comment-data object for the configured comment storage backend.
 *
 * Depending on the configured storage type, the returned object is:
 * - "mysql"       -> DaneKomentarzyZBazyDanychMysql
 * - "pliki"       -> not implemented yet; logs an error and returns null
 * - anything else -> null (unknown storage type is logged as an error)
 *
 * @return DaneKomentarzyZBazyDanychMysql|null
 */
function daneKomentarzy() {
    $ustawieniaGaleriiZdjec = new UstawieniaGaleriiZdjec();
    $daneKomentarzy = null;
    switch ($ustawieniaGaleriiZdjec->domyslnySposobPrzechowywaniaKomentarzy) {
        case "mysql":
            include_once("DaneKomentarzyZBazyDanychMysql.class.php");
            $daneKomentarzy = new DaneKomentarzyZBazyDanychMysql($ustawieniaGaleriiZdjec->mysqlNazwaTabeliZKomentarzami);
            break;
        case "pliki":
            Logger::logError("Przechowywanie komentarzy do zdjec w plikach nie jest zaimplementowane.");
            break;
        default:
            // Fixed: report the same property the switch inspects; the old code read
            // 'sposobPrzechowywaniaKomentarzy', a different (likely undefined) property,
            // so the logged storage type was always wrong/empty.
            Logger::logError("Nieznany typ przechowywania komentarzy: '" .
                $ustawieniaGaleriiZdjec->domyslnySposobPrzechowywaniaKomentarzy . "'");
            break;
    }
    return $daneKomentarzy;
}
/**
 * For a given post, extract URLs and store them, including image_src if that's from a known source like Twitpic,
 * Twitgoo, Yfrog, Instagr.am.
 * @param str $post_text
 * @param int $post_id
 * @param str $network
 * @param Logger $logger
 * @param arr $urls Array of URLs, optionally set, defaults to null
 */
public static function processPostURLs($post_text, $post_id, $network, $logger, $urls = null) {
    if (!$urls) {
        $urls = Post::extractURLs($post_text);
    }
    // Nothing to store when the post contains no URLs.
    if (!$urls) {
        return;
    }
    $link_dao = DAOFactory::getDAO('LinkDAO');
    foreach ($urls as $url) {
        $logger->logInfo("Processing URL: {$url}", __METHOD__ . ',' . __LINE__);
        $image_src = self::getImageSource($url);
        //if we have an image_src, the URL is a known image source not in need of expansion
        $link = new Link(array(
            'url' => $url,
            'expanded_url' => isset($image_src) ? $url : '',
            'image_src' => $image_src,
            'post_id' => $post_id,
            'network' => $network,
        ));
        $was_inserted = $link_dao->insert($link);
        if ($was_inserted) {
            $logger->logSuccess("Inserted " . $url . " (thumbnail " . $image_src . "), into links table",
                __METHOD__ . ',' . __LINE__);
        } else {
            $logger->logError("Did NOT insert " . $url . " (thumbnail " . $image_src . ") into links table",
                __METHOD__ . ',' . __LINE__);
        }
    }
}
/**
 * Convert parsed JSON of a profile or page's posts into ThinkUp posts and users.
 *
 * For each post in the stream: stores the post and its author, any attached link,
 * then pages through comments and likes (both the inline arrays and the collapsed
 * Graph API pagination) until the known delta against storage is caught up.
 *
 * @param Object $stream Parsed Graph API stream object with a ->data array of posts.
 * @param str $network The network for the post, either 'facebook' or 'facebook page'.
 * @param int $page_number Page number being processed (for logging only).
 * @return int $total_added_posts How many posts (excluding comments) got added to the data store.
 */
private function processStream($stream, $network, $page_number) {
    $thinkup_posts = array();
    $total_added_posts = 0;
    $total_added_comments = 0;
    $thinkup_users = array();
    $total_added_users = 0;
    $thinkup_links = array();
    $total_links_added = 0;
    $thinkup_likes = array();
    $total_added_likes = 0;
    $profiles = array();
    //efficiency control vars
    $must_process_likes = true;
    $must_process_comments = true;
    $post_comments_added = 0;
    $post_likes_added = 0;
    $comments_difference = false;
    $likes_difference = false;
    $post_dao = DAOFactory::getDAO('PostDAO');
    foreach ($stream->data as $index => $p) {
        // Graph post IDs look like "ownerid_postid"; keep only the post part.
        $post_id = explode("_", $p->id);
        $post_id = $post_id[1];
        $this->logger->logInfo("Beginning to process " . $post_id . ", post " . ($index + 1) . " of " .
            count($stream->data) . " on page " . $page_number, __METHOD__ . ',' . __LINE__);
        // stream can contain posts from multiple users. get profile for this post
        $profile = null;
        if (!empty($profiles[$p->from->id])) {
            $profile = $profiles[$p->from->id];
        } else {
            $profile = $this->fetchUser($p->from->id, 'Post stream', true);
            $profiles[$p->from->id] = $profile;
        }
        //Assume profile comments are private and page posts are public
        $is_protected = $network == 'facebook' ? 1 : 0;
        //Get likes count
        $likes_count = 0;
        //Normalize likes to be one array
        if (isset($p->likes)) {
            $likes_count = $p->likes->summary->total_count;
            $p->likes = $this->normalizeLikes($p->likes);
        }
        // Normalize comments to be one array
        $comments_count = 0;
        if (isset($p->comments)) {
            $comments_count = $p->comments->summary->total_count;
            $p->comments = $this->normalizeComments($p->comments);
        }
        $post_in_storage = $post_dao->getPost($post_id, $network);
        //Figure out if we have to process likes and comments
        if (isset($post_in_storage)) {
            $this->logger->logInfo("Post " . $post_id . " already in storage", __METHOD__ . ',' . __LINE__);
            if ($post_in_storage->favlike_count_cache >= $likes_count) {
                $must_process_likes = false;
                $this->logger->logInfo("Already have " . $likes_count . " like(s) for post " . $post_id .
                    " in storage; skipping like processing", __METHOD__ . ',' . __LINE__);
            } else {
                $likes_difference = $likes_count - $post_in_storage->favlike_count_cache;
                $this->logger->logInfo($likes_difference . " new like(s) to process for post " . $post_id,
                    __METHOD__ . ',' . __LINE__);
            }
            if (isset($p->comments->summary->total_count)) {
                if ($post_in_storage->reply_count_cache >= $p->comments->summary->total_count) {
                    $must_process_comments = false;
                    $this->logger->logInfo("Already have " . $post_in_storage->reply_count_cache .
                        " comment(s) for post " . $post_id . "; skipping comment processing",
                        __METHOD__ . ',' . __LINE__);
                } else {
                    $comments_difference = $p->comments->summary->total_count -
                        $post_in_storage->reply_count_cache;
                    $this->logger->logInfo($comments_difference . " new comment(s) of " .
                        $p->comments->summary->total_count . " total to process for post " . $post_id,
                        __METHOD__ . ',' . __LINE__);
                }
            }
        } else {
            $this->logger->logInfo("Post " . $post_id . " not in storage", __METHOD__ . ',' . __LINE__);
        }
        if (!isset($profile)) {
            $this->logger->logError("No profile set", __METHOD__ . ',' . __LINE__);
        } else {
            if (!isset($post_in_storage)) {
                $this->logger->logInfo("Post " . $post_id . " has " . $comments_count . " comments",
                    __METHOD__ . ',' . __LINE__);
                $post_to_process = array("post_id" => $post_id, "author_username" => $profile->username,
                    "author_fullname" => $profile->username, "author_avatar" => $profile->avatar,
                    "author_user_id" => $p->from->id, "post_text" => isset($p->message) ? $p->message : '',
                    "pub_date" => $p->created_time, "favlike_count_cache" => $likes_count,
                    "reply_count_cache" => $comments_count,
                    "in_reply_to_user_id" => isset($p->to->data[0]->id) ? $p->to->data[0]->id : '',
                    "in_reply_to_post_id" => '', "source" => '', 'network' => $network,
                    'is_protected' => $is_protected, 'location' => '');
                $new_post_key = $this->storePostAndAuthor($post_to_process, "Owner stream");
                if ($new_post_key !== false) {
                    $total_added_posts++;
                }
                if (isset($p->source) || isset($p->link)) {
                    // there's a link to store
                    $link_url = isset($p->source) ? $p->source : $p->link;
                    $link = new Link(array("url" => $link_url, "expanded_url" => '',
                        "image_src" => isset($p->picture) ? $p->picture : '',
                        "caption" => isset($p->caption) ? $p->caption : '',
                        "description" => isset($p->description) ? $p->description : '',
                        "title" => isset($p->name) ? $p->name : '', "post_key" => $new_post_key));
                    array_push($thinkup_links, $link);
                }
                // Fixed: the accumulator was assigned to a misspelled variable
                // ($total_links_addded), so $total_links_added stayed 0 forever and the
                // user-facing success message below could never fire.
                $total_links_added = $total_links_added + $this->storeLinks($thinkup_links);
                if ($total_links_added > 0) {
                    $this->logger->logUserSuccess("Collected {$total_links_added} new links",
                        __METHOD__ . ',' . __LINE__);
                }
                //free up memory
                $thinkup_links = array();
            } else {
                // post already exists in storage
                if ($must_process_likes) {
                    //update its like count only
                    $post_dao->updateFavLikeCount($post_id, $network, $likes_count);
                    $this->logger->logInfo("Updated Like count for post " . $post_id . " to " . $likes_count,
                        __METHOD__ . ',' . __LINE__);
                }
            }
            if ($must_process_comments) {
                if (isset($p->comments)) {
                    $comments_captured = 0;
                    if (isset($p->comments->data)) {
                        $post_comments = $p->comments->data;
                        $post_comments_count = isset($post_comments) ? sizeof($post_comments) : 0;
                        if (is_array($post_comments) && sizeof($post_comments) > 0) {
                            foreach ($post_comments as $c) {
                                if (isset($c->from)) {
                                    // Sometimes the id is parent_poster_postId
                                    // sometimes it's just parent_postId
                                    $comment_id = explode("_", $c->id);
                                    if (count($comment_id) == 3) {
                                        $comment_id = $comment_id[2];
                                    } else {
                                        $comment_id = $comment_id[1];
                                    }
                                    //only add to queue if not already in storage
                                    $comment_in_storage = $post_dao->getPost($comment_id, $network);
                                    if (!isset($comment_in_storage)) {
                                        $comment_to_process = array("post_id" => $comment_id,
                                            "author_username" => $c->from->name,
                                            "author_fullname" => $c->from->name,
                                            "author_gender" => $c->from->gender,
                                            "author_birthday" => $c->from->birthday,
                                            "author_avatar" => 'https://graph.facebook.com/' . $c->from->id .
                                            '/picture', "author_user_id" => $c->from->id,
                                            "post_text" => $c->message, "pub_date" => $c->created_time,
                                            "in_reply_to_user_id" => $profile->user_id,
                                            "in_reply_to_post_id" => $post_id, "source" => '',
                                            'network' => $network, 'is_protected' => $is_protected,
                                            'location' => '');
                                        array_push($thinkup_posts, $comment_to_process);
                                        $comments_captured = $comments_captured + 1;
                                    }
                                }
                            }
                        }
                    }
                    $post_comments_added = $post_comments_added +
                        $this->storePostsAndAuthors($thinkup_posts, "Post stream comments");
                    //free up memory
                    $thinkup_posts = array();
                    if (is_int($comments_difference) && $post_comments_added >= $comments_difference) {
                        $must_process_comments = false;
                        if (isset($comments_stream->paging->next)) {
                            $this->logger->logInfo("Caught up on post " . $post_id . "'s balance of " .
                                $comments_difference . " comments; stopping comment processing",
                                __METHOD__ . ',' . __LINE__);
                        }
                    }
                    // collapsed comment thread
                    if (isset($p->comments->summary->total_count) &&
                        $p->comments->summary->total_count > $comments_captured && $must_process_comments) {
                        if (is_int($comments_difference)) {
                            $offset = $p->comments->summary->total_count - $comments_difference;
                            $offset_arr = array('offset' => $offset, 'limit' => $comments_difference);
                        } else {
                            $offset_arr = null;
                        }
                        $api_call = $p->from->id . '_' . $post_id . '/comments';
                        do {
                            $comments_stream = FacebookGraphAPIAccessor::apiRequest($api_call,
                                $this->access_token, $offset_arr);
                            if (isset($comments_stream) && isset($comments_stream->data) &&
                                is_array($comments_stream->data)) {
                                foreach ($comments_stream->data as $c) {
                                    if (isset($c->from)) {
                                        $comment_id = explode("_", $c->id);
                                        $comment_id = $comment_id[sizeof($comment_id) - 1];
                                        //only add to queue if not already in storage
                                        $comment_in_storage = $post_dao->getPost($comment_id, $network);
                                        if (!isset($comment_in_storage)) {
                                            $comment_to_process = array("post_id" => $comment_id,
                                                "author_username" => $c->from->name,
                                                "author_fullname" => $c->from->name,
                                                "author_avatar" => 'https://graph.facebook.com/' .
                                                $c->from->id . '/picture',
                                                "author_user_id" => $c->from->id,
                                                "post_text" => $c->message, "pub_date" => $c->created_time,
                                                "in_reply_to_user_id" => $profile->user_id,
                                                "in_reply_to_post_id" => $post_id, "source" => '',
                                                'network' => $network, 'is_protected' => $is_protected,
                                                'location' => '');
                                            array_push($thinkup_posts, $comment_to_process);
                                        }
                                    }
                                }
                                $post_comments_added = $post_comments_added +
                                    $this->storePostsAndAuthors($thinkup_posts,
                                    "Posts stream comments collapsed");
                                if (is_int($comments_difference) &&
                                    $post_comments_added >= $comments_difference) {
                                    $must_process_comments = false;
                                    if (isset($comments_stream->paging->next)) {
                                        $this->logger->logInfo("Caught up on post " . $post_id .
                                            "'s balance of " . $comments_difference .
                                            " comments; stopping comment processing",
                                            __METHOD__ . ',' . __LINE__);
                                    }
                                }
                                //free up memory
                                $thinkup_posts = array();
                                if (isset($comments_stream->paging->next)) {
                                    $api_call = str_replace('\\u00257C', '|', $comments_stream->paging->next);
                                }
                            } else {
                                // no comments (pun intended)
                                break;
                            }
                        } while (isset($comments_stream->paging->next) && $must_process_comments);
                    }
                }
                if ($post_comments_added > 0) {
                    //let user know
                    $this->logger->logUserSuccess("Added " . $post_comments_added . " comment(s) for post " .
                        $post_id, __METHOD__ . ',' . __LINE__);
                } else {
                    $this->logger->logInfo("Added " . $post_comments_added . " comment(s) for post " .
                        $post_id, __METHOD__ . ',' . __LINE__);
                }
                $total_added_comments = $total_added_comments + $post_comments_added;
            }
            //Inserting comments also increments the original post's reply_count_cache; reset it here
            $post_dao->updateReplyCount($post_id, $network, $comments_count);
            //process "likes"
            if ($must_process_likes) {
                if (isset($p->likes)) {
                    $likes_captured = 0;
                    if (isset($p->likes->data)) {
                        $post_likes = $p->likes->data;
                        $post_likes_count = isset($post_likes) ? sizeof($post_likes) : 0;
                        if (is_array($post_likes) && sizeof($post_likes) > 0) {
                            foreach ($post_likes as $l) {
                                if (isset($l->name) && isset($l->id)) {
                                    //Get users
                                    $user_to_add = array("user_name" => $l->name, "full_name" => $l->name,
                                        "user_id" => $l->id, "avatar" => 'https://graph.facebook.com/' .
                                        $l->id . '/picture', "location" => '', "description" => '',
                                        "url" => '', "is_protected" => 1, "follower_count" => 0,
                                        "post_count" => 0, "joined" => '', "found_in" => "Likes",
                                        "network" => 'facebook');
                                    //Users are always set to network=facebook
                                    array_push($thinkup_users, $user_to_add);
                                    $fav_to_add = array("favoriter_id" => $l->id, "network" => $network,
                                        "author_user_id" => $profile->user_id, "post_id" => $post_id);
                                    array_push($thinkup_likes, $fav_to_add);
                                    $likes_captured = $likes_captured + 1;
                                }
                            }
                        }
                    }
                    $total_added_users = $total_added_users + $this->storeUsers($thinkup_users, "Likes");
                    $post_likes_added = $post_likes_added + $this->storeLikes($thinkup_likes);
                    //free up memory
                    $thinkup_users = array();
                    $thinkup_likes = array();
                    if (is_int($likes_difference) && $post_likes_added >= $likes_difference) {
                        $must_process_likes = false;
                        if (isset($likes_stream->paging->next)) {
                            $this->logger->logInfo("Caught up on post " . $post_id . "'s balance of " .
                                $likes_difference . " likes; stopping like processing",
                                __METHOD__ . ',' . __LINE__);
                        }
                    }
                    // collapsed likes
                    if (isset($p->likes->count) && $p->likes->count > $likes_captured &&
                        $must_process_likes) {
                        if (is_int($likes_difference)) {
                            $offset = $p->likes->count - $likes_difference;
                            $offset_arr = array('offset' => $offset);
                        } else {
                            $offset_arr = null;
                        }
                        $api_call = $p->from->id . '_' . $post_id . '/likes';
                        do {
                            $likes_stream = FacebookGraphAPIAccessor::apiRequest($api_call,
                                $this->access_token, $offset_arr);
                            if (isset($likes_stream) && is_array($likes_stream->data)) {
                                foreach ($likes_stream->data as $l) {
                                    if (isset($l->name) && isset($l->id)) {
                                        //Get users
                                        $user_to_add = array("user_name" => $l->name,
                                            "full_name" => $l->name, "user_id" => $l->id,
                                            "avatar" => 'https://graph.facebook.com/' . $l->id .
                                            '/picture', "is_protected" => 1, "location" => '',
                                            "description" => '', "url" => '', "follower_count" => 0,
                                            "post_count" => 0, "joined" => '', "found_in" => "Likes",
                                            "network" => 'facebook');
                                        //Users are always set to network=facebook
                                        array_push($thinkup_users, $user_to_add);
                                        $fav_to_add = array("favoriter_id" => $l->id,
                                            "network" => $network, "author_user_id" => $p->from->id,
                                            "post_id" => $post_id);
                                        array_push($thinkup_likes, $fav_to_add);
                                        $likes_captured = $likes_captured + 1;
                                    }
                                }
                                $total_added_users = $total_added_users +
                                    $this->storeUsers($thinkup_users, "Likes");
                                $post_likes_added = $post_likes_added + $this->storeLikes($thinkup_likes);
                                //free up memory
                                $thinkup_users = array();
                                $thinkup_likes = array();
                                if (is_int($likes_difference) && $post_likes_added >= $likes_difference) {
                                    $must_process_likes = false;
                                    if (isset($likes_stream->paging->next)) {
                                        $this->logger->logInfo("Caught up on post " . $post_id .
                                            "'s balance of " . $likes_difference .
                                            " likes; stopping like processing",
                                            __METHOD__ . ',' . __LINE__);
                                    }
                                }
                                if (isset($likes_stream->paging->next)) {
                                    $api_call = str_replace('\\u00257C', '|', $likes_stream->paging->next);
                                }
                            } else {
                                // no likes
                                break;
                            }
                        } while (isset($likes_stream->paging->next) && $must_process_likes);
                    }
                }
                $this->logger->logInfo("Added " . $post_likes_added . " like(s) for post " . $post_id,
                    __METHOD__ . ',' . __LINE__);
                $total_added_likes = $total_added_likes + $post_likes_added;
            }
            //free up memory
            $thinkup_users = array();
            $thinkup_likes = array();
        }
        //reset control vars for next post
        $must_process_likes = true;
        $must_process_comments = true;
        $post_comments_added = 0;
        $post_likes_added = 0;
        $comments_difference = false;
        $likes_difference = false;
    }
    $this->logger->logUserSuccess("On page " . $page_number . ", captured " . $total_added_posts .
        " post(s), " . $total_added_comments . " comment(s), " . $total_added_users . " user(s) and " .
        $total_added_likes . " like(s)", __METHOD__ . ',' . __LINE__);
    return $total_added_posts;
}
/**
 * Collects and stores information about the users videos from the YouTube APIs
 * Currently collects and stores:
 * - Basic video information such as title, author, description and location the video was shot in (if available)
 * - Replies to the video
 * -- This uses the YouTube V2 API due to the V3 API currently not supporting replies
 * - All time counts for likes, dislikes, views, average view duration, average view percentage, favorites added,
 *   favorites removed, shares, subscribers gained and subscribers lost
 * -- The totals for these are stored in the videos table, a history of these totals is stored in the
 *    count_history table under a type of [metric]_all_time and date of todays date
 * -- A record of these metrics for individual days is also saved in the count_history table under a type of
 *    [metric] and date of the day the metric represents, usually two days ago due to a delay in the availability
 *    of data from the Analytics API
 * @return null
 */
public function fetchInstanceUserVideos() {
    $video_dao = DAOFactory::getDAO('VideoDAO');
    $user_dao = DAOFactory::getDAO('UserDAO');
    $post_dao = DAOFactory::getDAO('PostDAO');
    $count_history_dao = DAOFactory::getDAO('CountHistoryDAO');
    $instance_dao = DAOFactory::getDAO('InstanceDAO');
    // Get the users upload playlist ID
    $fields_for_ids = array('part' => 'contentDetails,statistics', 'mine' => 'true');
    $various_ids = $this->youtube_api_accessor->apiRequest('channels', $this->access_token, $fields_for_ids);
    $upload_id = $various_ids->items[0]->contentDetails->relatedPlaylists->uploads;
    // Also get their channel ID as we'll need it later on
    $channel_id = $various_ids->items[0]->id;
    // There are some required attributes about the author that YouTube doesn't return for the videos so we need
    // to query the database for them
    $author_details = $user_dao->getDetails($this->instance->network_user_id, 'youtube');
    $user_id = $this->instance->network_user_id;
    // Update the users subscriber count
    $subscriber_count = $various_ids->items[0]->statistics->subscriberCount;
    $author_details->follower_count = $subscriber_count;
    $user_dao->updateUser($author_details);
    $count_history_dao->insert($user_id, 'youtube', $subscriber_count, null, 'subscriber_count');
    // Calculate the time at which we should stop fetching videos
    $end_time = time() + $this->max_crawl_time;
    // Keep track of if we finished the crawl early due to timing out
    $had_to_finish_early = false;
    // Check if we already loaded all the old posts for this user.
    // FIX: was reading the undefined local "$instance"; the instance lives on $this.
    $archive_loaded = $this->instance->is_archive_loaded_posts;
    // If the archive isn't loaded yet keep track of how many times we've tried to load it
    if (!$archive_loaded) {
        $attempts = $count_history_dao->getLatestCountByNetworkUserIDAndType($user_id, 'youtube',
        'youtube_archive_attempts');
        if ($attempts == null) {
            // If this is the first crawler run
            $attempts['count'] = 0;
        }
        $attempts['count']++;
        $count_history_dao->insert($user_id, 'youtube', $attempts['count'], null, 'youtube_archive_attempts', null);
    }
    // Now page through their videos collecting the data
    $videos_fields = array('part' => 'snippet', 'maxResults' => '25', 'playlistId' => $upload_id,
    'pageToken' => null);
    // We may get multiple pages
    do {
        // This is a page of IDs of videos the user has uploaded
        $user_videos = $this->youtube_api_accessor->apiRequest('playlistItems', $this->access_token,
        $videos_fields);
        // For each video store the relevant details about it
        foreach ($user_videos->items as $video) {
            // If we've hit the max crawl time stop
            if (time() >= $end_time) {
                $this->logger->logUserInfo("Stopping this service users crawl because it has exceeded max time of "
                . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
                $had_to_finish_early = true;
                break 2;
            }
            $video_id = $video->snippet->resourceId->videoId;
            // Get the title, description, likes, dislikes, views, and details about where
            // the video was taken from the data API
            $video_fields = array('id' => $video_id, 'part' => 'statistics,id,snippet,recordingDetails,status');
            $video_details = $this->youtube_api_accessor->apiRequest('videos', $this->access_token, $video_fields);
            // Check we haven't used up our quota before dereferencing the payload.
            // FIX: previously items[0] was read before this check, so an error response
            // triggered an undefined-offset access.
            if (isset($video_details->error)) {
                $this->logger->logError('Error querying YouTube Data API V3 ', __METHOD__ . ',' . __LINE__);
                break;
            }
            $item = $video_details->items[0];
            $video_attributes['post_text'] = $item->snippet->title;
            $video_attributes['description'] = $item->snippet->description;
            $video_attributes['likes'] = $item->statistics->likeCount;
            $video_attributes['dislikes'] = $item->statistics->dislikeCount;
            $video_attributes['views'] = $item->statistics->viewCount;
            // Keep track of these all time counts
            $count_history_dao->insert($user_id, 'youtube', $video_attributes['likes'], $video_id,
            'likes_all_time');
            $count_history_dao->insert($user_id, 'youtube', $video_attributes['dislikes'], $video_id,
            'dislikes_all_time');
            $count_history_dao->insert($user_id, 'youtube', $video_attributes['views'], $video_id,
            'views_all_time');
            $video_attributes['pub_date'] = $item->snippet->publishedAt;
            $video_attributes['post_id'] = $item->id;
            $video_attributes['location'] = $item->recordingDetails->locationDescription;
            $video_attributes['place'] = $item->recordingDetails->locationDescription;
            if (isset($item->recordingDetails->latitude)) {
                $video_attributes['geo'] = $item->recordingDetails->latitude . "," .
                $item->recordingDetails->longitude;
            }
            $video_attributes['is_protected'] = self::determinePrivacyStatus($item->status->privacyStatus);
            $today = date('Y-m-d');
            $upload_date = substr($item->snippet->publishedAt, 0, 10);
            // Get the favourites added, favourites removed, shares, subscribers gained, subscribers lost
            // estimated minutes watched, average view duration, average view percentage
            $analytics_fields = array('ids' => 'channel==' . $channel_id, 'start-date' => $upload_date,
            'end-date' => $today,
            'metrics' => 'favoritesAdded,favoritesRemoved,shares,subscribersGained,subscribersLost,' .
            'estimatedMinutesWatched,averageViewDuration,averageViewPercentage,views,likes,dislikes',
            'filters' => 'video==' . $video_id);
            $video_analytics_details = $this->youtube_analytics_api_accessor->apiRequest('reports',
            $this->access_token, $analytics_fields);
            // Check we haven't used up our quota
            if (isset($video_analytics_details->error)) {
                $this->logger->logError('Error querying YouTube Analytics API', __METHOD__ . ',' . __LINE__);
                break;
            }
            $analytics_item = $video_analytics_details->rows[0];
            // If the video is new we may not get any of these values back, but they can't be null
            if (isset($analytics_item)) {
                $video_attributes['favorites_added'] = $analytics_item[0];
                $video_attributes['favorites_removed'] = $analytics_item[1];
                $video_attributes['shares'] = $analytics_item[2];
                $video_attributes['subscribers_gained'] = $analytics_item[3];
                $video_attributes['subscribers_lost'] = $analytics_item[4];
                $video_attributes['minutes_watched'] = $analytics_item[5];
                $video_attributes['average_view_duration'] = $analytics_item[6];
                $video_attributes['average_view_percentage'] = $analytics_item[7];
                // Keep track of these all time counts
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[0], $video_id,
                'favorites_added_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[1], $video_id,
                'favorites_removed_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[2], $video_id,
                'shares_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[3], $video_id,
                'subscribers_gained_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[4], $video_id,
                'subscribers_lost_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[5], $video_id,
                'minutes_watched_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[6], $video_id,
                'average_view_duration_all_time');
                $count_history_dao->insert($user_id, 'youtube', $analytics_item[7], $video_id,
                'average_view_percentage_all_time');
            } else {
                // So set them to 0
                $video_attributes['favorites_added'] = 0;
                $video_attributes['favorites_removed'] = 0;
                $video_attributes['shares'] = 0;
                $video_attributes['subscribers_gained'] = 0;
                $video_attributes['subscribers_lost'] = 0;
                $video_attributes['minutes_watched'] = 0;
                $video_attributes['average_view_duration'] = 0;
                $video_attributes['average_view_percentage'] = 0;
            }
            $video_attributes['author_user_id'] = $this->instance->network_user_id;
            $video_attributes['author_username'] = $this->instance->network_username;
            $video_attributes['author_fullname'] = $author_details->full_name;
            $video_attributes['author_avatar'] = $author_details->avatar;
            $video_attributes['source'] = '';
            $video_attributes['network'] = 'youtube';
            $video_dao->addVideo($video_attributes);
            // Now collect per day count data for 2 days ago (testing has shown analytics data is delayed by 2 days)
            $two_days_ago = date('Y-m-d', strtotime("-2 day", strtotime($today)));
            $analytics_fields['start-date'] = $two_days_ago;
            $analytics_fields['end-date'] = $two_days_ago;
            $analytics_today_details = $this->youtube_analytics_api_accessor->apiRequest('reports',
            $this->access_token, $analytics_fields);
            // Check we haven't used up our quota
            if (isset($analytics_today_details->error)) {
                $this->logger->logError('Error querying YouTube Analytics API', __METHOD__ . ',' . __LINE__);
                break;
            }
            $todays_analytics = $analytics_today_details->rows[0];
            // Check we got data and if not skip this part
            if (isset($todays_analytics)) {
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[0], $video_id,
                'favorites_added', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[1], $video_id,
                'favorites_removed', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[2], $video_id,
                'shares', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[3], $video_id,
                'subscribers_gained', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[4], $video_id,
                'subscribers_lost', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[5], $video_id,
                'minutes_watched', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[6], $video_id,
                'average_view_duration', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[7], $video_id,
                'average_view_percentage', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[8], $video_id,
                'views', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[9], $video_id,
                'likes', $two_days_ago);
                $count_history_dao->insert($user_id, 'youtube', $todays_analytics[10], $video_id,
                'dislikes', $two_days_ago);
            }
            // Check to see how many comments we already have for this video and if there are no new ones skip
            // comment collection as it takes a long time.
            $video_in_db = $video_dao->getVideoByID($video_id, 'youtube');
            $comments_in_db = $video_in_db->reply_count_cache;
            if (!isset($comments_in_db)) {
                $comments_in_db = 0;
            }
            $api_comments = $item->statistics->commentCount;
            $comments_collected = 0;
            // If this video has any new comments capture those
            if ($api_comments > 0 && $api_comments > $comments_in_db) {
                // Request the first page of comments for this video
                $comments_fields = array('alt' => 'json');
                if (isset($this->developer_key)) {
                    $comments_fields['key'] = $this->developer_key;
                }
                $comments = $this->youtube_api_v2_accessor->apiRequest('videos/' . $video_id . '/comments',
                $comments_fields);
                // Check we haven't used up our quota
                if (isset($comments->errors)) {
                    $this->logger->logError('Error querying YouTube Data API V2 ', __METHOD__ . ',' . __LINE__);
                    break;
                }
                do {
                    // Iterate through each comment and store the details
                    foreach ($comments->feed->entry as $comment) {
                        // We may have only needed to collect a few new comments so abort if we have everything
                        if ($api_comments == $comments_in_db) {
                            break 2;
                        }
                        // If the user has specified a limit on the number of comments per video to collect each
                        // crawl check we haven't exceeded it
                        if (isset($this->maximum_comments) && $comments_collected >= $this->maximum_comments) {
                            $this->logger->logUserInfo("Stopping collection of comments for video due to reaching "
                            . "limit of " . $this->maximum_comments . " comments.", __METHOD__ . ',' . __LINE__);
                            break 2;
                        }
                        // We may spend a long time collecting comments so also check here if we've exceed the max
                        // time specified by the user
                        if (time() >= $end_time) {
                            $this->logger->logUserInfo("Stopping this service users crawl because it has exceeded "
                            . "max time of " . $this->max_crawl_time / 60 . " minute(s). ",
                            __METHOD__ . ',' . __LINE__);
                            $had_to_finish_early = true;
                            break 4;
                        }
                        // The id is returned in the XML as part of a long URL, we only want the last part of that
                        // URL
                        $id_string = explode('/', $comment->id->{'$t'});
                        // This will be the last element of id_string
                        $comment_store['post_id'] = $id_string[sizeof($id_string) - 1];
                        // The post text is the comment they made
                        // Remove byte order markers from the comment text from:
                        // http://stackoverflow.com/questions/3255993/how-do-i-remove-i-from-the-beginning
                        // -of-a-file#comment9330944_3256183
                        $comment_store['post_text'] = preg_replace('/\\x{EF}\\x{BB}\\x{BF}/', '',
                        $comment->content->{'$t'});
                        // The author username is the users G+ displayname which we need to query for
                        // To get the G+ ID of this commentor we need to vist their youtube profile page, the ID
                        // needed to get to this users page is the last element of the author URI
                        $user_id_string = explode('/', $comment->author[0]->uri->{'$t'});
                        $name = $this->youtube_api_v2_accessor->apiRequest('users/' .
                        $user_id_string[sizeof($user_id_string) - 1], $comments_fields);
                        $gplus_id = $name->entry->{'yt$googlePlusUserId'}->{'$t'};
                        // Now we have their G+ ID we can get their details from the G+ API
                        $gplus_fields = array('fields' => 'displayName,id,image,tagline,verified');
                        $user_details = $this->google_plus_api_accessor->apiRequest('people/' . $gplus_id,
                        $this->access_token, $gplus_fields);
                        // Sometimes G+ says the ID is invalid or the user doesn't have a G+ ID
                        if ($user_details->error->code == '404' || $gplus_id == '') {
                            // Use V2 of the YouTube api to get their details
                            $comment_store['author_username'] = $name->entry->{'yt$username'}->{'$t'};
                            $comment_store['author_fullname'] = $name->entry->author[0]->name->{'$t'};
                            $comment_store["author_avatar"] = $name->entry->{'media$thumbnail'}->url;
                            // In this case the user id is their YouTube user ID
                            $comment_store['author_user_id'] = $user_id_string[sizeof($user_id_string) - 1];
                            self::fetchUserFromYouTube($user_id_string[sizeof($user_id_string) - 1],
                            'youtube_crawler');
                            // If we still didn't get these details we can't store this comment
                            if ($comment_store['author_username'] == null
                            || $comment_store['author_fullname'] == null
                            || $comment_store["author_avatar"] == null) {
                                continue;
                            }
                        } elseif (isset($user_details->error)) {
                            // Check we haven't exceed the G+ API quota
                            $this->logger->logError('Error querying Google Plus API ', __METHOD__ . ',' . __LINE__);
                            break;
                        } else {
                            $comment_store['author_username'] = $user_details->displayName;
                            $comment_store['author_fullname'] = $user_details->displayName;
                            $comment_store["author_avatar"] = $user_details->image->url;
                            // The author user id is their G+ ID
                            $comment_store['author_user_id'] = $gplus_id;
                            // Make sure we have this commentor in the database
                            self::fetchUser($gplus_id, 'youtube crawler');
                        }
                        // The date they posted the comment
                        $comment_store['pub_date'] = substr($comment->published->{'$t'}, 0, 10) . " " .
                        substr($comment->published->{'$t'}, 11, 8);
                        // Source of the comment
                        $comment_store['source'] = "";
                        // Comments can not be private
                        $comment_store['is_protected'] = false;
                        // Set the network to youtube
                        $comment_store['network'] = 'youtube';
                        // The ID of the author of the video
                        $comment_store['in_reply_to_user_id'] = $this->instance->network_user_id;
                        // The ID of the video this comment is a reply to
                        $comment_store['in_reply_to_post_id'] = $video_id;
                        $insert_id = $post_dao->addPost($comment_store);
                        // If the insert id is null and were not going back to collect the whole archive
                        // we've already captured comments from this point so move on
                        if ($insert_id == null && $archive_loaded) {
                            break 2;
                        }
                        $comments_in_db++;
                        $comments_collected++;
                    }
                    $test = self::determineIfMoreCommentsExist($comments, $video_id);
                    // If there is another page of comments make a request for them
                    if ($test['next']) {
                        $comments = $this->youtube_api_v2_accessor->basicApiRequest($test['url']);
                        // Check we haven't used up our quota
                        if (isset($comments->errors)) {
                            $this->logger->logError('Error querying YouTube Data API V2 ',
                            __METHOD__ . ',' . __LINE__);
                            break;
                        } elseif ($comments == null) {
                            // If the comments come back as null its because we've been making too many requests too
                            // quickly The YouTube api doesn't return valid JSON telling us this though so
                            // json_decode returns null so back off for 30 seconds and then try again
                            $error_message = "Querying the YouTube API too often waiting for 30 seconds, to ";
                            $error_message .= "prevent this delay add a developer key.";
                            $this->logger->logError($error_message, __METHOD__ . ',' . __LINE__);
                            sleep(30);
                            $comments = $this->youtube_api_v2_accessor->basicApiRequest($test['url']);
                        }
                    }
                } while ($test['next']);
            }
            // If we have another page of videos then get the token for the page
            if (isset($user_videos->nextPageToken)) {
                $videos_fields['pageToken'] = $user_videos->nextPageToken;
            }
        }
        // If we have another page of videos and haven't loaded all this users video yet keep going
        // if we have loaded all this users videos then stop after 1 page (50 videos)
    } while (isset($user_videos->nextPageToken) && !$archive_loaded);
    // If we didn't have to finish the crawl early due to timing out we have collected all this users videos, or
    // we have tried more than 20 times, stop trying to go back and load the post archive.
    // FIX: was "$attempts >= 20" — $attempts is an array, and in PHP an array always compares
    // greater than an int, so the archive was marked loaded after every timed-out crawl.
    if (!$had_to_finish_early || (isset($attempts['count']) && $attempts['count'] >= 20)) {
        $instance_dao->setPostArchiveLoaded($user_id, 'youtube');
    }
}
public function updateContext($user_id, $creator_id, $data, $context) { if (!$user_id || !$creator_id || !isset($data)) { $message = "Stories->updateContext() missing args"; Logger::logError($message); throw new Exception($message); } //print_r($data); $db = Database::obtain(); $data = $db->escape($data); $data = sanitize($data); $creator_id = $db->escape($creator_id); $creator_id = sanitize($creator_id); $context = $db->escape($context); $context = sanitize($context); $user_id = $db->escape($user_id); $user_id = sanitize($user_id); $sql = "UPDATE `users` SET `" . $context . "` = '" . $data . "' WHERE `id`='" . $db->escape($user_id) . "'"; $db->query($sql); }
/**
 * cleanUpMissedFavsUnFavs pages back through the older pages of favs, checking for favs that are not yet in
 * the database, as well as favs that were added to the db but are no longer returned by Twitter's API.
 * However, that latter calculation, for un-fav'd tweets, is currently not reliable due to a bug on Twitter's end,
 * and so such tweets are not currently removed from the database.
 * Due to the same issue with the API, it's not clear whether all favs of older tweets are going to be actually
 * returned from Twitter (that is, it is currently not returning some actually-favorited tweets in a given range).
 * So, we may miss some older tweets that were in fact favorited, until Twitter fixes this.
 * The number of pages to page back for each run of the crawler is set by favs_cleanup_pages option.
 * The page position is persisted across runs in $this->instance->last_unfav_page_checked.
 *
 * @return bool Always true (also returned early when no API calls are available).
 */
public function cleanUpMissedFavsUnFavs() {
    // first, check that we have the resources to do work
    if (!($this->api->available && $this->api->available_api_calls_for_crawler)) {
        $this->logger->logInfo("terminating cleanUpMissedFavsUnFavs-- no API calls available",
        __METHOD__ . ',' . __LINE__);
        return true;
    }
    $this->logger->logInfo("In cleanUpMissedFavsUnFavs", __METHOD__ . ',' . __LINE__);
    $this->logger->logInfo("User id: " . $this->user->user_id . "\n", __METHOD__ . ',' . __LINE__);
    $fcount = 0; // number of missed favorites added this run
    $favs_cleanup_pages = 1; // default number of pages to process each time the crawler runs
    // get plugin option value if it exists & is positive int, otherwise use default
    $topt = $this->twitter_options;
    if (isset($topt['favs_cleanup_pages'])) {
        $conf_favs_cleanup_pages = $topt['favs_cleanup_pages']->option_value;
        $this->logger->logInfo("conf_favs_cleanup_pages: {$conf_favs_cleanup_pages} ",
        __METHOD__ . ',' . __LINE__);
        if (is_integer((int) $conf_favs_cleanup_pages) && $conf_favs_cleanup_pages > 0) {
            $favs_cleanup_pages = $conf_favs_cleanup_pages;
        }
    }
    $this->logger->logInfo("favs_cleanup_pages: {$favs_cleanup_pages} ", __METHOD__ . ',' . __LINE__);
    $fpd = DAOFactory::getDAO('FavoritePostDAO');
    $pagesize = 20; // number of favs per page retrieved from the API call... (tbd: any way to get
    // this from the API?)
    // get 'favs_older_pages' plugin option value if it exists & is pos. int. Use it to calculate default start
    // page if set, otherwise use default value.
    $default_start_page = 2;
    $topt = $this->twitter_options;
    if (isset($topt['favs_older_pages'])) {
        $conf_older_favs_pages = $topt['favs_older_pages']->option_value;
        if (is_integer((int) $conf_older_favs_pages) && $conf_older_favs_pages > 0) {
            $default_start_page = $conf_older_favs_pages + 1;
        }
    }
    $this->logger->logInfo("default start page: {$default_start_page} ", __METHOD__ . ',' . __LINE__);
    $last_page_of_favs = round($this->api->archive_limit / $pagesize);
    // resume from where the previous crawler run left off, if anywhere
    $last_unfav_page_checked = $this->instance->last_unfav_page_checked;
    $start_page = $last_unfav_page_checked > 0 ? $last_unfav_page_checked + 1 : $default_start_page;
    $this->logger->logInfo("start page: {$start_page}, with {$favs_cleanup_pages} cleanup pages",
    __METHOD__ . ',' . __LINE__);
    $curr_favs_count = $this->user->favorites_count;
    $count = 0;
    $page = $start_page;
    while ($count < $favs_cleanup_pages && $this->api->available && $this->api->available_api_calls_for_crawler) {
        // get the favs from that page
        try {
            list($tweets, $cURL_status, $twitter_data) = $this->getFavsPage($page);
        } catch (APICallLimitExceededException $e) {
            break;
        }
        // NOTE(review): on a parse error this logs but still falls through to use $tweets below
        if ($cURL_status != 200 || $tweets == -1) {
            $this->logger->logError("Error parsing favs. Twitter data: " . $twitter_data . " cURL status: " .
            $cURL_status, __METHOD__ . ',' . __LINE__);
        }
        if (sizeof($tweets) == 0) {
            // then done paging backwards through the favs.
            // reset pointer so that we start at the recent favs again next time through.
            $this->instance->last_unfav_page_checked = 0;
            break;
        }
        // favs come back newest-first, so the page's ID range is [last element, first element]
        $min_tweet = $tweets[sizeof($tweets) - 1]['post_id'];
        $max_tweet = $tweets[0]['post_id'];
        $this->logger->logInfo("in cleanUpUnFavs, page {$page} min and max: {$min_tweet}, {$max_tweet}",
        __METHOD__ . ',' . __LINE__);
        foreach ($tweets as $fav) {
            $fav['network'] = 'twitter';
            // check whether the tweet is in the db-- if not, add it.
            if ($fpd->addFavorite($this->user->user_id, $fav) > 0) {
                URLProcessor::processPostURLs($fav['post_text'], $fav['post_id'], 'twitter', $this->logger);
                $this->logger->logInfo("added fav " . $fav['post_id'], __METHOD__ . ',' . __LINE__);
                $fcount++;
            } else {
                $status_message = "have already stored fav " . $fav['post_id'];
                $this->logger->logDebug($status_message, __METHOD__ . ',' . __LINE__);
            }
        }
        // now for each favorited tweet in the database within the fetched range, check whether it's still
        // favorited. This part of the method is currently disabled due to issues with the Twitter API, which
        // is not returning all of the favorited tweets any more. So, the fact that a previously-archived
        // tweet is not returned, no longer indicates that it was un-fav'd.
        // The method still IDs the 'missing' tweets, but no longer deletes them. We may want to get rid of
        // this check altogether at some point.
        $fposts = $fpd->getAllFavoritePostsUpperBound($this->user->user_id, 'twitter', $pagesize,
        $max_tweet + 1);
        foreach ($fposts as $old_fav) {
            $old_fav_id = $old_fav->post_id;
            if ($old_fav_id < $min_tweet) {
                $this->logger->logInfo("Old fav {$old_fav_id} out of range ", __METHOD__ . ',' . __LINE__);
                break; // all the rest will be out of range also then
            }
            // look for the old_fav_id in the array of fetched favs
            $found = false;
            foreach ($tweets as $tweet) {
                if ($old_fav_id == $tweet['post_id']) {
                    $found = true;
                    break;
                }
            }
            if (!$found) {
                // if it's not there...
                // 14/10 arghh -- Twitter is suddenly (temporarily?) not returning all fav'd tweets in a
                // sequence.
                // skipping the delete for now, keep tabs on it. Can check before delete with extra API
                // request, but the point of doing it this way was to avoid the additional API request.
                $this->logger->logInfo("Twitter claims tweet not still favorited, but this is currently " .
                "broken, so not deleting: " . $old_fav_id, __METHOD__ . ',' . __LINE__);
                // 'unfavorite' by removing from favorites table
                // $fpd->unFavorite($old_fav_id, $this->user->user_id);
            }
        }
        // persist progress, then either advance to the next page or wrap around and stop
        $this->instance->last_unfav_page_checked = $page++;
        if ($page > $last_page_of_favs) {
            $page = 0;
            break;
        }
        $count++;
    }
    $this->logger->logUserSuccess("Added {$fcount} older missed favorites", __METHOD__ . ',' . __LINE__);
    return true;
}
protected static function logError($message, $category) { if (class_exists('Logger')) { Logger::logError($message, $category); } }
/** * Delete cache * * @param string $uri URI string * @return bool */ public function delete_cache($uri = '') { $cache_path = Core::$tempDir . DS . 'Output' . DS; if (!is_dir($cache_path)) { Logger::logError('Unable to find cache path: ' . $cache_path); return FALSE; } if (empty($uri)) { $uri = $this->uri->uri_string(); if (($cache_query_string = $this->config->cache->cache_query_string) && !empty($_SERVER['QUERY_STRING'])) { if (is_array($cache_query_string)) { $uri .= '?' . http_build_query(array_intersect_key($_GET, array_flip($cache_query_string))); } else { $uri .= '?' . $_SERVER['QUERY_STRING']; } } } $cache_path .= md5($this->config->mainbase_url . $this->config->main->index_page . ltrim($uri, '/')); if (!@unlink($cache_path)) { Logger::logError('Unable to delete cache file for ' . $uri); return FALSE; } return TRUE; }
/**
 * maintFavsFetch implements the core of the crawler's 'maintenance fetch' for favs. It goes into this mode
 * after the initial archving process. In maintenance mode the crawler is just looking for new favs. It searches
 * backwards until it finds the last-stored fav, then searches further back to find any older tweets that were
 * favorited unchronologically (as might happen if the user were looking back through a particular account's
 * timeline). The number of such pages to search back through is set in favs_older_pages option.
 *
 * All loop state is threaded through the argument list and returned as a tuple, so the caller
 * can invoke this repeatedly until $continue comes back false.
 *
 * @param int $starting_fav_id highest fav id seen on the previous crawler run
 * @param int $fcount running count of newly-added favs
 * @param int $mpage page of favs to fetch on this call
 * @param bool $older_favs_smode whether we are in 'search for older favs' mode
 * @param int $stop_page last page to check while in older-favs mode
 * @param int $new_favs_to_add passed through unchanged (its stop-condition use is currently disabled)
 * @param int $last_fav_id highest fav id seen so far this run
 * @param int $last_page_fetched_favorites passed through unchanged
 * @param bool $continue whether the caller should keep iterating
 * @throws Exception when a 200 response yields no parseable tweets
 * @return array($fcount, $mpage, $older_favs_smode, $stop_page, $new_favs_to_add, $last_fav_id,
 *         $last_page_fetched_favorites, $continue);
 */
private function maintFavsFetch($starting_fav_id, $fcount, $mpage, $older_favs_smode, $stop_page,
$new_favs_to_add, $last_fav_id, $last_page_fetched_favorites, $continue) {
    $status_message = "";
    $older_favs_pages = 2; // default number of additional pages to check back through
    // for older favs added non-chronologically
    list($tweets, $cURL_status, $twitter_data) = $this->getFavsPage($mpage);
    if ($cURL_status == 200) {
        if ($tweets == -1) {
            // should not reach this
            $this->logger->logInfo("in maintFavsFetch; could not extract any tweets from response",
            __METHOD__ . ',' . __LINE__);
            throw new Exception("could not extract any tweets from response");
        }
        if (sizeof($tweets) == 0) {
            // then done -- this should happen when we have run out of favs
            $this->logger->logInfo("It appears that we have run out of favorites to process",
            __METHOD__ . ',' . __LINE__);
            $continue = false;
        } else {
            $post_dao = DAOFactory::getDAO('FavoritePostDAO');
            foreach ($tweets as $tweet) {
                $tweet['network'] = 'twitter';
                if ($post_dao->addFavorite($this->user->user_id, $tweet) > 0) {
                    URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter',
                    $this->logger);
                    $this->logger->logInfo("found new fav: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                    $fcount++;
                    $this->logger->logInfo("fcount: {$fcount}", __METHOD__ . ',' . __LINE__);
                    $this->logger->logInfo("added favorite: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                } else {
                    // fav was already stored, so take no action. This could happen both because some
                    // of the favs on the given page were processed last time, or because a separate process,
                    // such as a UserStream process, is also watching for and storing favs.
                    $status_message = "have already stored fav " . $tweet['post_id'];
                    $this->logger->logDebug($status_message, __METHOD__ . ',' . __LINE__);
                }
                // keep track of the highest fav id we've encountered
                if ($tweet['post_id'] > $last_fav_id) {
                    $this->logger->logInfo("fav " . $tweet['post_id'] . " > {$last_fav_id}",
                    __METHOD__ . ',' . __LINE__);
                    $last_fav_id = $tweet['post_id'] + 0;
                }
            } // end foreach
        }
        $mpage++;
        // if have gone earlier than highest fav id from last time, then switch to 'search for older favs' mode
        if ($older_favs_smode == false) {
            // last-processed tweet ($tweet intentionally leaks out of the foreach above)
            if (isset($tweet) && $tweet['post_id'] <= $starting_fav_id) {
                // get 'favs_older_pages' plugin option value if it exists & is pos. int, otherwise use default
                $topt = $this->twitter_options;
                if (isset($topt['favs_older_pages'])) {
                    $conf_older_favs_pages = $topt['favs_older_pages']->option_value;
                    if (is_integer((int) $conf_older_favs_pages) && $conf_older_favs_pages > 0) {
                        $older_favs_pages = $conf_older_favs_pages;
                    }
                }
                $this->logger->logInfo("older_favs_pages: {$older_favs_pages}", __METHOD__ . ',' . __LINE__);
                $older_favs_smode = true;
                $stop_page = $mpage + $older_favs_pages - 1;
                $this->logger->logInfo("next will be searching for older favs: stop page: {$stop_page},\n fav <= {$starting_fav_id} ", __METHOD__ . ',' . __LINE__);
            }
        } else {
            // in older_favs_smode, check whether we should stop
            $this->logger->logInfo("in older favs search mode with stop page {$stop_page}",
            __METHOD__ . ',' . __LINE__);
            // check for terminating condition, which is (for now), that we have searched N more pages back
            // or found all the add'l tweets
            // 23/10/10 making temp (?) change due to broken API.
            // if ($mpage > $stop_page || $fcount >= $new_favs_to_add) {
            // temp change to not use the 'new favs to add' info while the api favs bug still exists-- it
            // breaks things under some circs.
            // hopefully this will be fixed again by Twitter at some point.
            if ($mpage > $stop_page) {
                $continue = false;
            }
        }
    } else {
        // non-200 response: log it and tell the caller to stop
        $this->logger->logError("cURL status: {$cURL_status}", __METHOD__ . ',' . __LINE__);
        $this->logger->logInfo($twitter_data, __METHOD__ . ',' . __LINE__);
        $continue = false;
    }
    return array($fcount, $mpage, $older_favs_smode, $stop_page, $new_favs_to_add, $last_fav_id,
    $last_page_fetched_favorites, $continue);
}
function usun($numerKomentarza) { Logger::logError("Blad w metodzie MysqlKomentarzDoZdjecia->aktualizuj: metoda niezaimplementowana."); }