PHP Logger::logUserInfo примеры использования

Язык программирования: PHP

Класс/Тип: Logger

Метод/Функция: logUserInfo

Примеров на hotexamples.com: 11

PHP Logger::logUserInfo - 11 примеров найдено. Это лучшие примеры PHP кода для Logger::logUserInfo из пакета atk4, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

err(30)

configure(30)

log(30)

critical(30)

Log(30)

logInfo(30)

info(30)

log_event(30)

getRootLogger(30)

addLog(30)

debug(30)

getLogger(30)

trace(30)

write(30)

warning(30)

warn(30)

instance(30)

getInstance(30)

error(30)

remember(30)

LogEvent(27)

notice(25)

Error(24)

crit(22)

track_change(22)

emergency(21)

i(21)

Info(20)

fatal(18)

init(18)

logError(17)

addMessage(16)

alert(16)

event(16)

__construct(15)

logfile(15)

notify(14)

lfile(14)

lwrite(14)

logSuccess(13)

resetConfiguration(13)

dieMessage(11)

Warn(11)

getHierarchy(11)

logUserInfo(11)

logEvent(11)

insertLog(10)

emerg(10)

close(10)

error_pop(10)

Документация по классу Logger

Пример #1

Показать файл

Файл: class.InstagramCrawler.php Проект: ngugijames/ThinkUp

 /**
  * Fetch and save the instance user's friends.
  *
  * The number of friends stored for the instance user is compared with the friend count on the
  * user object to decide whether the friend archive counts as loaded:
  *
  *   - fewer friends stored than the user object reports: the archive is not loaded, so page
  *     through friends starting at the instance's follows_next_cursor
  *   - otherwise: the archive is loaded, so update stale follows
  *
  * Both steps run in the same call if the archive flag is set while paging.
  */
 public function fetchFriends()
 {
     if (!isset($this->user)) {
         // No user object yet: force-refresh the instance user in the data store
         $this->user = self::fetchUser(
             $this->instance->network_user_id,
             'Owner info',
             $this->instance->network_username,
             null,
             null,
             true
         );
     }

     $this->instance->total_friends_in_system = DAOFactory::getDAO('FollowDAO')->countTotalFriends(
         $this->instance->network_user_id,
         'instagram'
     );

     $count_summary = $this->instance->total_friends_in_system . " friends in system, "
         . $this->user->friend_count . " friends according to Instagram";
     $this->logger->logUserInfo($count_summary, __METHOD__ . ',' . __LINE__);

     // The archive is treated as loaded unless fewer friends are stored than the user object reports
     $this->instance->is_archive_loaded_friends =
         !($this->instance->total_friends_in_system < $this->user->friend_count);

     // Archive not loaded yet: page through friends
     if (!$this->instance->is_archive_loaded_friends) {
         $this->logger->logInfo("Friend archive  is not loaded, start paging", __METHOD__ . ',' . __LINE__);
         $this->pageThroughFriends($this->instance->follows_next_cursor);
     }

     // Deliberately a second check rather than an else: the flag is re-read after paging
     if ($this->instance->is_archive_loaded_friends) {
         $this->logger->logInfo(
             "Friend archive loaded, start updating stale friendships",
             __METHOD__ . ',' . __LINE__
         );
         $this->updateStaleFollows(true);
     }
 }

Пример #2

Показать файл

Файл: class.FacebookCrawler.php Проект: rkabir/ThinkUp

    /**
     * Fetch and save the posts and replies on a Facebook page.
     *
     * Requests the page's posts, parses the stream into posts and users, stores each post
     * (filling in missing author details where an author user ID is available) and updates each user.
     *
     * @param int $pid Page ID
     */
    public function fetchPagePostsAndReplies($pid) {
        $stream = FacebookGraphAPIAccessor::apiRequest('/'.$pid.'/posts', $this->access_token);

        // Count the data array first and then compare; counting the result of the comparison
        // (a boolean) is a TypeError on PHP 8 and always 1 on PHP 7.
        if (isset($stream->data) && is_array($stream->data) && sizeof($stream->data) > 0) {
            $this->logger->logSuccess(sizeof($stream->data)." Facebook posts found for page ID $pid.",
            __METHOD__.','.__LINE__);

            $thinkup_data = $this->parseStream($stream, 'facebook page');
            $posts = $thinkup_data["posts"];

            $post_dao = DAOFactory::getDAO('PostDAO');
            $added_posts = 0;
            foreach ($posts as $post) {
                // Author name missing but ID known: look the author up and fill in the details
                if ($post['author_username']== "" && isset($post['author_user_id'])) {
                    $commenter_object = $this->fetchUserInfo($post['author_user_id'], 'facebook',
                    'Facebook page comments');
                    if (isset($commenter_object)) {
                        $post["author_username"] = $commenter_object->full_name;
                        $post["author_fullname"] = $commenter_object->full_name;
                        $post["author_avatar"] = $commenter_object->avatar;
                    }
                }

                // The return value of addPost() is accumulated as the number of posts added
                $added_posts = $added_posts + $post_dao->addPost($post);
                $this->logger->logInfo("Added post ID ".$post["post_id"]." on ".$post["network"].
                " for ".$post["author_username"].":".$post["post_text"], __METHOD__.','.__LINE__);
            }

            $added_users = 0;
            $users = $thinkup_data["users"];
            if (count($users) > 0) {
                foreach ($users as $user) {
                    $user["post_count"] = $post_dao->getTotalPostsByUser($user['user_id'], $user['network']);
                    $found_in = 'Facebook page stream';
                    $user_object = new User($user, $found_in);
                    $user_dao = DAOFactory::getDAO('UserDAO');
                    $user_dao->updateUser($user_object);
                    $added_users = $added_users + 1;
                }
            }
            if ($added_posts > 0 || $added_users > 0) {
                $this->logger->logUserSuccess($added_posts." post(s) added; ".$added_users." user(s) updated.",
                __METHOD__.','.__LINE__);
            } else {
                $this->logger->logUserInfo("No new page posts found.", __METHOD__.','.__LINE__);
            }
        } else {
            $this->logger->logInfo("No Facebook posts found for page ID $pid", __METHOD__.','.__LINE__);
        }
    }

Пример #3

Показать файл

Файл: class.TwitterCrawler.php Проект: ngugijames/ThinkUp

 /**
  * Fetch the instance user's favorites since the last favorite stored, and save them.
  *
  * Favorites are requested repeatedly, starting from the instance's last_favorite_id, until a
  * request returns a non-200 HTTP status or an empty list of tweets.
  */
 public function fetchInstanceUserFavorites()
 {
     if (!isset($this->user)) {
         $this->fetchInstanceUserInfo();
     }
     $this->logger->logUserInfo("Checking for new favorites.", __METHOD__ . ',' . __LINE__);
     $highest_fav_id = $this->instance->last_favorite_id;
     $this->logger->logInfo("Owner favs: " . $this->user->favorites_count . ", instance owner favs in system: " . $this->instance->owner_favs_in_system, __METHOD__ . ',' . __LINE__);

     while (true) {
         list($tweets, $http_status, $payload) = $this->getFavorites($highest_fav_id);

         if ($http_status != 200) {
             // Request did not succeed; stop fetching
             break;
         }
         if (sizeof($tweets) == 0) {
             // then done -- this should happen when we have run out of favs
             $this->logger->logInfo("It appears that we have run out of favorites to process", __METHOD__ . ',' . __LINE__);
             break;
         }

         $favorite_dao = DAOFactory::getDAO('FavoritePostDAO');
         $new_favs_on_page = 0;
         foreach ($tweets as $tweet) {
             $tweet['network'] = 'twitter';
             // A non-positive result means the fav was already stored, so no action is taken.
             // That can happen because some favs on this page were processed last time, or because
             // a separate process, such as a UserStream process, is also storing favs.
             $was_added = $favorite_dao->addFavorite($this->user->user_id, $tweet) > 0;
             if ($was_added) {
                 URLProcessor::processPostURLs($tweet['post_text'], $tweet['post_id'], 'twitter', $this->logger);
                 $this->logger->logInfo("Found new fav: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
                 $new_favs_on_page++;
                 $this->logger->logInfo("Fav count: {$new_favs_on_page}", __METHOD__ . ',' . __LINE__);
                 $this->logger->logInfo("Added favorite: " . $tweet['post_id'], __METHOD__ . ',' . __LINE__);
             }
             // Track the highest fav id seen so far; it is passed to the next request
             if ($tweet['post_id'] > $highest_fav_id) {
                 $highest_fav_id = $tweet['post_id'];
             }
         }
     }
 }

Пример #4

Показать файл

Файл: class.FacebookCrawler.php Проект: dgw/ThinkUp

 /**
  * Fetch and save the posts and replies for the crawler's instance, paging back through the
  * user's or page's archive of posts until there is no next page, no data is returned, or the
  * maximum crawl time is exceeded.
  *
  * @return void
  * @throws APIOAuthException When the API response carries an error of type OAuthException
  */
 public function fetchPostsAndReplies()
 {
     $id = $this->instance->network_user_id;
     $network = $this->instance->network;

     // Friends are fetched and stored before any posts are requested
     $this->fetchAndStoreFriends();

     $page_number = 1;
     $request_url = 'https://graph.facebook.com/' . $id . '/feed?access_token=' . $this->access_token;

     // Cap crawl time for very busy pages with thousands of likes/comments
     $crawl_deadline = time() + $this->max_crawl_time;

     // 'since' is the publication time of the oldest stored post minus one day (just to be safe),
     // clamped so that it is never negative
     $oldest_posts = DAOFactory::getDAO('PostDAO')->getAllPosts($id, $network, 1, 1, true, 'pub_date', 'ASC');
     $oldest_pub_date = isset($oldest_posts[0]) ? $oldest_posts[0]->pub_date : 0;
     $since = max(0, strtotime($oldest_pub_date) - 60 * 60 * 24);

     do {
         // Paging continues only when the current page explicitly links to a next one
         $has_more_pages = false;
         $stream = FacebookGraphAPIAccessor::rawApiRequest($request_url, true);
         $stream_has_posts = isset($stream->data) && is_array($stream->data) && sizeof($stream->data) > 0;

         if ($stream_has_posts) {
             $this->logger->logInfo(sizeof($stream->data) . " Facebook posts found on page " . $page_number, __METHOD__ . ',' . __LINE__);
             $this->processStream($stream, $network, $page_number);
             if (isset($stream->paging->next)) {
                 $request_url = $stream->paging->next . '&since=' . $since;
                 $page_number++;
                 $has_more_pages = true;
             }
         } elseif (isset($stream->error->type) && $stream->error->type == 'OAuthException') {
             throw new APIOAuthException($stream->error->message);
         } else {
             $this->logger->logInfo("No Facebook posts found for ID {$id}", __METHOD__ . ',' . __LINE__);
         }

         if (time() > $crawl_deadline) {
             $has_more_pages = false;
             $this->logger->logUserInfo("Stopping this service user's crawl because it has exceeded max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
         }
     } while ($has_more_pages);
 }

Пример #5

Показать файл

Файл: class.ExpandURLsPlugin.php Проект: dgw/ThinkUp

 /**
  * Acquire click stats (and titles) for short links on each supported Bit.ly domain.
  *
  * For every Bit.ly base URL, asks the short link DAO for the links to update, requests link data
  * for each from the Bit.ly API, and stores the click count and title, or the error that was returned.
  * Nothing is done when the plugin's link limit is 0.
  *
  * @param string $api_key   Bit.ly API key
  * @param string $bit_login Bit.ly login name
  */
 public function acquireBitlyClickStats($api_key, $bit_login)
 {
     $this->logger->setUsername(null);
     $api_accessor = new BitlyAPIAccessor($api_key, $bit_login);
     $bitly_urls = array('http://bit.ly/', 'http://bitly.com/', 'http://j.mp/');
     foreach ($bitly_urls as $bitly_url) {
         if ($this->link_limit != 0) {
             //all short links first seen in the last 48 hours
             $bitly_links_to_update = $this->short_link_dao->getLinksToUpdate($bitly_url);
             if (count($bitly_links_to_update) > 0) {
                 $this->logger->logUserInfo(count($bitly_links_to_update) . " {$bitly_url}" . " links to acquire click stats for.", __METHOD__ . ',' . __LINE__);
             } else {
                 // The location argument is "method,line" as in every other logger call here;
                 // it was previously split into two arguments by a stray comma.
                 $this->logger->logUserInfo("There are no " . $bitly_url . " links to fetch click stats for.", __METHOD__ . ',' . __LINE__);
             }
             $total_links = 0;
             $total_errors = 0;
             // Links handled so far (successes plus errors); used for the "N of M" progress message
             $total_updated = 0;
             foreach ($bitly_links_to_update as $link) {
                 $this->logger->logInfo("Getting bit.ly click stats for " . ($total_updated + 1) . " of " . count($bitly_links_to_update) . " " . $bitly_url . " links (" . $link->short_url . ")", __METHOD__ . ',' . __LINE__);
                 $link_data = $api_accessor->getBitlyLinkData($link->short_url);
                 if ($link_data["clicks"] != '') {
                     //save click total here
                     $this->short_link_dao->saveClickCount($link->short_url, $link_data["clicks"]);
                     // Save title to links table
                     if ($link_data["title"] != '') {
                         $this->link_dao->updateTitle($link->link_id, $link_data["title"]);
                     }
                     $total_links = $total_links + 1;
                     $total_updated = $total_updated + 1;
                 } elseif ($link_data["error"] != '') {
                     $this->link_dao->saveExpansionError($link->short_url, $link_data["error"]);
                     $total_errors = $total_errors + 1;
                     $total_updated = $total_updated + 1;
                 }
             }
             $this->logger->logUserSuccess($total_links . " " . $bitly_url . " link click stats acquired (" . $total_errors . " errors)", __METHOD__ . ',' . __LINE__);
         }
     }
 }

Пример #6

Показать файл

Файл: class.TwitterCrawler.php Проект: randi2kewl/ThinkUp

 /**
  * This method, and the two supporting private methods 'maintFavsFetch' and 'archivingFavsFetch', provide the
  * primary crawler functionality for adding the user's favorites to the database.
  * For a given user, the process starts in 'archiving mode', by
  * working forwards from the last (oldest) page of tweets to the newest.  This archiving crawl
  * is only done once.  The crawler tries to do this all in one go, but if it exhausts the available API count,
  * it will continue where it left off in the next run.
  * Then, when page 1 is reached in archiving mode, the crawler goes into 'maintenance mode' and works
  * backwards from then on.  It first pages back until
  * it has reached the last fav it previously processed.  Then it searches back N more pages to catch any older
  * tweets that were fav'd out of chronological order, where N is determined by favs_older_pages option.
  * The bookkeeping for these two crawler stages is maintained in the tu_instances entry for the user.
  *
  * Recently, the Twitter favorites API has developed some bugs that need to be worked around.  The comments below
  * provide more detail, but in a nutshell, these methods can not currently use information from Twitter to
  * calculate loop termination (so a bit more work may be done than necessary), and do not currently remove un-fav'd
  * tweets from the database.  Hopefully these API issues will be fixed by Twitter in future.
  *
  * @return bool Always true, including when no API calls are available
  */
 public function fetchInstanceFavorites()
 {
     // first, check that we have the resources to do work
     if (!($this->api->available && $this->api->available_api_calls_for_crawler)) {
         $this->logger->logInfo("terminating fetchInstanceFavorites-- no API calls available", __METHOD__ . ',' . __LINE__);
         return true;
     }
     $status_message = "";
     //@TODO Can we get this from API?
     // number of favs per page retrieved from the API call
     $page_size = 20;
     $this->logger->logUserInfo("Checking for new favorites.", __METHOD__ . ',' . __LINE__);
     $last_favorites_count = $this->instance->favorites_profile;
     $this->logger->logInfo("last favs count: {$last_favorites_count}", __METHOD__ . ',' . __LINE__);
     $last_page_fetched_favorites = $this->instance->last_page_fetched_favorites;
     $last_fav_id = $this->instance->last_favorite_id;
     $curr_favs_count = $this->user->favorites_count;
     $this->logger->logInfo("curr favs count: {$curr_favs_count}", __METHOD__ . ',' . __LINE__);
     $last_page_of_favs = round($this->api->archive_limit / $page_size);
     // under normal circs the latter clause below should never hold, but due to a previously-existing
     // bug that could set a negative last_page_fetched_favorites value in the db in some cases,
     // it is necessary for recovery.
     if ($last_page_fetched_favorites == "" || $last_page_fetched_favorites < 0) {
         $last_page_fetched_favorites = 0;
     }
     $this->logger->logInfo("got last_page_fetched_favorites: {$last_page_fetched_favorites}", __METHOD__ . ',' . __LINE__);
     if ($last_fav_id == "") {
         $last_fav_id = 0;
     }
     // the owner favs count, from twitter, is currently unreliable and may be less than the actual number of
     // favs, by a large margin.  So, we still go ahead and calculate the number of 'missing' tweets based on
     // this info, but currently do not use it for fetch loop termination.
     $this->logger->logInfo("owner favs: " . $this->user->favorites_count . ", instance owner favs in system: " . $this->instance->owner_favs_in_system, __METHOD__ . ',' . __LINE__);
     $favs_missing = $this->user->favorites_count - $this->instance->owner_favs_in_system;
     $this->logger->logInfo("favs missing: {$favs_missing}", __METHOD__ . ',' . __LINE__);
     // figure out if we're in 'archiving' or 'maintenance' mode, via # of last_page_fetched_favorites
     // default (0) is archiving/first-fetch
     $mode = 0;
     if ($last_page_fetched_favorites == 1) {
         // we are in maint. mode
         $mode = 1;
         $new_favs_to_add = $favs_missing;
         $this->logger->logInfo("new favs to add/missing: {$new_favs_to_add}", __METHOD__ . ',' . __LINE__);
         $mpage = 1;
         $starting_fav_id = $last_fav_id;
     } else {
         // we are in archiving mode.
         $new_favs_to_add = $curr_favs_count - $last_favorites_count;
         // twitter profile information is not always consistent, so ensure that this value is not negative
         if ($new_favs_to_add < 0) {
             // Assignment, not comparison: a `==` here left the negative value in place, which
             // then produced a negative $extra_pages below.
             $new_favs_to_add = 0;
         }
         $this->logger->logInfo("new favs to add: {$new_favs_to_add}", __METHOD__ . ',' . __LINE__);
         // figure out start page based on where we left off last time, and how many favs added since then
         $extra_pages = ceil($new_favs_to_add / $page_size);
         $this->logger->logInfo("extra pages: {$extra_pages}", __METHOD__ . ',' . __LINE__);
         $finished_first_fetch = false;
         if ($last_page_fetched_favorites == 0) {
             // if at initial starting fetch (first time favs ever crawled)
             if ($extra_pages == 0) {
                 // always check at least one page on initial fetch
                 $extra_pages = 1;
             }
             $last_page_fetched_favs_start = $extra_pages + 1;
         } else {
             $last_page_fetched_favs_start = $last_page_fetched_favorites + $extra_pages;
         }
         // never start beyond one page past the last page of favs
         if ($last_page_fetched_favs_start > $last_page_of_favs) {
             $last_page_fetched_favs_start = $last_page_of_favs + 1;
         }
     }
     $status_message = "total last favs count: {$last_favorites_count}" . ", last page fetched: {$last_page_fetched_favorites}, last fav id: {$last_fav_id}";
     $this->logger->logInfo($status_message, __METHOD__ . ',' . __LINE__);
     $this->logger->logInfo("current favs count: {$curr_favs_count}" . ", new favs to add: {$new_favs_to_add}, last page of favs: {$last_page_of_favs}, mode: {$mode}", __METHOD__ . ',' . __LINE__);
     $continue = true;
     $fcount = 0;
     $older_favs_smode = false;
     $stop_page = 0;
     $status_message = "in fetchInstanceFavorites: API available: " . $this->api->available . ", avail for crawler: " . $this->api->available_api_calls_for_crawler;
     $this->logger->logInfo($status_message, __METHOD__ . ',' . __LINE__);
     while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $continue) {
         try {
             if ($mode != 0) {
                 // in maintenance, not archiving mode
                 list($fcount, $mpage, $older_favs_smode, $stop_page, $new_favs_to_add, $last_fav_id, $last_page_fetched_favorites, $continue) = $this->maintFavsFetch($starting_fav_id, $fcount, $mpage, $older_favs_smode, $stop_page, $new_favs_to_add, $last_fav_id, $last_page_fetched_favorites, $continue);
             } else {
                 // mode 0 -- archiving mode
                 if (!$finished_first_fetch) {
                     // first pass starts from the computed start page
                     list($fcount, $last_fav_id, $last_page_fetched_favorites, $continue) = $this->archivingFavsFetch($fcount, $last_fav_id, $last_page_fetched_favs_start, $continue);
                     $finished_first_fetch = true;
                 } else {
                     // later passes continue from the page returned by the previous pass
                     list($fcount, $last_fav_id, $last_page_fetched_favorites, $continue) = $this->archivingFavsFetch($fcount, $last_fav_id, $last_page_fetched_favorites, $continue);
                 }
             }
         } catch (APICallLimitExceededException $e) {
             // out of API calls: stop fetching and fall through to the bookkeeping below
             break;
         }
     }
     // end while
     // update necessary instance fields
     $this->logger->logInfo("new_favs_to_add: {$new_favs_to_add}, fcount: {$fcount}", __METHOD__ . ',' . __LINE__);
     $this->logger->logInfo("new 'last fav id': {$last_fav_id}", __METHOD__ . ',' . __LINE__);
     $this->instance->last_favorite_id = $last_fav_id;
     $this->instance->last_page_fetched_favorites = $last_page_fetched_favorites;
     $this->instance->favorites_profile = $curr_favs_count;
     $this->logger->logUserSuccess("Saved {$fcount} new favorites.", __METHOD__ . ',' . __LINE__);
     return true;
 }

Пример #7

Показать файл

Файл: class.FacebookCrawler.php Проект: pepeleproso/ThinkUp

 /**
  * Fetch and save the posts and replies for the crawler's instance. This function will loop back through the
  * user's or pages archive of posts.
  *
  * The feed is requested first without a 'since' parameter. The first time a page yields no newly added
  * posts, the request is restarted with 'since' set to the publication time of the oldest stored post;
  * from then on the API's paging URLs are followed. Crawling stops when there is no next page, no data
  * is returned, or the maximum crawl time is exceeded.
  *
  * @return void
  * @throws APIOAuthException When the API response carries an error of type OAuthException
  */
 public function fetchPostsAndReplies()
 {
     $id = $this->instance->network_user_id;
     $network = $this->instance->network;
     $fetch_next_page = true;
     $current_page_number = 1;
     $next_api_request = $id . '/feed';
     $fields = self::$feed_fields;
     //Cap crawl time for very busy pages with thousands of likes/comments
     $fetch_stop_time = time() + $this->max_crawl_time;
     $api_request_params = null;
     $use_full_api_url = false;
     $dig_into_archives = false;
     while ($fetch_next_page) {
         if (!$use_full_api_url) {
             $stream = FacebookGraphAPIAccessor::apiRequest($next_api_request, $this->access_token, $api_request_params, $fields);
             // request params apply to a single request only
             $api_request_params = null;
         } else {
             //Use full paging URL
             $stream = FacebookGraphAPIAccessor::apiRequestFullURL($next_api_request, $this->access_token);
         }
         if (isset($stream->data) && is_array($stream->data) && sizeof($stream->data) > 0) {
             $this->logger->logInfo(sizeof($stream->data) . " Facebook posts found on page " . $current_page_number, __METHOD__ . ',' . __LINE__);
             $total_added_posts = $this->processStream($stream, $network, $current_page_number);
             if ($total_added_posts == 0) {
                 //No new posts were found, try going back into the archives
                 if (!$dig_into_archives) {
                     $dig_into_archives = true;
                     //Determine 'since', datetime of oldest post in datastore
                     $post_dao = DAOFactory::getDAO('PostDAO');
                     $since_post = $post_dao->getAllPosts($id, $network, 1, 1, true, 'pub_date', 'ASC');
                     // Resolve the oldest publication date once, so the log message below does not
                     // dereference $since_post[0] when no stored post was returned.
                     $since_pub_date = isset($since_post[0]) ? $since_post[0]->pub_date : 0;
                     $since = strtotime($since_pub_date);
                     $this->logger->logInfo("No Facebook posts found for {$id} here, digging into archives since " . $since_pub_date . " strtotime " . $since, __METHOD__ . ',' . __LINE__);
                     $api_request_params = array('since' => $since);
                     $use_full_api_url = false;
                     $next_api_request = $id . '/feed';
                 } else {
                     if (isset($stream->paging->next)) {
                         $next_api_request = $stream->paging->next;
                         $use_full_api_url = true;
                         //DEBUG
                         $this->logger->logInfo("Dug into archives, next page API request is " . $next_api_request, __METHOD__ . ',' . __LINE__);
                         $current_page_number++;
                     } else {
                         $fetch_next_page = false;
                     }
                 }
             } else {
                 if (isset($stream->paging->next)) {
                     $next_api_request = $stream->paging->next;
                     $use_full_api_url = true;
                     //DEBUG
                     $this->logger->logInfo("Next page API request is " . $next_api_request, __METHOD__ . ',' . __LINE__);
                     $current_page_number++;
                 } else {
                     $fetch_next_page = false;
                 }
             }
         } elseif (isset($stream->error->type) && $stream->error->type == 'OAuthException') {
             throw new APIOAuthException($stream->error->message);
         } else {
             $this->logger->logInfo("No Facebook posts found for ID {$id}", __METHOD__ . ',' . __LINE__);
             $fetch_next_page = false;
         }
         if (time() > $fetch_stop_time) {
             $fetch_next_page = false;
             $this->logger->logUserInfo("Stopping this service user's crawl because it has exceeded max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
         }
     }
 }

Пример #8

Показать файл

Файл: class.InstagramCrawler.php Проект: dgw/ThinkUp

 /**
  * Fetch and save the posts and replies for the crawler's instance. This function will loop back through the
  * user's or pages archive of posts.
  *
  * Before requesting media, friends are stored if the 'last_crawled_friends' plugin option is missing
  * or more than two days old. Media pages are then processed until there is no next page, a page is
  * empty, or the maximum crawl time is exceeded.
  */
 public function fetchPostsAndReplies()
 {
     $plugin_dao = DAOFactory::getDAO('PluginDAO');
     $plugin_id = $plugin_dao->getPluginId('instagram');
     $namespace = OptionDAO::PLUGIN_OPTIONS . '-' . $plugin_id;
     $id = $this->instance->network_user_id;
     $option_dao = DAOFactory::getDAO('OptionDAO');
     $network = $this->instance->network;
     //Checks if last friends update is over 2 days ago and runs storeFriends if it is.
     $friends_last_updated = $option_dao->getOptionByName($namespace, 'last_crawled_friends');
     // 172800 seconds = 2 days
     $friends_last_updated_check = microtime(true) - 172800;
     if ($friends_last_updated == NULL) {
         $this->storeFriends();
         $option_dao->insertOption($namespace, 'last_crawled_friends', microtime(true));
     } elseif ($friends_last_updated->option_value < $friends_last_updated_check) {
         $this->storeFriends();
         $option_dao->updateOptionByName($namespace, 'last_crawled_friends', microtime(true));
     }
     $fetch_next_page = true;
     $current_page_number = 1;
     $api_param = array();
     if ($this->instance->total_posts_in_system != 0) {
         $last_crawl = $this->instance->crawler_last_run;
         // NOTE(review): date() takes a format string as its first argument, so $last_crawl is
         // being used as the format here. The variable names suggest "last crawl minus one week"
         // was intended - confirm the format of crawler_last_run before changing this.
         $crawl_less_week = date($last_crawl, strtotime("-1 week"));
         $unix_less_week = strtotime($crawl_less_week);
         $api_param = array('min_timestamp' => $unix_less_week, 'count' => 20);
     } else {
         $api_param = array('count' => 20);
     }
     $this->logger->logUserInfo("About to request media", __METHOD__ . ',' . __LINE__);
     $posts = InstagramAPIAccessor::apiRequest('media', $id, $this->access_token, $api_param);
     $this->logger->logUserInfo("Media requested", __METHOD__ . ',' . __LINE__);
     //Cap crawl time for very busy pages with thousands of likes/comments
     $fetch_stop_time = time() + $this->max_crawl_time;
     //Determine 'since', datetime of oldest post in datastore
     $post_dao = DAOFactory::getDAO('PostDAO');
     $since_post = $post_dao->getAllPosts($id, $network, 1, 1, true, 'pub_date', 'ASC');
     $since = isset($since_post[0]) ? $since_post[0]->pub_date : 0;
     // last post minus one day, just to be safe
     $since = strtotime($since) - 60 * 60 * 24;
     if ($since < 0) {
         $since = 0;
     }
     while ($fetch_next_page) {
         if ($posts->count() > 0) {
             // Report the size of the media collection being processed; this previously read an
             // undefined $stream variable.
             $this->logger->logInfo($posts->count() . " Instagram posts found on page " . $current_page_number, __METHOD__ . ',' . __LINE__);
             $this->processPosts($posts, $network, $current_page_number);
             if ($posts->getNext() != null) {
                 // request the next page, starting from the cursor the current page reports
                 $api_param['max_id'] = $posts->getNext();
                 $posts = InstagramAPIAccessor::apiRequest('media', $id, $this->access_token, $api_param);
                 $current_page_number++;
             } else {
                 $fetch_next_page = false;
             }
         } else {
             $this->logger->logInfo("No Instagram posts found for ID {$id}", __METHOD__ . ',' . __LINE__);
             $fetch_next_page = false;
         }
         if (time() > $fetch_stop_time) {
             $fetch_next_page = false;
             $this->logger->logUserInfo("Stopping this service user's crawl because it has exceeded max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
         }
     }
 }

Пример #9

Показать файл

Файл: class.YouTubeCrawler.php Проект: dgw/ThinkUp

 /**
  *  Collects and stores information about the users videos from the YouTube APIs
  *  Currently collects and stores:
  *   - Basic video information such as title, author, description and location the video was shot in (if available)
  *  - Replies to the video
  *      -- This uses the YouTube V2 API due to the V3 API currently not supporting replies
  *   - All time counts for likes, dislikes, views, average view duration, average view percentage, favorites added,
  *   favorites removed, shares, subscribers gained and subscribers lost
  *     -- The totals for these are stored in the videos table, a history of these totals is stored in the
  *     count_history table under a type of [metric]_all_time and date of todays date
  *    -- A record of these metrics for indivdual days is also saved in the count_history table under a type of
  *    [metric] and date of the day the metric represents usually two days ago due to a delay in the availability
  *      of data from the Analytics API
  * @return null
  */
 public function fetchInstanceUserVideos()
 {
     // Overview of the crawl performed below:
     //   1. Look up the channel (uploads playlist ID, channel ID, subscriber count) and record the subscriber count.
     //   2. Page through the uploads playlist; for every video store its details, all-time counts and
     //      the per-day counts for two days ago.
     //   3. For videos with new comments, page through the comments (YouTube V2 API) and store them as posts.
     //   4. Mark the post archive as loaded once a crawl finishes without timing out, or after 20 attempts.
     $video_dao = DAOFactory::getDAO('VideoDAO');
     $user_dao = DAOFactory::getDAO('UserDAO');
     $post_dao = DAOFactory::getDAO('PostDAO');
     $count_history_dao = DAOFactory::getDAO('CountHistoryDAO');
     $instance_dao = DAOFactory::getDAO('InstanceDAO');
     // Get the user's upload playlist ID
     $fields_for_ids = array('part' => 'contentDetails,statistics', 'mine' => 'true');
     $various_ids = $this->youtube_api_accessor->apiRequest('channels', $this->access_token, $fields_for_ids);
     $upload_id = $various_ids->items[0]->contentDetails->relatedPlaylists->uploads;
     // Also get their channel ID as we'll need it later on
     $channel_id = $various_ids->items[0]->id;
     // There are some required attributes about the author that YouTube doesn't return for the videos so we need
     // to query the database for them
     $author_details = $user_dao->getDetails($this->instance->network_user_id, 'youtube');
     $user_id = $this->instance->network_user_id;
     // Update the user's subscriber count
     $subscriber_count = $various_ids->items[0]->statistics->subscriberCount;
     $author_details->follower_count = $subscriber_count;
     $user_dao->updateUser($author_details);
     $count_history_dao->insert($user_id, 'youtube', $subscriber_count, null, 'subscriber_count');
     // Calculate the time at which we should stop fetching videos
     $end_time = time() + $this->max_crawl_time;
     // Keep track of if we finished the crawl early due to timing out
     $had_to_finish_early = false;
     // Check if we already loaded all the old posts for this user.
     // This must be read from $this->instance; there is no local $instance in this method.
     $archive_loaded = $this->instance->is_archive_loaded_posts;
     // Number of attempts made so far to load the archive. Kept as a plain integer so the
     // comparison against the 20-attempt limit at the end of the method is a numeric one.
     $attempt_count = 0;
     // If the archive isn't loaded yet keep track of how many times we've tried to load it
     if (!$archive_loaded) {
         $attempts = $count_history_dao->getLatestCountByNetworkUserIDAndType($user_id, 'youtube', 'youtube_archive_attempts');
         // No stored count means this is the first crawler run, so the counter starts from 0
         if (isset($attempts['count'])) {
             $attempt_count = (int) $attempts['count'];
         }
         $attempt_count++;
         $count_history_dao->insert($user_id, 'youtube', $attempt_count, null, 'youtube_archive_attempts', null);
     }
     // Now page through their videos collecting the data
     $videos_fields = array('part' => 'snippet', 'maxResults' => '25', 'playlistId' => $upload_id, 'pageToken' => null);
     // We may get multiple pages
     do {
         // This is a page of IDs of videos the user has uploaded
         $user_videos = $this->youtube_api_accessor->apiRequest('playlistItems', $this->access_token, $videos_fields);
         // For each video store the relevant details about it
         foreach ($user_videos->items as $video) {
             // If we've hit the max crawl time stop (leaves both the video loop and the paging loop)
             if (time() >= $end_time) {
                 $this->logger->logUserInfo("Stopping this service users crawl because it has exceeded max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
                 $had_to_finish_early = true;
                 break 2;
             }
             // Start every video with a clean attribute set so optional values (e.g. 'geo') from the
             // previous video can not leak into this one
             $video_attributes = array();
             $video_id = $video->snippet->resourceId->videoId;
             // Get the title, description, likes, dislikes, views, and details about where
             // the video was taken from the data API
             $video_fields = array('id' => $video_id, 'part' => 'statistics,id,snippet,recordingDetails,status');
             $video_details = $this->youtube_api_accessor->apiRequest('videos', $this->access_token, $video_fields);
             // Check we haven't used up our quota before touching the response payload
             if (isset($video_details->error)) {
                 $this->logger->logError('Error querying YouTube Data API V3 ', __METHOD__ . ',' . __LINE__);
                 break;
             }
             $item = $video_details->items[0];
             $video_attributes['post_text'] = $item->snippet->title;
             $video_attributes['description'] = $item->snippet->description;
             $video_attributes['likes'] = $item->statistics->likeCount;
             $video_attributes['dislikes'] = $item->statistics->dislikeCount;
             $video_attributes['views'] = $item->statistics->viewCount;
             // Keep track of these all time counts
             $count_history_dao->insert($user_id, 'youtube', $video_attributes['likes'], $video_id, 'likes_all_time');
             $count_history_dao->insert($user_id, 'youtube', $video_attributes['dislikes'], $video_id, 'dislikes_all_time');
             $count_history_dao->insert($user_id, 'youtube', $video_attributes['views'], $video_id, 'views_all_time');
             $video_attributes['pub_date'] = $item->snippet->publishedAt;
             $video_attributes['post_id'] = $item->id;
             $video_attributes['location'] = $item->recordingDetails->locationDescription;
             $video_attributes['place'] = $item->recordingDetails->locationDescription;
             // Coordinates are optional; 'geo' is only set when the API returned a latitude
             if (isset($item->recordingDetails->latitude)) {
                 $video_attributes['geo'] = $item->recordingDetails->latitude . "," . $item->recordingDetails->longitude;
             }
             $video_attributes['is_protected'] = self::determinePrivacyStatus($item->status->privacyStatus);
             $today = date('Y-m-d');
             // publishedAt starts with the date in Y-m-d form; keep only that part
             $upload_date = substr($item->snippet->publishedAt, 0, 10);
             // Get the favourites added, favourites removed, shares, subscribers gained, subscribers lost
             // estimated minutes watched, average view duration, average view percentage
             $analytics_fields = array('ids' => 'channel==' . $channel_id, 'start-date' => $upload_date, 'end-date' => $today, 'metrics' => 'favoritesAdded,favoritesRemoved,shares,subscribersGained,subscribersLost,' . 'estimatedMinutesWatched,averageViewDuration,averageViewPercentage,views,likes,dislikes', 'filters' => 'video==' . $video_id);
             $video_analytics_details = $this->youtube_analytics_api_accessor->apiRequest('reports', $this->access_token, $analytics_fields);
             // Check we haven't used up our quota
             if (isset($video_analytics_details->error)) {
                 $this->logger->logError('Error querying YouTube Analytics API', __METHOD__ . ',' . __LINE__);
                 break;
             }
             // The row's columns follow the order of the 'metrics' list requested above
             $analytics_item = $video_analytics_details->rows[0];
             // If the video is new we may not get any of these values back, but they can't be null
             if (isset($analytics_item)) {
                 $video_attributes['favorites_added'] = $analytics_item[0];
                 $video_attributes['favorites_removed'] = $analytics_item[1];
                 $video_attributes['shares'] = $analytics_item[2];
                 $video_attributes['subscribers_gained'] = $analytics_item[3];
                 $video_attributes['subscribers_lost'] = $analytics_item[4];
                 $video_attributes['minutes_watched'] = $analytics_item[5];
                 $video_attributes['average_view_duration'] = $analytics_item[6];
                 $video_attributes['average_view_percentage'] = $analytics_item[7];
                 // Keep track of these all time counts
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[0], $video_id, 'favorites_added_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[1], $video_id, 'favorites_removed_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[2], $video_id, 'shares_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[3], $video_id, 'subscribers_gained_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[4], $video_id, 'subscribers_lost_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[5], $video_id, 'minutes_watched_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[6], $video_id, 'average_view_duration_all_time');
                 $count_history_dao->insert($user_id, 'youtube', $analytics_item[7], $video_id, 'average_view_percentage_all_time');
             } else {
                 // So set them to 0
                 $video_attributes['favorites_added'] = 0;
                 $video_attributes['favorites_removed'] = 0;
                 $video_attributes['shares'] = 0;
                 $video_attributes['subscribers_gained'] = 0;
                 $video_attributes['subscribers_lost'] = 0;
                 $video_attributes['minutes_watched'] = 0;
                 $video_attributes['average_view_duration'] = 0;
                 $video_attributes['average_view_percentage'] = 0;
             }
             $video_attributes['author_user_id'] = $this->instance->network_user_id;
             $video_attributes['author_username'] = $this->instance->network_username;
             $video_attributes['author_fullname'] = $author_details->full_name;
             $video_attributes['author_avatar'] = $author_details->avatar;
             $video_attributes['source'] = '';
             $video_attributes['network'] = 'youtube';
             $video_dao->addVideo($video_attributes);
             // Now collect per day count data for 2 days ago (testing has shown analytics data is delayed by 2 days)
             $two_days_ago = date('Y-m-d', strtotime("-2 day", strtotime($today)));
             // Reuse the same metrics/filter, narrowed to that single day
             $analytics_fields['start-date'] = $two_days_ago;
             $analytics_fields['end-date'] = $two_days_ago;
             $analytics_today_details = $this->youtube_analytics_api_accessor->apiRequest('reports', $this->access_token, $analytics_fields);
             // Check we haven't used up our quota
             if (isset($analytics_today_details->error)) {
                 $this->logger->logError('Error querying YouTube Analytics API', __METHOD__ . ',' . __LINE__);
                 break;
             }
             $todays_analytics = $analytics_today_details->rows[0];
             // Check we got data and if not skip this part
             if (isset($todays_analytics)) {
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[0], $video_id, 'favorites_added', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[1], $video_id, 'favorites_removed', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[2], $video_id, 'shares', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[3], $video_id, 'subscribers_gained', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[4], $video_id, 'subscribers_lost', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[5], $video_id, 'minutes_watched', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[6], $video_id, 'average_view_duration', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[7], $video_id, 'average_view_percentage', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[8], $video_id, 'views', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[9], $video_id, 'likes', $two_days_ago);
                 $count_history_dao->insert($user_id, 'youtube', $todays_analytics[10], $video_id, 'dislikes', $two_days_ago);
             }
             // Check to see how many comments we already have for this video and if there are no new ones skip
             // comment collection as it takes a long time.
             $video_in_db = $video_dao->getVideoByID($video_id, 'youtube');
             $comments_in_db = $video_in_db->reply_count_cache;
             if (!isset($comments_in_db)) {
                 $comments_in_db = 0;
             }
             $api_comments = $item->statistics->commentCount;
             $comments_collected = 0;
             // if this video has any new comments capture those
             if ($api_comments > 0 && $api_comments > $comments_in_db) {
                 // Request the first page of comments for this video
                 $comments_fields = array('alt' => 'json');
                 if (isset($this->developer_key)) {
                     $comments_fields['key'] = $this->developer_key;
                 }
                 $comments = $this->youtube_api_v2_accessor->apiRequest('videos/' . $video_id . '/comments', $comments_fields);
                 // Check we haven't used up our quota
                 if (isset($comments->errors)) {
                     $this->logger->logError('Error querying YouTube Data API V2 ', __METHOD__ . ',' . __LINE__);
                     break;
                 }
                 do {
                     // Iterate through each comment and store the details
                     foreach ($comments->feed->entry as $comment) {
                         // We may have only needed to collect a few new comments so abort if we have everything
                         if ($api_comments == $comments_in_db) {
                             break 2;
                         }
                         // If the user has specified a limit on the number of comments per video to collect each
                         // crawl check we haven't exceeded it
                         if (isset($this->maximum_comments) && $comments_collected >= $this->maximum_comments) {
                             $this->logger->logUserInfo("Stopping collection of comments for video due to reaching " . "limit of " . $this->maximum_comments . " comments.", __METHOD__ . ',' . __LINE__);
                             break 2;
                         }
                         // We may spend a long time collecting comments so also check here if we've exceeded the max
                         // time specified by the user (break 4 leaves both comment loops and both video loops)
                         if (time() >= $end_time) {
                             $this->logger->logUserInfo("Stopping this service users crawl because it has exceeded " . "max time of " . $this->max_crawl_time / 60 . " minute(s). ", __METHOD__ . ',' . __LINE__);
                             $had_to_finish_early = true;
                             break 4;
                         }
                         // The id is returned in the XML as part of a long URL, we only want the last part of that
                         // URL
                         $id_string = explode('/', $comment->id->{'$t'});
                         // This will be the last element of id_string
                         $comment_store['post_id'] = $id_string[sizeof($id_string) - 1];
                         // The post text is the comment they made
                         // Remove byte order markers from the comment text from:
                         // http://stackoverflow.com/questions/3255993/how-do-i-remove-i-from-the-beginning
                         // -of-a-file#comment9330944_3256183
                         $comment_store['post_text'] = preg_replace('/\\x{EF}\\x{BB}\\x{BF}/', '', $comment->content->{'$t'});
                         // The author username is the user's G+ display name which we need to query for
                         // To get the G+ ID of this commenter we need to visit their YouTube profile page, the ID
                         // needed to get to this user's page is the last element of the author URI
                         $user_id_string = explode('/', $comment->author[0]->uri->{'$t'});
                         $name = $this->youtube_api_v2_accessor->apiRequest('users/' . $user_id_string[sizeof($user_id_string) - 1], $comments_fields);
                         $gplus_id = $name->entry->{'yt$googlePlusUserId'}->{'$t'};
                         // Now we have their G+ ID we can get their details from the G+ API
                         $gplus_fields = array('fields' => 'displayName,id,image,tagline,verified');
                         $user_details = $this->google_plus_api_accessor->apiRequest('people/' . $gplus_id, $this->access_token, $gplus_fields);
                         // Sometimes G+ says the ID is invalid or the user doesn't have a G+ ID.
                         // The error code is only read when an error is actually present in the response.
                         $gplus_lookup_failed = $gplus_id == '' || (isset($user_details->error->code) && $user_details->error->code == '404');
                         if ($gplus_lookup_failed) {
                             // Use V2 of the YouTube api to get their details
                             $comment_store['author_username'] = $name->entry->{'yt$username'}->{'$t'};
                             $comment_store['author_fullname'] = $name->entry->author[0]->name->{'$t'};
                             $comment_store["author_avatar"] = $name->entry->{'media$thumbnail'}->url;
                             // In this case the user id is their YouTube user ID
                             $comment_store['author_user_id'] = $user_id_string[sizeof($user_id_string) - 1];
                             self::fetchUserFromYouTube($user_id_string[sizeof($user_id_string) - 1], 'youtube_crawler');
                             // If we still didn't get these details we can't store this comment
                             if ($comment_store['author_username'] == null || $comment_store['author_fullname'] == null || $comment_store["author_avatar"] == null) {
                                 continue;
                             }
                         } elseif (isset($user_details->error)) {
                             // Check we haven't exceeded the G+ API quota
                             $this->logger->logError('Error querying Google Plus API ', __METHOD__ . ',' . __LINE__);
                             break;
                         } else {
                             $comment_store['author_username'] = $user_details->displayName;
                             $comment_store['author_fullname'] = $user_details->displayName;
                             $comment_store["author_avatar"] = $user_details->image->url;
                             // The author user id is their G+ ID
                             $comment_store['author_user_id'] = $gplus_id;
                             // Make sure we have this commenter in the database
                             self::fetchUser($gplus_id, 'youtube crawler');
                         }
                         // The date they posted the comment ("Y-m-d" part, a space, then the "H:i:s" part)
                         $comment_store['pub_date'] = substr($comment->published->{'$t'}, 0, 10) . " " . substr($comment->published->{'$t'}, 11, 8);
                         // Source of the comment
                         $comment_store['source'] = "";
                         // Comments can not be private
                         $comment_store['is_protected'] = false;
                         // Set the network to youtube
                         $comment_store['network'] = 'youtube';
                         // The ID of the author of the video
                         $comment_store['in_reply_to_user_id'] = $this->instance->network_user_id;
                         // The ID of the video this comment is a reply to
                         $comment_store['in_reply_to_post_id'] = $video_id;
                         $insert_id = $post_dao->addPost($comment_store);
                         // If the insert id is null and we're not going back to collect the whole archive
                         // we've already captured comments from this point so move on
                         if ($insert_id == null && $archive_loaded) {
                             break 2;
                         }
                         $comments_in_db++;
                         $comments_collected++;
                     }
                     $test = self::determineIfMoreCommentsExist($comments, $video_id);
                     // If there is another page of comments make a request for them
                     if ($test['next']) {
                         $comments = $this->youtube_api_v2_accessor->basicApiRequest($test['url']);
                         // Check we haven't used up our quota
                         if (isset($comments->errors)) {
                             $this->logger->logError('Error querying YouTube Data API V2 ', __METHOD__ . ',' . __LINE__);
                             break;
                         } elseif ($comments == null) {
                             // If the comments come back as null it's because we've been making too many requests too
                             // quickly. The YouTube api doesn't return valid JSON telling us this though so
                             // json_decode returns null, so back off for 30 seconds and then try again
                             $error_message = "Querying the YouTube API too often waiting for 30 seconds, to ";
                             $error_message .= "prevent this delay add a developer key.";
                             $this->logger->logError($error_message, __METHOD__ . ',' . __LINE__);
                             sleep(30);
                             $comments = $this->youtube_api_v2_accessor->basicApiRequest($test['url']);
                         }
                     }
                 } while ($test['next']);
             }
             // If we have another page of videos then get the token for the page.
             // This runs at the end of each fully processed video; it is skipped for an iteration
             // that leaves the loop early through one of the breaks above.
             if (isset($user_videos->nextPageToken)) {
                 $videos_fields['pageToken'] = $user_videos->nextPageToken;
             }
         }
         // If we have another page of videos and haven't loaded all this user's videos yet keep going;
         // if we have loaded all this user's videos then stop after 1 page (at most 25 videos, see maxResults)
     } while (isset($user_videos->nextPageToken) && !$archive_loaded);
     // If we didn't have to finish the crawl early due to timing out we have collected all this user's videos;
     // if we have tried 20 times or more, stop trying to go back and load the post archive
     if (!$had_to_finish_early || $attempt_count >= 20) {
         $instance_dao->setPostArchiveLoaded($user_id, 'youtube');
     }
 }

Пример #10

Показать файл

Файл: class.FacebookCrawler.php Проект: rgroves/ThinkUp

 /**
  * Convert parsed JSON of a profile or page's posts into ThinkUp posts and users
  * @param Object $stream
  * @param str $source The network for the post; by default 'facebook'
  */
 private function processStream($stream, $network)
 {
     $thinkup_posts = array();
     $total_added_posts = 0;
     $thinkup_users = array();
     $total_added_users = 0;
     $thinkup_links = array();
     $total_links_added = 0;
     $thinkup_likes = array();
     $total_added_likes = 0;
     $profile = null;
     $post_dao = DAOFactory::getDAO('PostDAO');
     $must_process_likes = true;
     $must_process_comments = true;
     foreach ($stream->data as $p) {
         $post_id = explode("_", $p->id);
         $post_id = $post_id[1];
         if ($profile == null) {
             $profile = $this->fetchUserInfo($p->from->id, $network, 'Post stream');
         }
         //assume profile comments are private and page posts are public
         $is_protected = $network == 'facebook' ? 1 : 0;
         //get likes count
         $likes_count = 0;
         if (isset($p->likes)) {
             if (is_int($p->likes)) {
                 $likes_count = $p->likes;
             } elseif (isset($p->likes->count) && is_int($p->likes->count)) {
                 $likes_count = $p->likes->count;
             }
         }
         //Figure out if we have to process likes and comments
         $post_in_storage = $post_dao->getPost($post_id, $network);
         if (isset($post_in_storage)) {
             if ($post_in_storage->favlike_count_cache >= $likes_count) {
                 $must_process_likes = false;
                 $this->logger->logInfo("Already have " . $likes_count . " likes for post ID " . $post_id . "; Skipping like processing this crawler run", __METHOD__ . ',' . __LINE__);
             }
             if (isset($p->comments->count)) {
                 if ($post_in_storage->reply_count_cache >= $p->comments->count) {
                     $must_process_comments = false;
                     $this->logger->logInfo("Already have " . $p->comments->count . " comments for post ID " . $post_id . "; Skipping comments processing", __METHOD__ . ',' . __LINE__);
                 }
             }
         }
         if (isset($profile) && !isset($post_in_storage)) {
             $posts_to_process = array("post_id" => $post_id, "author_username" => $profile->username, "author_fullname" => $profile->username, "author_avatar" => $profile->avatar, "author_user_id" => $p->from->id, "post_text" => isset($p->message) ? $p->message : '', "pub_date" => $p->created_time, "favlike_count_cache" => $likes_count, "in_reply_to_user_id" => '', "in_reply_to_post_id" => '', "source" => '', 'network' => $network, 'is_protected' => $is_protected, 'location' => $profile->location);
             array_push($thinkup_posts, $posts_to_process);
             $total_added_posts = $total_added_posts + $this->storePostsAndAuthors($thinkup_posts, "Owner stream");
             //free up memory
             $thinkup_posts = array();
             if (isset($p->source) || isset($p->link)) {
                 // there's a link to store
                 $link_url = isset($p->source) ? $p->source : $p->link;
                 $link = new Link(array("url" => $link_url, "expanded_url" => $link_url, "image_src" => isset($p->picture) ? $p->picture : '', "caption" => isset($p->caption) ? $p->caption : '', "description" => isset($p->description) ? $p->description : '', "title" => isset($p->name) ? $p->name : '', "network" => $network, "post_id" => $post_id));
                 array_push($thinkup_links, $link);
             }
             $total_links_addded = $total_links_added + $this->storeLinks($thinkup_links);
             if ($total_links_added > 0) {
                 $this->logger->logUserSuccess("Collected {$total_links_added} new links", __METHOD__ . ',' . __LINE__);
             }
             //free up memory
             $thinkup_links = array();
         }
         if ($must_process_comments) {
             if (isset($p->comments)) {
                 $comments_captured = 0;
                 if (isset($p->comments->data)) {
                     $post_comments = $p->comments->data;
                     $post_comments_count = isset($post_comments) ? sizeof($post_comments) : 0;
                     if (is_array($post_comments) && sizeof($post_comments) > 0) {
                         foreach ($post_comments as $c) {
                             if (isset($c->from)) {
                                 $comment_id = explode("_", $c->id);
                                 $comment_id = $comment_id[2];
                                 //Get posts
                                 $posts_to_process = array("post_id" => $comment_id, "author_username" => $c->from->name, "author_fullname" => $c->from->name, "author_avatar" => 'https://graph.facebook.com/' . $c->from->id . '/picture', "author_user_id" => $c->from->id, "post_text" => $c->message, "pub_date" => $c->created_time, "in_reply_to_user_id" => $profile->user_id, "in_reply_to_post_id" => $post_id, "source" => '', 'network' => $network, 'is_protected' => $is_protected, 'location' => '');
                                 array_push($thinkup_posts, $posts_to_process);
                                 $comments_captured = $comments_captured + 1;
                             }
                         }
                     }
                 }
                 $total_added_posts = $total_added_posts + $this->storePostsAndAuthors($thinkup_posts, "Post stream comments");
                 //free up memory
                 $thinkup_posts = array();
                 // collapsed comment thread
                 if (isset($p->comments->count) && $p->comments->count > $comments_captured) {
                     $api_call = 'https://graph.facebook.com/' . $p->from->id . '_' . $post_id . '/comments?access_token=' . $this->access_token;
                     do {
                         $comments_stream = FacebookGraphAPIAccessor::rawApiRequest($api_call);
                         if (isset($comments_stream) && is_array($comments_stream->data)) {
                             foreach ($comments_stream->data as $c) {
                                 if (isset($c->from)) {
                                     $comment_id = explode("_", $c->id);
                                     $comment_id = $comment_id[sizeof($comment_id) - 1];
                                     //Get posts
                                     $posts_to_process = array("post_id" => $comment_id, "author_username" => $c->from->name, "author_fullname" => $c->from->name, "author_avatar" => 'https://graph.facebook.com/' . $c->from->id . '/picture', "author_user_id" => $c->from->id, "post_text" => $c->message, "pub_date" => $c->created_time, "in_reply_to_user_id" => $profile->user_id, "in_reply_to_post_id" => $post_id, "source" => '', 'network' => $network, 'is_protected' => $is_protected, 'location' => '');
                                     array_push($thinkup_posts, $posts_to_process);
                                 }
                             }
                             $total_added_posts = $total_added_posts + $this->storePostsAndAuthors($thinkup_posts, "Posts stream comments collapsed");
                             //free up memory
                             $thinkup_posts = array();
                             if (isset($comments_stream->paging->next)) {
                                 $api_call = str_replace('\\u00257C', '|', $comments_stream->paging->next);
                             }
                         } else {
                             // no comments (pun intended)
                             break;
                         }
                     } while (isset($comments_stream->paging->next));
                 }
             }
         }
         //process "likes"
         if ($must_process_likes) {
             if (isset($p->likes)) {
                 $likes_captured = 0;
                 if (isset($p->likes->data)) {
                     $post_likes = $p->likes->data;
                     $post_likes_count = isset($post_likes) ? sizeof($post_likes) : 0;
                     if (is_array($post_likes) && sizeof($post_likes) > 0) {
                         foreach ($post_likes as $l) {
                             if (isset($l->name) && isset($l->id)) {
                                 //Get users
                                 $ttu = array("user_name" => $l->name, "full_name" => $l->name, "user_id" => $l->id, "avatar" => 'https://graph.facebook.com/' . $l->id . '/picture', "location" => '', "description" => '', "url" => '', "is_protected" => 1, "follower_count" => 0, "post_count" => 0, "joined" => '', "found_in" => "Likes", "network" => 'facebook');
                                 //Users are always set to network=facebook
                                 array_push($thinkup_users, $ttu);
                                 $fav_to_add = array("favoriter_id" => $l->id, "network" => $network, "author_user_id" => $profile->user_id, "post_id" => $post_id);
                                 array_push($thinkup_likes, $fav_to_add);
                                 $likes_captured = $likes_captured + 1;
                             }
                         }
                     }
                 }
                 $total_added_users = $total_added_users + $this->storeUsers($thinkup_users, "Likes");
                 $total_added_likes = $total_added_likes + $this->storeLikes($thinkup_likes);
                 //free up memory
                 $thinkup_users = array();
                 $thinkup_likes = array();
                 // collapsed likes
                 if (isset($p->likes->count) && $p->likes->count > $likes_captured) {
                     $api_call = 'https://graph.facebook.com/' . $p->from->id . '_' . $post_id . '/likes?access_token=' . $this->access_token;
                     do {
                         $likes_stream = FacebookGraphAPIAccessor::rawApiRequest($api_call);
                         if (isset($likes_stream) && is_array($likes_stream->data)) {
                             foreach ($likes_stream->data as $l) {
                                 if (isset($l->name) && isset($l->id)) {
                                     //Get users
                                     $ttu = array("user_name" => $l->name, "full_name" => $l->name, "user_id" => $l->id, "avatar" => 'https://graph.facebook.com/' . $l->id . '/picture', "location" => '', "description" => '', "url" => '', "is_protected" => 1, "follower_count" => 0, "post_count" => 0, "joined" => '', "found_in" => "Likes", "network" => 'facebook');
                                     //Users are always set to network=facebook
                                     array_push($thinkup_users, $ttu);
                                     $fav_to_add = array("favoriter_id" => $l->id, "network" => $network, "author_user_id" => $p->from->id, "post_id" => $post_id);
                                     array_push($thinkup_likes, $fav_to_add);
                                     $likes_captured = $likes_captured + 1;
                                 }
                             }
                             $total_added_users = $total_added_users + $this->storeUsers($thinkup_users, "Likes");
                             $total_added_likes = $total_added_likes + $this->storeLikes($thinkup_likes);
                             //free up memory
                             $thinkup_users = array();
                             $thinkup_likes = array();
                             if (isset($likes_stream->paging->next)) {
                                 $api_call = str_replace('\\u00257C', '|', $likes_stream->paging->next);
                             }
                         } else {
                             // no likes
                             break;
                         }
                     } while (isset($likes_stream->paging->next));
                 }
             }
             //free up memory
             $thinkup_users = array();
             $thinkup_likes = array();
         }
     }
     if ($total_added_posts > 0) {
         $this->logger->logUserSuccess("Collected {$total_added_posts} posts", __METHOD__ . ',' . __LINE__);
     } else {
         $this->logger->logUserInfo("No new posts found.", __METHOD__ . ',' . __LINE__);
     }
     if ($total_added_users > 0) {
         $this->logger->logUserSuccess("Collected {$total_added_users} users", __METHOD__ . ',' . __LINE__);
     } else {
         $this->logger->logUserInfo("No new users found.", __METHOD__ . ',' . __LINE__);
     }
     if ($total_added_likes > 0) {
         $this->logger->logUserSuccess("Collected {$total_added_likes} likes", __METHOD__ . ',' . __LINE__);
     } else {
         $this->logger->logUserInfo("No new likes found.", __METHOD__ . ',' . __LINE__);
     }
 }

Пример #11

Показать файл

Файл: class.TwitterCrawler.php Проект: rkabir/ThinkUp

    /**
     * Crawl the instance owner's favorites and add them to the database, with help from the private methods
     * 'maintFavsFetch' and 'archivingFavsFetch'.
     *
     * A user's favorites are first crawled in 'archiving mode', which walks from the last (oldest) page of
     * tweets forward to the newest.  That archiving crawl happens only once; the crawler attempts to finish it
     * in a single run, and if the available API calls run out it resumes from where it stopped on the next run.
     * Once archiving reaches page 1 the crawler switches to 'maintenance mode' and pages backwards from then on:
     * first until it meets the last fav it previously processed, then N further pages (N comes from the
     * favs_older_pages option) to pick up older tweets that were fav'd out of chronological order.
     * The bookkeeping for both stages is kept in the user's tu_instances entry.
     *
     * The Twitter favorites API has bugs that are worked around here (see the inline comments): information
     * from Twitter can not currently be used to decide when the fetch loop terminates, so somewhat more work
     * than necessary may be done, and un-fav'd tweets are not currently removed from the database.
     *
     * @return bool Always true, including when no API calls are available and nothing is fetched.
     */
    public function fetchInstanceFavorites() {
        // Nothing can be done without API budget, so bail out before touching any bookkeeping.
        if (!$this->api->available || !$this->api->available_api_calls_for_crawler) {
            $this->logger->logInfo("terminating fetchInstanceFavorites-- no API calls available",
            __METHOD__ . ',' . __LINE__);
            return true;
        }

        //@TODO Can we get this from API?
        $favs_per_page = 20; // number of favs per page retrieved from the API call

        $this->logger->logUserInfo("Checking for new favorites.", __METHOD__ . ',' . __LINE__);

        // Bookkeeping left on the instance by the previous crawl, plus the owner's current favorites count.
        $stored_favs_count = $this->instance->favorites_profile;
        $this->logger->logInfo("last favs count: {$stored_favs_count}", __METHOD__ . ',' . __LINE__);
        $page_marker = $this->instance->last_page_fetched_favorites;
        $bookmark_fav_id = $this->instance->last_favorite_id;
        $current_favs_count = $this->user->favorites_count;
        $this->logger->logInfo("curr favs count: {$current_favs_count}", __METHOD__ . ',' . __LINE__);

        // Highest page number reachable within the API's archive limit.
        $final_page = round($this->api->archive_limit / $favs_per_page);

        // Empty bookkeeping values are treated as zero.
        $page_marker = ($page_marker == "") ? 0 : $page_marker;
        $this->logger->logInfo("got last_page_fetched_favorites: {$page_marker}",
        __METHOD__ . ',' . __LINE__);
        $bookmark_fav_id = ($bookmark_fav_id == "") ? 0 : $bookmark_fav_id;

        // the owner favs count, from twitter, is currently unreliable and may be less than the actual number of
        // favs, by a large margin.  So, we still go ahead and calculate the number of 'missing' tweets based on
        // this info, but currently do not use it for fetch loop termination.
        $this->logger->logInfo("owner favs: " . $this->user->favorites_count . ", instance owner favs in system: " .
        $this->instance->owner_favs_in_system, __METHOD__ . ',' . __LINE__);
        $missing_favs = $this->user->favorites_count - $this->instance->owner_favs_in_system;
        $this->logger->logInfo("favs missing: {$missing_favs}", __METHOD__ . ',' . __LINE__);

        // A page marker of 1 means archiving has completed and we are in maintenance mode; anything else
        // means archiving/first-fetch.  Each branch only defines the variables its own mode reads in the
        // fetch loop further down.
        $in_maintenance = ($page_marker == 1);
        $mode = $in_maintenance ? 1 : 0; // 0 = archiving, 1 = maintenance (value is logged below)
        if ($in_maintenance) {
            $pending_favs = $missing_favs;
            $this->logger->logInfo("new favs to add/missing: {$pending_favs}", __METHOD__ . ',' . __LINE__);
            $maint_page = 1;
            $anchor_fav_id = $bookmark_fav_id;
        } else {
            $pending_favs = $current_favs_count - $stored_favs_count;
            $this->logger->logInfo("new favs to add: {$pending_favs}", __METHOD__ . ',' . __LINE__);

            // work out the page to resume from: where the last run stopped, shifted by the pages that favs
            // added since then account for
            $added_pages = ceil($pending_favs / $favs_per_page);
            $this->logger->logInfo("extra pages: {$added_pages}", __METHOD__ . ',' . __LINE__);
            $first_pass_done = false;
            // a marker of 0 means favs have never been crawled for this instance
            $resume_page = ($page_marker == 0) ? ($added_pages + 1) : ($page_marker + $added_pages);
            if ($resume_page > $final_page) {
                $resume_page = $final_page + 1;
            }
        }

        $this->logger->logInfo("total last favs count: {$stored_favs_count}" .
           ", last page fetched: {$page_marker}, last fav id: {$bookmark_fav_id}",
        __METHOD__ . ',' . __LINE__);
        $this->logger->logInfo("current favs count: {$current_favs_count}" .
           ", new favs to add: {$pending_favs}, last page of favs: {$final_page}, mode: {$mode}",
        __METHOD__ . ',' . __LINE__);
        $this->logger->logInfo("in fetchInstanceFavorites: API available: " . $this->api->available .
           ", avail for crawler: " . $this->api->available_api_calls_for_crawler,
        __METHOD__ . ',' . __LINE__);

        // Loop state that is threaded through the fetch helpers and handed back by them on every pass.
        $keep_going = true;
        $saved_count = 0;
        $searching_older = false;
        $stop_page = 0;

        while ($this->api->available && $this->api->available_api_calls_for_crawler > 0 && $keep_going) {
            if ($in_maintenance) {
                list($saved_count, $maint_page, $searching_older, $stop_page, $pending_favs, $bookmark_fav_id,
                $page_marker, $keep_going) =
                $this->maintFavsFetch($anchor_fav_id, $saved_count, $maint_page, $searching_older, $stop_page,
                $pending_favs, $bookmark_fav_id, $page_marker, $keep_going);
                continue;
            }

            // archiving mode: the first pass starts from the computed resume page, every later pass from
            // the page marker returned by the previous pass
            if ($first_pass_done) {
                $page_argument = $page_marker;
            } else {
                $this->logger->logInfo("in 'first_archiving_fetch' clause", __METHOD__ . ',' . __LINE__);
                $page_argument = $resume_page;
                $first_pass_done = true;
            }
            list($saved_count, $bookmark_fav_id, $page_marker, $keep_going) =
            $this->archivingFavsFetch($saved_count, $bookmark_fav_id, $page_argument, $keep_going);
        }

        // write the updated bookkeeping back to the instance
        $this->logger->logInfo("new_favs_to_add: {$pending_favs}, fcount: {$saved_count}",
        __METHOD__ . ',' . __LINE__);
        $this->logger->logInfo("new 'last fav id': {$bookmark_fav_id}", __METHOD__ . ',' . __LINE__);

        $this->instance->last_favorite_id = $bookmark_fav_id;
        $this->instance->last_page_fetched_favorites = $page_marker;
        $this->instance->favorites_profile = $current_favs_count;
        $this->logger->logUserSuccess("Saved {$saved_count} new favorites.", __METHOD__ . ',' . __LINE__);
        return true;
    }