Example #1
0
 /**
  * Runs the scrape, starting at (or resuming from) $this->progress_level.
  *
  * After setting up the cookie file and the cURL handle, and logging in when the
  * 'login' feature is supported, the method walks through the progress levels in
  * order:
  *
  *   0 - initialise the search for the user's posts
  *   1 - scrape the search pages for the user's posts
  *   2 - sort topics, and the posts within each topic
  *   3 - retrieve the contents of each post
  *   4 - extract per-thread information (thread author)
  *   5 - report posts whose contents were not found
  *   6 - write the output
  *   7 - for web-initiated runs, email the admin about any non-fatal errors
  *
  * Each level finishes by invoking the method named 'hook_after__<name>', where
  * <name> is the entry of $this->progress_levels for that level. Long-running
  * loops call check_do_chain(); when $this->was_chained is set the cookie file is
  * kept and the login check is skipped, and levels 3 and 4 use
  * $this->current_topic_id to find the topic to continue from.
  *
  * Failures while initialising cURL are reported through exit_err().
  */
 public function run()
 {
     $valid_protocols = CURLPROTO_HTTP | CURLPROTO_HTTPS;
     $this->cookie_filename = make_cookie_filename($this->web_initiated ? $this->token : $this->settings_filename);
     if ($this->dbg) {
         $this->write_err('Set cookie_filename to "' . $this->cookie_filename . '".');
     }
     if (!$this->was_chained) {
         // Ensure that any existing cookie file on commandline reruns doesn't mess with us.
         // Errors are suppressed because the file may simply not exist.
         @unlink($this->cookie_filename);
     }
     $this->ch = curl_init();
     if ($this->ch === false) {
         $this->exit_err('Failed to initialise cURL.', __FILE__, __METHOD__, __LINE__);
     }
     // Redirects are not followed automatically, headers are included in the returned
     // response, and both the initial and any redirect protocol are limited to HTTP(S).
     $opts = array(
         CURLOPT_USERAGENT => FUPS_USER_AGENT,
         CURLOPT_FOLLOWLOCATION => false,
         CURLOPT_RETURNTRANSFER => true,
         CURLOPT_HEADER => true,
         CURLOPT_TIMEOUT => 20,
         CURLOPT_COOKIEJAR => $this->cookie_filename,
         CURLOPT_COOKIEFILE => $this->cookie_filename,
         CURLOPT_PROTOCOLS => $valid_protocols,
         CURLOPT_REDIR_PROTOCOLS => $valid_protocols,
     );
     if (!curl_setopt_array($this->ch, $opts)) {
         $this->exit_err('Failed to set the following cURL options:' . PHP_EOL . var_export($opts, true), __FILE__, __METHOD__, __LINE__);
     }
     # Login if necessary
     if ($this->supports_feature('login')) {
         if ($this->was_chained) {
             if ($this->dbg) {
                 $this->write_err('Not bothering to check whether to log in again, because we\'ve just chained.');
             }
         } else {
             $this->check_do_login();
         }
     }
     # Find all of the user's posts through the search feature
     if ($this->progress_level == 0) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         $this->check_get_username();
         $this->search_page_num = 1;
         $this->init_post_search_counter();
         $this->init_search_user_posts();
         // The hook name is resolved before the level is advanced, so that it refers
         // to the level that has just completed.
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__init_user_post_search();
     }
     if ($this->progress_level == 1) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         // This method does not advance the progress level itself during this stage;
         // presumably find_author_posts_via_search_page() does so once the search
         // results are exhausted - TODO confirm.
         do {
             $this->write_status('Scraping search page for posts starting from page #' . $this->search_page_num . '.');
             $num_posts_found = $this->find_author_posts_via_search_page();
             if ($this->dbg) {
                 $this->write_err('Found ' . $num_posts_found . ' posts.');
             }
             $this->total_posts += $num_posts_found;
             $this->search_page_num++;
             $this->check_do_chain();
         } while ($this->progress_level == 1);
         // The level has already moved on by the time the loop ends, hence the "- 1"
         // to name the hook after the level that has just completed.
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level - 1];
         $this->{$hook_method}();
         // hook_after__user_post_search();
     }
     # Sort topics and posts
     if ($this->progress_level == 2) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         $this->write_status('Sorting posts and topics prior to scraping posts\' content.');
         # Sort topics in ascending alphabetical order
         uasort($this->posts_data, 'cmp_topics_topic');
         # Sort posts within each topic into ascending timestamp order
         foreach ($this->posts_data as $topicid => $dummy) {
             // Bind by reference so that uasort() sorts the stored array in place.
             $posts =& $this->posts_data[$topicid]['posts'];
             uasort($posts, 'cmp_posts_date');
         }
         // Break the reference so that later uses of $posts cannot write into posts_data.
         unset($posts);
         if ($this->dbg) {
             $this->write_err('SORTED POSTS::');
             foreach ($this->posts_data as $topicid => $topic) {
                 $this->write_err("\tTopic: {$topic['topic']}\tTopic ID: {$topicid}");
                 foreach ($topic['posts'] as $postid => $p) {
                     // NOTE: strftime() is deprecated as of PHP 8.1; it is kept here so
                     // that the format of this debug output does not change.
                     $newts = strftime('%c', $p['timestamp']);
                     $this->write_err("\t\tTime: {$newts} ({$p['ts']}); Post ID: {$postid}");
                 }
             }
         }
         $this->write_status('Finished sorting posts and topics. Now scraping contents of ' . $this->total_posts . ' posts.');
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__topic_post_sort();
     }
     # Retrieve the contents of all of the user's posts
     if ($this->progress_level == 3) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         # If the current topic ID is already set, then we are continuing after having chained.
         $go = is_null($this->current_topic_id);
         foreach ($this->posts_data as $topicid => $dummy) {
             // When continuing, skip ahead to the recorded topic and resume with that
             // topic itself (posts that already have content are skipped below).
             if (!$go && $this->current_topic_id == $topicid) {
                 $go = true;
             }
             if ($go) {
                 $this->current_topic_id = $topicid;
                 $t =& $this->posts_data[$topicid];
                 $posts =& $t['posts'];
                 // Keep passing over the topic's posts until a full pass retrieves nothing.
                 // NOTE(review): this terminates only if get_post_contents() either fills in
                 // the post's content or records the post in posts_not_found - TODO confirm.
                 $done = false;
                 while (!$done) {
                     $done = true;
                     foreach ($posts as $postid => $dummy2) {
                         $p =& $posts[$postid];
                         if ($p['content'] == null && !isset($this->posts_not_found[$postid])) {
                             $this->get_post_contents($t['forumid'], $topicid, $postid);
                             $this->write_status('Retrieved ' . $this->num_posts_retrieved . ' of ' . $this->total_posts . ' posts.');
                             $done = false;
                         }
                         $this->check_do_chain();
                     }
                 }
             }
         }
         // Break the references into posts_data. Without this, the later
         // "list($p, $t, $topicid) = ..." in progress level 5 would write through $p and
         // $t, overwriting the last post and topic processed here with another post's data.
         unset($t, $posts, $p);
         # Reset this for progress level 4
         $this->current_topic_id = null;
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__posts_retrieval();
     }
     # Extract per-thread information: thread author and forum
     if ($this->progress_level == 4) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         # If the current topic ID is already set, then we are continuing after having chained.
         $go = is_null($this->current_topic_id);
         $total_threads = count($this->posts_data);
         foreach ($this->posts_data as $topicid => $dummy) {
             if (!$go) {
                 // Here current_topic_id is recorded only after a topic has been handled
                 // (see below), so the matching topic is skipped and work resumes with
                 // the one after it.
                 if ($this->current_topic_id == $topicid) {
                     $go = true;
                 }
             } else {
                 $topic =& $this->posts_data[$topicid];
                 $url = $this->get_topic_url($topic['forumid'], $topicid);
                 $this->set_url($url);
                 $html = $this->do_send();
                 if (!$this->skins_preg_match('thread_author', $html, $matches)) {
                     $this->write_and_record_err_admin("Error: couldn't find a match for the author of the thread with topic id '{$topicid}'.  The URL of the page is <" . $url . '>.', __FILE__, __METHOD__, __LINE__, $html);
                     // Placeholder author, so that the topic still has a 'startedby' value.
                     $topic['startedby'] = '???';
                 } else {
                     $topic['startedby'] = $matches[1];
                     if ($this->dbg) {
                         $this->write_err("Added author of '{$topic['startedby']}' for topic id '{$topicid}'.");
                     }
                     $this->num_thread_infos_retrieved++;
                     $this->write_status('Retrieved author and topic name for ' . $this->num_thread_infos_retrieved . ' of ' . $total_threads . ' threads.');
                 }
                 $this->current_topic_id = $topicid;
                 $this->check_do_chain();
             }
         }
         // Break the reference into posts_data now that the loop is finished.
         unset($topic);
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__extract_per_thread_info();
     }
     # Warn about missing posts
     if ($this->progress_level == 5) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         if ($this->posts_not_found) {
             $this->write_err(PHP_EOL . PHP_EOL . PHP_EOL . "The contents of the following posts were not found::" . PHP_EOL . PHP_EOL . PHP_EOL);
             foreach ($this->posts_not_found as $postid => $dummy) {
                 $a = $this->find_post($postid);
                 if ($a == false) {
                     $this->write_err("\tError: failed to find post with ID '{$postid}' in internal data.");
                 } else {
                     // On success, find_post() supplies the post, its topic and the topic ID.
                     list($p, $t, $topicid) = $a;
                     $this->write_err("\t{$p['posttitle']} ({$t['topic']}; {$p['timestamp']}; {$t['forum']}; forumid: {$t['forumid']}; topicid: {$topicid}; postid: {$postid}; " . $this->get_post_url($t['forumid'], $topicid, $postid) . ')');
                 }
             }
         }
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__handle_missing_posts();
     }
     # Write output
     if ($this->progress_level == 6) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         $this->write_status('Writing output.');
         # Write all output variants
         $this->write_output();
         # Signal that we are done
         $this->write_status('DONE');
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__write_output();
     }
     # Potentially send an admin email re non-fatal errors.
     if ($this->progress_level == 7) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         if ($this->web_initiated) {
             $errs = file_get_contents(make_errs_filename($this->token));
             // Disable error messages because if there are no errors then this file
             // won't exist - we want to avoid an error message telling us as much.
             $errs_admin = @file_get_contents(make_errs_admin_filename($this->token));
             if ($errs || $errs_admin) {
                 $err_msg = '';
                 if ($errs) {
                     // Cap the amount of error file content included in the email, and say
                     // so in the message when truncation happened.
                     $len = strlen($errs);
                     $trunc_msg = '';
                     if ($len > FUPS_MAX_ERROR_FILE_EMAIL_LENGTH) {
                         $errs = substr($errs, 0, FUPS_MAX_ERROR_FILE_EMAIL_LENGTH);
                         $trunc_msg = ' (truncated from ' . number_format($len) . ' bytes to ' . number_format(FUPS_MAX_ERROR_FILE_EMAIL_LENGTH) . ' bytes)';
                     }
                     // No need to include the settings and classname if admin error info exists too,
                     // because settings and classname are already included each time the admin error
                     // file is appended to.
                     if (!$errs_admin) {
                         $settings_msg = static::get_settings_msg_s(static::get_settings_str());
                         $classname_msg = static::get_classname_msg_s(get_class($this));
                         $err_msg .= $settings_msg . PHP_EOL . PHP_EOL . $classname_msg . PHP_EOL;
                     }
                     $err_msg .= 'The following non-fatal errors were recorded in the error file' . $trunc_msg . ':' . PHP_EOL . PHP_EOL . $errs . PHP_EOL;
                 }
                 if ($errs_admin) {
                     // Separate the two sections when both are present.
                     if ($errs) {
                         $err_msg .= PHP_EOL . PHP_EOL;
                     }
                     $len = strlen($errs_admin);
                     $trunc_msg = '';
                     if ($len > FUPS_MAX_ADMIN_FILE_EMAIL_LENGTH) {
                         $errs_admin = substr($errs_admin, 0, FUPS_MAX_ADMIN_FILE_EMAIL_LENGTH);
                         $trunc_msg = ' (truncated from ' . number_format($len) . ' bytes to ' . number_format(FUPS_MAX_ADMIN_FILE_EMAIL_LENGTH) . ' bytes)';
                     }
                     $err_msg .= 'The following extended non-fatal error messages were recorded in the admin error file' . $trunc_msg . ':' . PHP_EOL . PHP_EOL . $errs_admin . PHP_EOL;
                 }
                 static::send_err_mail_to_admin_s($err_msg, $this->token, false);
             }
         }
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__check_send_non_fatal_err_email();
     }
 }
Example #2
0
            $output_dir = null;
            foreach ($output_info as $opv) {
                try_delete_file($opv['filepath'], '"' . $opv['filepath'] . '"', false, $err, $num_files_deleted, false);
                $output_dir = dirname($opv['filepath']);
            }
            @rmdir($output_dir);
        }
    }
    if (validate_token($token, $err)) {
        try_delete_file(make_settings_filename($token), 'settings', true, $err, $num_files_deleted);
        try_delete_file(make_status_filename($token), 'status', false, $err, $num_files_deleted);
        try_delete_file(make_errs_filename($token), 'error', false, $err, $num_files_deleted);
        try_delete_file(make_errs_admin_filename($token), 'errors (admin)', false, $err, $num_files_deleted, false);
        try_delete_file(make_output_info_filename($token), 'output info', false, $err, $num_files_deleted, false);
        try_delete_file(make_serialize_filename($token), 'serialisation', true, $err, $num_files_deleted);
        try_delete_file(make_cookie_filename($token), 'cookie', true, $err, $num_files_deleted, false);
        try_delete_file(make_cancellation_filename($token), 'cancellation', true, $err, $num_files_deleted, false);
    }
}
function try_delete_file($filename, $name, $sensitive, &$err, &$num_files_deleted, $add_err_if_file_not_present = true)
{
    global $fups_url_homepage;
    if (!is_file($filename)) {
        if ($add_err_if_file_not_present) {
            $err .= $err ? ' Another' : 'An';
            $err .= ' error occurred: the ' . $name . ' file does not exist on disk; possibly you have already deleted it or it was never created in the first place.';
        }
    } else {
        if (!unlink($filename)) {
            $err .= $err ? ' Another' : 'An';
            $err .= ' error occurred: failed to delete the ' . $name . ' file ' . ($sensitive ? '(contains username and password if you supplied them).' : '(does NOT contain either username or password).');