Exemple #1
0
/**
 * Builds the shell command line that runs fups.php with the PHP command-line binary.
 *
 * The command is assembled as:
 *   <prefix><FUPS_CMDLINE_PHP_PATH> -d max_execution_time=0 <script> <args> <redirect> <bg_token>
 * On Windows the prefix is 'start /b ' and there is no trailing token; elsewhere
 * there is no prefix and the command ends with '&'.
 *
 * @param array $params Optional keys:
 *                      - 'token'             => string, passed as -t; also switches the
 *                                               redirect so that stdout and stderr are
 *                                               appended to the file named by
 *                                               make_errs_filename(token).
 *                      - 'settings_filename' => string, passed as -i.
 *                      - 'output_filename'   => string, passed as -o.
 *                      - 'chained'           => truthy adds the -c flag.
 *                      - 'quiet'             => truthy adds the -q flag.
 *
 * @return string The complete command line.
 */
function make_php_exec_cmd($params)
{
    $is_windows = strtoupper(substr(PHP_OS, 0, 3)) === 'WIN';
    $prefix = $is_windows ? 'start /b ' : '';
    $redirect = $is_windows ? '1>NUL' : '1>/dev/null';
    $bg_token = $is_windows ? '' : '&';

    // Collect the arguments in order and join them once, instead of repeating
    // the "add a separating space" logic for every option.
    $args = array();
    if (isset($params['token'])) {
        $args[] = '-t ' . escapeshellarg($params['token']);
        // The filename is quoted so that a path containing spaces or shell
        // metacharacters cannot split or alter the redirection.
        $redirect = ' 1>>' . escapeshellarg(make_errs_filename($params['token'])) . ' 2>&1';
    }
    if (isset($params['settings_filename'])) {
        $args[] = '-i ' . escapeshellarg($params['settings_filename']);
    }
    if (isset($params['output_filename'])) {
        $args[] = '-o ' . escapeshellarg($params['output_filename']);
    }
    if (isset($params['chained']) && $params['chained'] == true) {
        $args[] = '-c';
    }
    if (isset($params['quiet']) && $params['quiet'] == true) {
        $args[] = '-q';
    }

    // Prefer the absolute path of the script next to this file; fall back to
    // the bare name if it cannot be resolved.
    $fups_path = realpath(__DIR__ . '/fups.php');
    if ($fups_path === false) {
        $fups_path = 'fups.php';
    }

    // The script path is quoted because the installation directory may contain
    // spaces. FUPS_CMDLINE_PHP_PATH is intentionally left as configured.
    return $prefix . FUPS_CMDLINE_PHP_PATH . ' -d max_execution_time=0 ' . escapeshellarg($fups_path) . ' ' . implode(' ', $args) . ' ' . $redirect . ' ' . $bg_token;
}
Exemple #2
0
 /**
  * Runs the scrape as a sequence of numbered progress levels (0 to 7).
  *
  * After setting up the cookie file and the cURL handle, each stage below is
  * guarded by a check of $this->progress_level, so an invocation that starts
  * with a progress level greater than 0 skips the stages before it. Every
  * stage ends by calling the method named 'hook_after__' followed by that
  * stage's entry in $this->progress_levels.
  *
  * Stages: 0 initialise the post search, 1 scrape search pages, 2 sort topics
  * and posts, 3 retrieve post contents, 4 extract per-thread info, 5 report
  * posts that were not found, 6 write output, 7 email non-fatal errors to the
  * admin (web-initiated runs only).
  *
  * NOTE(review): check_do_chain() is not visible here; presumably it may hand
  * the work over to a fresh process that resumes from the saved state, which
  * is what the "continuing after having chained" branches cater for - confirm.
  */
 public function run()
 {
     // Only HTTP and HTTPS are allowed, both for requests and for redirects.
     $valid_protocols = CURLPROTO_HTTP | CURLPROTO_HTTPS;
     // The cookie file is keyed on the token for web-initiated runs and on the settings filename otherwise.
     $this->cookie_filename = make_cookie_filename($this->web_initiated ? $this->token : $this->settings_filename);
     if ($this->dbg) {
         $this->write_err('Set cookie_filename to "' . $this->cookie_filename . '".');
     }
     if (!$this->was_chained) {
         @unlink($this->cookie_filename);
         // Ensure that any existing cookie file on commandline reruns doesn't mess with us.
         // (When chained, the cookie file is kept.)
     }
     $this->ch = curl_init();
     if ($this->ch === false) {
         $this->exit_err('Failed to initialise cURL.', __FILE__, __METHOD__, __LINE__);
     }
     // Redirects are not followed automatically, and response headers are included in the returned string.
     $opts = array(CURLOPT_USERAGENT => FUPS_USER_AGENT, CURLOPT_FOLLOWLOCATION => false, CURLOPT_RETURNTRANSFER => true, CURLOPT_HEADER => true, CURLOPT_TIMEOUT => 20, CURLOPT_COOKIEJAR => $this->cookie_filename, CURLOPT_COOKIEFILE => $this->cookie_filename, CURLOPT_PROTOCOLS => $valid_protocols, CURLOPT_REDIR_PROTOCOLS => $valid_protocols);
     if (!curl_setopt_array($this->ch, $opts)) {
         $this->exit_err('Failed to set the following cURL options:' . PHP_EOL . var_export($opts, true), __FILE__, __METHOD__, __LINE__);
     }
     # Login if necessary
     if ($this->supports_feature('login')) {
         if ($this->was_chained) {
             if ($this->dbg) {
                 $this->write_err('Not bothering to check whether to log in again, because we\'ve just chained.');
             }
         } else {
             $this->check_do_login();
         }
     }
     # Find all of the user's posts through the search feature
     if ($this->progress_level == 0) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         $this->check_get_username();
         $this->search_page_num = 1;
         $this->init_post_search_counter();
         $this->init_search_user_posts();
         // The hook name is derived from the level just completed, before the level is advanced.
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__init_user_post_search();
     }
     # Scrape the search result pages, one page per iteration
     if ($this->progress_level == 1) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         // This loop does not advance progress_level itself; it ends once something it
         // calls has moved the level on (presumably find_author_posts_via_search_page()
         // when there are no further results - TODO confirm).
         do {
             $this->write_status('Scraping search page for posts starting from page #' . $this->search_page_num . '.');
             $num_posts_found = $this->find_author_posts_via_search_page();
             if ($this->dbg) {
                 $this->write_err('Found ' . $num_posts_found . ' posts.');
             }
             $this->total_posts += $num_posts_found;
             $this->search_page_num++;
             $this->check_do_chain();
         } while ($this->progress_level == 1);
         // The level has already been advanced by the time the loop exits, hence the "- 1".
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level - 1];
         $this->{$hook_method}();
         // hook_after__user_post_search();
     }
     # Sort topics and posts
     if ($this->progress_level == 2) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         $this->write_status('Sorting posts and topics prior to scraping posts\' content.');
         # Sort topics in ascending alphabetical order
         uasort($this->posts_data, 'cmp_topics_topic');
         # Sort posts within each topic into ascending timestamp order
         foreach ($this->posts_data as $topicid => $dummy) {
             // Taken by reference so that the sort is applied to the stored array, not to a copy.
             $posts =& $this->posts_data[$topicid]['posts'];
             uasort($posts, 'cmp_posts_date');
         }
         if ($this->dbg) {
             // Debug dump of the sorted structure: one line per topic, then one line per post.
             $this->write_err('SORTED POSTS::');
             foreach ($this->posts_data as $topicid => $topic) {
                 $this->write_err("\tTopic: {$topic['topic']}\tTopic ID: {$topicid}");
                 foreach ($topic['posts'] as $postid => $p) {
                     $newts = strftime('%c', $p['timestamp']);
                     $this->write_err("\t\tTime: {$newts} ({$p['ts']}); Post ID: {$postid}");
                 }
             }
         }
         $this->write_status('Finished sorting posts and topics. Now scraping contents of ' . $this->total_posts . ' posts.');
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__topic_post_sort();
     }
     # Retrieve the contents of all of the user's posts
     if ($this->progress_level == 3) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         # If the current topic ID is already set, then we are continuing after having chained.
         $go = is_null($this->current_topic_id);
         foreach ($this->posts_data as $topicid => $dummy) {
             // When resuming, skip topics until the recorded one is reached; that topic
             // itself is processed again, because it is recorded before its posts are fetched.
             if (!$go && $this->current_topic_id == $topicid) {
                 $go = true;
             }
             if ($go) {
                 $this->current_topic_id = $topicid;
                 $t =& $this->posts_data[$topicid];
                 $posts =& $t['posts'];
                 // Keep making passes over this topic's posts until a complete pass
                 // finds no post that still needs its content retrieved.
                 $done = false;
                 while (!$done) {
                     $done = true;
                     foreach ($posts as $postid => $dummy2) {
                         $p =& $posts[$postid];
                         // Fetch only posts that have no content yet and are not already recorded as not found.
                         if ($p['content'] == null && !isset($this->posts_not_found[$postid])) {
                             $this->get_post_contents($t['forumid'], $topicid, $postid);
                             $this->write_status('Retrieved ' . $this->num_posts_retrieved . ' of ' . $this->total_posts . ' posts.');
                             $done = false;
                         }
                         $this->check_do_chain();
                     }
                 }
             }
         }
         # Reset this for progress level 4
         $this->current_topic_id = null;
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__posts_retrieval();
     }
     # Extract per-thread information: thread author and forum
     if ($this->progress_level == 4) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         # If the current topic ID is already set, then we are continuing after having chained.
         $go = is_null($this->current_topic_id);
         $total_threads = count($this->posts_data);
         foreach ($this->posts_data as $topicid => $dummy) {
             if (!$go) {
                 // Unlike level 3, the recorded topic is skipped as well: current_topic_id
                 // is only set here after the topic has been handled (see below).
                 if ($this->current_topic_id == $topicid) {
                     $go = true;
                 }
             } else {
                 $topic =& $this->posts_data[$topicid];
                 $url = $this->get_topic_url($topic['forumid'], $topicid);
                 $this->set_url($url);
                 $html = $this->do_send();
                 if (!$this->skins_preg_match('thread_author', $html, $matches)) {
                     // No match: record the problem for the admin and use a placeholder author.
                     $this->write_and_record_err_admin("Error: couldn't find a match for the author of the thread with topic id '{$topicid}'.  The URL of the page is <" . $url . '>.', __FILE__, __METHOD__, __LINE__, $html);
                     $topic['startedby'] = '???';
                 } else {
                     $topic['startedby'] = $matches[1];
                     if ($this->dbg) {
                         $this->write_err("Added author of '{$topic['startedby']}' for topic id '{$topicid}'.");
                     }
                     $this->num_thread_infos_retrieved++;
                     $this->write_status('Retrieved author and topic name for ' . $this->num_thread_infos_retrieved . ' of ' . $total_threads . ' threads.');
                 }
                 // Record the topic as handled before the chain check.
                 $this->current_topic_id = $topicid;
                 $this->check_do_chain();
             }
         }
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__extract_per_thread_info();
     }
     # Warn about missing posts
     if ($this->progress_level == 5) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         if ($this->posts_not_found) {
             $this->write_err(PHP_EOL . PHP_EOL . PHP_EOL . "The contents of the following posts were not found::" . PHP_EOL . PHP_EOL . PHP_EOL);
             foreach ($this->posts_not_found as $postid => $dummy) {
                 // find_post() is expected to yield either a falsy value or a (post, topic, topic ID) triple.
                 $a = $this->find_post($postid);
                 if ($a == false) {
                     $this->write_err("\tError: failed to find post with ID '{$postid}' in internal data.");
                 } else {
                     list($p, $t, $topicid) = $a;
                     $this->write_err("\t{$p['posttitle']} ({$t['topic']}; {$p['timestamp']}; {$t['forum']}; forumid: {$t['forumid']}; topicid: {$topicid}; postid: {$postid}; " . $this->get_post_url($t['forumid'], $topicid, $postid) . ')');
                 }
             }
         }
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__handle_missing_posts();
     }
     # Write output
     if ($this->progress_level == 6) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         $this->write_status('Writing output.');
         # Write all output variants
         $this->write_output();
         # Signal that we are done
         $this->write_status('DONE');
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__write_output();
     }
     # Potentially send an admin email re non-fatal errors.
     if ($this->progress_level == 7) {
         if ($this->dbg) {
             $this->write_err('Entered progress level ' . $this->progress_level);
         }
         if ($this->web_initiated) {
             // NOTE(review): unlike the admin error file below, this read is not silenced with @.
             $errs = file_get_contents(make_errs_filename($this->token));
             // Disable error messages because if there are no errors then this file
             // won't exist - we want to avoid an error message telling us as much.
             $errs_admin = @file_get_contents(make_errs_admin_filename($this->token));
             if ($errs || $errs_admin) {
                 // Build one email body out of whichever of the two error files has content.
                 $err_msg = '';
                 if ($errs) {
                     // Cap the amount of error-file text included in the email, noting the truncation.
                     $len = strlen($errs);
                     $trunc_msg = '';
                     if ($len > FUPS_MAX_ERROR_FILE_EMAIL_LENGTH) {
                         $errs = substr($errs, 0, FUPS_MAX_ERROR_FILE_EMAIL_LENGTH);
                         $trunc_msg = ' (truncated from ' . number_format($len) . ' bytes to ' . number_format(FUPS_MAX_ERROR_FILE_EMAIL_LENGTH) . ' bytes)';
                     }
                     // No need to include the settings and classname if admin error info exists too,
                     // because settings and classname are already included each time the admin error
                     // file is appended to.
                     if (!$errs_admin) {
                         $settings_msg = static::get_settings_msg_s(static::get_settings_str());
                         $classname_msg = static::get_classname_msg_s(get_class($this));
                         $err_msg .= $settings_msg . PHP_EOL . PHP_EOL . $classname_msg . PHP_EOL;
                     }
                     $err_msg .= 'The following non-fatal errors were recorded in the error file' . $trunc_msg . ':' . PHP_EOL . PHP_EOL . $errs . PHP_EOL;
                 }
                 if ($errs_admin) {
                     // Separate the two sections when both are present.
                     if ($errs) {
                         $err_msg .= PHP_EOL . PHP_EOL;
                     }
                     // Same capping as above, with the admin-file limit.
                     $len = strlen($errs_admin);
                     $trunc_msg = '';
                     if ($len > FUPS_MAX_ADMIN_FILE_EMAIL_LENGTH) {
                         $errs_admin = substr($errs_admin, 0, FUPS_MAX_ADMIN_FILE_EMAIL_LENGTH);
                         $trunc_msg = ' (truncated from ' . number_format($len) . ' bytes to ' . number_format(FUPS_MAX_ADMIN_FILE_EMAIL_LENGTH) . ' bytes)';
                     }
                     $err_msg .= 'The following extended non-fatal error messages were recorded in the admin error file' . $trunc_msg . ':' . PHP_EOL . PHP_EOL . $errs_admin . PHP_EOL;
                 }
                 static::send_err_mail_to_admin_s($err_msg, $this->token, false);
             }
         }
         $hook_method = 'hook_after__' . $this->progress_levels[$this->progress_level];
         $this->progress_level++;
         $this->{$hook_method}();
         // hook_after__check_send_non_fatal_err_email();
     }
 }
Exemple #3
0
                        $file_errs .= ' ';
                    }
                    $file_errs .= 'Error: unable to write to the serialization file.';
                }
                $cmd = make_php_exec_cmd(array('token' => $token));
                if (!try_run_bg_proc($cmd)) {
                    $err = 'Apologies, the server encountered a technical error: it was unable to initiate the background process to perform the task of scraping, sorting and finally presenting your posts. The command used was:<br />' . PHP_EOL . '<br />' . PHP_EOL . $cmd . '<br />' . PHP_EOL . '<br />' . PHP_EOL . 'You might like to try again or <a href="' . FUPS_CONTACT_URL . '">contact me</a> about this error.';
                }
            }
        }
    }
} else {
    $token = $_GET['token'];
    if (validate_token($token, $err)) {
        $status_filename = make_status_filename($token);
        $errs_filename = make_errs_filename($token);
        $errs_admin_filename = make_errs_admin_filename($token);
    }
}
// Unless an earlier step already reported an error, read the status file and
// the two error files for this token.
if (!$err) {
    // filemtime() doubles as the existence check for the status file; @ silences its warning.
    $ts = @filemtime($status_filename);
    if ($ts === false) {
        $err = 'The status file for your FUPS process with token "' . $token . '" does not exist - possibly because you have already deleted it.';
    }
    // NOTE(review): these reads are attempted even when the status file was just
    // found to be missing; with @ a failed read yields false without a warning.
    $status = @file_get_contents($status_filename);
    $errs = @file_get_contents($errs_filename);
    $errs_admin = @file_get_contents($errs_admin_filename);
}
$head_extra = '';
if (!$err) {
    global $fups_url_run, $fups_url_homepage;
Exemple #4
0
    $op_info_filename = make_output_info_filename($token);
    if (is_file($op_info_filename)) {
        $output_info = json_decode(file_get_contents($op_info_filename), true);
        if (is_array($output_info)) {
            $output_dir = null;
            foreach ($output_info as $opv) {
                try_delete_file($opv['filepath'], '"' . $opv['filepath'] . '"', false, $err, $num_files_deleted, false);
                $output_dir = dirname($opv['filepath']);
            }
            @rmdir($output_dir);
        }
    }
    if (validate_token($token, $err)) {
        try_delete_file(make_settings_filename($token), 'settings', true, $err, $num_files_deleted);
        try_delete_file(make_status_filename($token), 'status', false, $err, $num_files_deleted);
        try_delete_file(make_errs_filename($token), 'error', false, $err, $num_files_deleted);
        try_delete_file(make_errs_admin_filename($token), 'errors (admin)', false, $err, $num_files_deleted, false);
        try_delete_file(make_output_info_filename($token), 'output info', false, $err, $num_files_deleted, false);
        try_delete_file(make_serialize_filename($token), 'serialisation', true, $err, $num_files_deleted);
        try_delete_file(make_cookie_filename($token), 'cookie', true, $err, $num_files_deleted, false);
        try_delete_file(make_cancellation_filename($token), 'cancellation', true, $err, $num_files_deleted, false);
    }
}
function try_delete_file($filename, $name, $sensitive, &$err, &$num_files_deleted, $add_err_if_file_not_present = true)
{
    global $fups_url_homepage;
    if (!is_file($filename)) {
        if ($add_err_if_file_not_present) {
            $err .= $err ? ' Another' : 'An';
            $err .= ' error occurred: the ' . $name . ' file does not exist on disk; possibly you have already deleted it or it was never created in the first place.';
        }