// Page setup: authenticate, authorize, and decide what this request renders.
// NOTE(review): require_login() presumably stops the request for users who
// are not logged in -- confirm against its definition.
require_login();
// Remove PHP's execution time limit for this request.
set_time_limit(0);
// The '@' silences the notice PHP raises when 'projectid' is absent from the
// request; the (possibly NULL) value is handed to validate_projectID().
$projectid = validate_projectID('projectid', @$_REQUEST['projectid']);
// NOTE(review): presumably aborts unless the current user may edit this
// project -- confirm against enforce_edit_authorization().
enforce_edit_authorization($projectid);
// $format determines what is presented from this page:
//   'html' - page is rendered with frequencies included
//   'file' - all words and frequencies are presented as a
//            downloaded file
// 'update' - update the list
// NOTE(review): the third argument ('html') looks like the default used when
// the parameter is missing, and the array the set of accepted values --
// confirm against get_enumerated_param().
$format = get_enumerated_param($_REQUEST, 'format', 'html', array('html', 'file', 'update'));
if ($format == "update") {
    // Add the words posted with this request to the project's existing
    // Bad Words List and save the combined list.
    $postedWords = parse_posted_words($_POST);
    $words = load_project_bad_words($projectid);
    $words = array_merge($words, $postedWords);
    save_project_bad_words($projectid, $words);
    // After updating, continue as a normal HTML page view.
    $format = "html";
}
// Translatable page title and introductory text.
$title = _("Candidates for project's Bad Words List from diff analysis");
$page_text = _("Displayed below are words from this project that are likely stealth scannos based on changes proofreaders have made to the project text.");
$page_text .= " ";
$page_text .= _("The results list was generated by comparing the uploaded OCR text and the most recent text of each page. OCRed words that WordCheck would not currently flag, but some instances of which were changed by proofreaders and some instances of which still appear in the project text are included in the results. The results list also shows how often, and how, the word was changed by proofreaders.");
// _get_word_list() returns five values, unpacked here in the order it
// returns them. Note that this runs after any 'update' above, so the
// computation sees the already-updated Bad Words List.
list($percent_changed, $instances_left, $messages, $instances_changed_to, $instances_changed) = _get_word_list($projectid);
if ($format == "file") {
    $filename = "{$projectid}_project_scannos.txt";
    header("Content-type: text/plain");
    header('Content-Disposition: attachment; filename="' . $filename . '"');
    // The cache-control and pragma is a hack for IE not accepting filenames
    header('Cache-Control: must-revalidate, post-check=0, pre-check=0');
    header('Pragma: public');
    // Process the $instances_[changed_to|changed|left] arrays with the
/**
 * Merge the WordCheck files of project $from_id into project $to_id.
 *
 * Three things are merged, each into the destination project only:
 *   - the good words list   (destination entries first, then source entries)
 *   - the bad words list    (same ordering)
 *   - good_word_suggestions.txt (source content appended to the destination)
 *
 * A list is left untouched (and an E_USER_WARNING is raised) when either
 * side cannot be loaded, so a load failure never overwrites the
 * destination's existing list.
 *
 * We're assuming the projects are in unavailable or waiting, so there
 * is going to be no need to put locks on the files or anything fancy.
 *
 * @param string $from_id  projectid whose WordCheck data is copied
 * @param string $to_id    projectid that receives the merged data
 */
function merge_wordcheck_files($from_id, $to_id)
{
    global $projects_dir;

    // good words
    $merged = _merge_wordcheck_word_lists(
        load_project_good_words($from_id),
        load_project_good_words($to_id),
        "good"
    );
    if (is_array($merged)) {
        save_project_good_words($to_id, $merged);
    }

    // bad words
    $merged = _merge_wordcheck_word_lists(
        load_project_bad_words($from_id),
        load_project_bad_words($to_id),
        "bad"
    );
    if (is_array($merged)) {
        save_project_bad_words($to_id, $merged);
    }

    // suggestions
    // the file format is complicated and may change
    // so we take the sledgehammer approach, as suggested by cpeel...
    $from_path = "{$projects_dir}/{$from_id}/good_word_suggestions.txt";
    $to_path = "{$projects_dir}/{$to_id}/good_word_suggestions.txt";

    // A missing source file is treated the same as an empty one.
    $from_suggs = "";
    if (is_file($from_path)) {
        $from_suggs = file_get_contents($from_path);
        if ($from_suggs === FALSE) {
            trigger_error("merge_wordcheck_files: unable to read '{$from_path}'", E_USER_WARNING);
            return;
        }
    }

    // Append rather than read-concatenate-rewrite: the end result is the
    // same (destination content followed by source content, and the
    // destination file is created if it did not exist), but a failed read
    // of the destination can no longer replace it with the source content.
    if (file_put_contents($to_path, $from_suggs, FILE_APPEND) === FALSE) {
        trigger_error("merge_wordcheck_files: unable to write '{$to_path}'", E_USER_WARNING);
    }
}

/**
 * Combine two loaded word lists, destination entries first.
 *
 * Callers elsewhere in this codebase treat a string returned by
 * load_project_*_words() as an error message, so anything that is not an
 * array is taken to mean the load failed.
 *
 * @param mixed  $from_words  result of loading the source project's list
 * @param mixed  $to_words    result of loading the destination project's list
 * @param string $list_name   "good" or "bad"; used only in the warning text
 *
 * @return array|null  the merged list, or NULL if either list failed to load
 */
function _merge_wordcheck_word_lists($from_words, $to_words, $list_name)
{
    foreach (array($from_words, $to_words) as $words) {
        if (!is_array($words)) {
            $detail = is_string($words) ? ": {$words}" : "";
            trigger_error("merge_wordcheck_files: could not load the {$list_name} word lists{$detail}", E_USER_WARNING);
            return NULL;
        }
    }
    return array_merge($to_words, $from_words);
}
 /**
  * Write this object's project settings to the database and keep the
  * project's on-disk artifacts in step.
  *
  * If $this->projectid is set, the existing `projects` row is updated, the
  * edit is logged, and the MARC record is refreshed. Otherwise a new project
  * is created: a fresh projectid, a new row, a project directory, an
  * optional MARC record, and initial good/bad word lists (copied from
  * $this->clone_projectid when cloning). In both cases the Dublin Core file
  * is then (re)generated and, if $this->posted is set, the project is
  * transitioned to PROJ_SUBMIT_PG_POSTED.
  *
  * Returns nothing. Terminates the script (die/exit) on a database error,
  * a logging error, a failed mkdir, or a failed transition.
  */
 function save_to_db()
 {
     global $projects_dir, $pguser;
     // An empty postednum is stored as SQL NULL; anything else as a quoted literal.
     $postednum_str = $this->postednum == "" ? "NULL" : "'{$this->postednum}'";
     // Call addslashes() on any members of $this that might contain
     // single-quotes/apostrophes (because they are unescaped, and
     // would otherwise break the query).
     // NOTE(review): the other members (up_projectid, language, genre,
     // difficulty_level, special_code, image_source, checkedoutby, postednum,
     // image_preparer, text_preparer) are interpolated into the SQL as-is --
     // confirm they are validated before this method is called.
     // This SET-clause fragment is shared by the UPDATE and INSERT below.
     $common_project_settings = "\n            t_last_edit    = UNIX_TIMESTAMP(),\n            up_projectid   = '{$this->up_projectid}',\n            nameofwork     = '" . addslashes($this->nameofwork) . "',\n            authorsname    = '" . addslashes($this->authorsname) . "',\n            language       = '{$this->language}',\n            genre          = '{$this->genre}',\n            difficulty     = '{$this->difficulty_level}',\n            special_code   = '{$this->special_code}',\n            clearance      = '" . addslashes($this->clearance) . "',\n            comments       = '" . addslashes($this->comments) . "',\n            image_source   = '{$this->image_source}',\n            scannercredit  = '" . addslashes($this->scannercredit) . "',\n            checkedoutby   = '{$this->checkedoutby}',\n            postednum      = {$postednum_str},\n            image_preparer = '{$this->image_preparer}',\n            text_preparer  = '{$this->text_preparer}',\n            extra_credits  = '" . addslashes($this->extra_credits) . "',\n            deletion_reason= '" . addslashes($this->deletion_reason) . "'\n        ";
     // $pm_setter is the optional "username = ...," fragment that assigns the
     // project manager; it stays empty when the PM is not to be changed.
     $pm_setter = '';
     if (user_is_a_sitemanager()) {
         // can change PM
         // NOTE(review): the literal '******' here (and in the two other
         // assignments to $pm_setter below) looks like a redacted placeholder
         // rather than a real username expression -- confirm the intended value.
         $pm_setter = " username = '******',";
     } else {
         if (isset($this->clone_projectid)) {
             // cloning a project. The PM should be the same as
             // that of the project being cloned, if the user
             // isn't an SA
             $res = mysql_query("\n                SELECT username\n                FROM projects\n                WHERE projectid='{$this->clone_projectid}'\n            ") or die(mysql_error());
             // NOTE(review): $projectmanager is fetched but never used after
             // this point, which contradicts the comment above -- see the
             // '******' note.
             list($projectmanager) = mysql_fetch_row($res);
             $pm_setter = " username = '******',";
         }
     }
     if (isset($this->projectid)) {
         // We are updating an already-existing project.
         // needn't change $pm_setter, as there is no change if the user
         // isn't an SA
         // find out what we are changing from
         $old_pih = new ProjectInfoHolder();
         // set_from_db() returns an error string; '' is treated as success.
         $fatal_error = $old_pih->set_from_db(TRUE, $this->projectid);
         if ($fatal_error != '') {
             $fatal_error = _('site error') . ': ' . $fatal_error;
             echo "<br><center><font size='+1' color='#ff0000'><b>{$fatal_error}</b></font></center>";
             exit;
         }
         // Names of the fields that differ between the stored project and
         // this object; used below and recorded in the project event log.
         $changed_fields = get_changed_fields($this, $old_pih);
         // We're particularly interested in knowing
         // when the project comments change.
         if (!in_array('comments', $changed_fields)) {
             // no change
             $tlcc_setter = '';
         } else {
             // changed!
             $tlcc_setter = 't_last_change_comments = UNIX_TIMESTAMP(),';
         }
         // We also want to know if the edit is resulting in the project
         // effectively being checked out to a new PPer
         if ($old_pih->state == PROJ_POST_FIRST_CHECKED_OUT && in_array('checkedoutby', $changed_fields)) {
             $md_setter = 'modifieddate = UNIX_TIMESTAMP(),';
             $PPer_checkout = TRUE;
         } else {
             $md_setter = '';
             $PPer_checkout = FALSE;
         }
         // Update the projects database with the updated info
         // Each optional setter is either '' or ends with a comma, so the
         // SET list stays well-formed whichever of them are present.
         mysql_query("\n                UPDATE projects SET\n                    {$pm_setter}\n                    {$tlcc_setter}\n                    {$md_setter}\n                    {$common_project_settings}\n                WHERE projectid='{$this->projectid}'\n            ") or die(mysql_error());
         $details1 = implode(' ', $changed_fields);
         if ($details1 == '') {
             // There are no changed fields.
             // Don't just save '' for the details1 column,
             // because then do_history() won't be able to distinguish
             // this case (no changed fields) from old cases
             // (edit occurred before we started recording changed fields).
             // Instead, use a special value.
             $details1 = 'NONE';
         }
         // log_project_event() returns a non-empty value on failure, which
         // is treated as fatal.
         $e = log_project_event($this->projectid, $GLOBALS['pguser'], 'edit', $details1);
         if (!empty($e)) {
             die($e);
         }
         if ($PPer_checkout) {
             // we fake the project transition...
             // (same state passed twice, plus the new checkedoutby value)
             $e = log_project_event($this->projectid, $GLOBALS['pguser'], 'transition', PROJ_POST_FIRST_CHECKED_OUT, PROJ_POST_FIRST_CHECKED_OUT, $this->checkedoutby);
             if (!empty($e)) {
                 die($e);
             }
         }
         // Update the MARC record with any info we've received.
         $project = new Project($this->projectid);
         $marc_record = $project->load_marc_record();
         $this->update_marc_record_from_post($marc_record);
         $project->save_marc_record($marc_record);
     } else {
         // We are creating a new project
         // Generate a new project ID carrying the "projectID" prefix.
         $this->projectid = uniqid("projectID");
         // If neither branch above chose a PM, fall back to this setter.
         if ('' == $pm_setter) {
             $pm_setter = "username = '******',";
         }
         // Insert a new row into the projects table
         mysql_query("\n                INSERT INTO projects\n                SET\n                    projectid    = '{$this->projectid}',\n                    {$pm_setter}\n                    state        = '" . PROJ_NEW . "',\n                    modifieddate = UNIX_TIMESTAMP(),\n                    t_last_change_comments = UNIX_TIMESTAMP(),\n                    {$common_project_settings}\n            ") or die(mysql_error());
         $e = log_project_event($this->projectid, $GLOBALS['pguser'], 'creation');
         if (!empty($e)) {
             die($e);
         }
         $e = project_allow_pages($this->projectid);
         if (!empty($e)) {
             die($e);
         }
         // Make a directory in the projects_dir for this project
         // The explicit chmod() follows because the mode passed to mkdir()
         // is reduced by the process umask.
         mkdir("{$projects_dir}/{$this->projectid}", 0777) or die("System error: unable to mkdir '{$projects_dir}/{$this->projectid}'");
         chmod("{$projects_dir}/{$this->projectid}", 0777);
         // Do MARC record manipulations
         $project = new Project($this->projectid);
         $marc_record = new MARCRecord();
         // Save original MARC record, if provided
         // NOTE(review): unserialize() is applied to a base64-decoded member;
         // if that value can originate from the client, this is unsafe --
         // confirm where original_marc_array_encd comes from.
         $yaz_array = unserialize(base64_decode($this->original_marc_array_encd));
         // unserialize() yields FALSE when the input cannot be decoded; in
         // that case $marc_record stays as the freshly constructed
         // MARCRecord and nothing is saved for it here.
         if ($yaz_array !== FALSE) {
             $marc_record->load_yaz_array($yaz_array);
             $project->init_marc_record($marc_record);
             // Update the MARC record with data from POST
             $this->update_marc_record_from_post($marc_record);
             $project->save_marc_record($marc_record);
         }
         // Create the project's 'good word list' and 'bad word list'.
         if (isset($this->clone_projectid)) {
             // We're creating a project via cloning.
             // Copy the original project's word-lists.
             // A string result from either loader is an error message: it is
             // echoed and the list falls back to empty.
             $good_words = load_project_good_words($this->clone_projectid);
             if (is_string($good_words)) {
                 // It's an error message.
                 echo "{$good_words}<br>\n";
                 $good_words = array();
             }
             $bad_words = load_project_bad_words($this->clone_projectid);
             if (is_string($bad_words)) {
                 // It's an error message.
                 echo "{$bad_words}<br>\n";
                 $bad_words = array();
             }
         } else {
             // We're creating a project by means other than cloning
             // (from_nothing, from_marc_record, from_uberproject).
             // Initialize its GWL and BWL to empty.
             $good_words = array();
             $bad_words = array();
         }
         save_project_good_words($this->projectid, $good_words);
         save_project_bad_words($this->projectid, $bad_words);
     }
     // Create/update the Dublin Core file for the project.
     // When we get here, the project's database entry has been fully
     // updated, so we can create a Project object and allow it
     // to pull the relevant fields from the database.
     // $marc_record was assigned in whichever branch ran above.
     $project = new Project($this->projectid);
     $project->create_dc_xml_oai($marc_record);
     // If the project has been posted to PG, make the appropriate transition.
     if ($this->posted) {
         // project_transition() returns '' on success, otherwise an error
         // message that is shown before the script stops.
         $err = project_transition($this->projectid, PROJ_SUBMIT_PG_POSTED, $pguser);
         if ($err != '') {
             echo "{$err}<br>\n";
             exit;
         }
     }
 }
 /**
  * Save the edited Good and Bad Words Lists to the project's word files,
  * unless a file changed on disk since the edit session recorded its
  * modification time.
  *
  * Each list is handled independently. When a file's current mod_time
  * differs from the timestamp held on this object, that list is NOT saved,
  * an explanatory message is queued, and the held timestamp is refreshed to
  * the current one. Otherwise the textarea content is split into one word
  * per line and saved.
  *
  * @return array  array($good_word_conflict, $bad_word_conflict, $messages)
  *                where the first two are booleans and $messages is the
  *                list of conflict messages (possibly empty).
  */
 function save_to_files()
 {
     $notices = array();

     // Look at both word files before touching either of them.
     $good_file = get_project_word_file($this->projectid, "good");
     $bad_file = get_project_word_file($this->projectid, "bad");
     $good_mtime = $good_file->mod_time;
     $bad_mtime = $bad_file->mod_time;

     // ---- Good Words List ----
     $good_word_conflict = ($good_mtime != $this->gwl_timestamp);
     if (!$good_word_conflict) {
         // File is unchanged since the edit began: strip carriage returns,
         // then split on line feeds (via the "[lf]" marker) and save.
         $marked_text = str_replace(array("\r", "\n"), array('', "[lf]"), $this->good_words);
         save_project_good_words($this->projectid, explode("[lf]", $marked_text));
     } else {
         // The file changed underneath us: keep it, and tell the user.
         // TRANSLATORS: %s is a link to the Good Word List
         $template = _("The Good Words List was changed by another process during your edit session. Your changes to this list have not been saved to prevent data loss. View the %s and merge your changes manually. If you want the superset of both lists, simply append the contents of the Good Words List to that within the Good Words edit box below - the server will remove any duplicates. Saving this page again will override this message.");
         $link = new_window_link($good_file->abs_url, _("Good Words List"));
         $notices[] = sprintf($template, $link);
         // Adopt the current timestamp so this object no longer reports
         // the same conflict.
         $this->gwl_timestamp = $good_mtime;
     }

     // ---- Bad Words List ----
     $bad_word_conflict = ($bad_mtime != $this->bwl_timestamp);
     if (!$bad_word_conflict) {
         // Same normalisation as for the good words.
         $marked_text = str_replace(array("\r", "\n"), array('', "[lf]"), $this->bad_words);
         save_project_bad_words($this->projectid, explode("[lf]", $marked_text));
     } else {
         // TRANSLATORS: %s is a link to the Bad Word List
         $template = _("The Bad Words List was changed by another process during your edit session. Your changes to this list have not been saved to prevent data loss. View the %s and merge your changes manually. If you want the superset of both lists, simply append the contents of the Bad Words List to that within the Bad Words edit box below - the server will remove any duplicates. Saving this page again will override this message.");
         $link = new_window_link($bad_file->abs_url, _("Bad Words List"));
         $notices[] = sprintf($template, $link);
         $this->bwl_timestamp = $bad_mtime;
     }

     return array($good_word_conflict, $bad_word_conflict, $notices);
 }