Example #1
0
 // When the best match is machine translation, run the MT space realignment on the suggestion;
 // otherwise run only the tag check. In both cases $err_json ends up holding the JSON-encoded
 // errors reported by PostProcess, or an empty string when there are none.
 if ($new_match_type == 'MT') {
     // Standard word count for MT is computed from the "NO_MATCH" percentage of the raw word count.
     $standard_words = $equivalentWordMapping["NO_MATCH"] * $raw_wc / 100;
     $check = new PostProcess($matches[0]['raw_segment'], $suggestion);
     $check->realignMTSpaces();
     // This is expected to succeed every time because MT preserves tags; the error check is kept
     // anyway for logical correctness.
     if (!$check->thereAreErrors()) {
         // Replace the suggestion with the normalized target produced by the realignment.
         $suggestion = CatUtils::view2rawxliff($check->getTrgNormalized());
         $err_json = '';
     } else {
         $err_json = $check->getErrorsJSON();
     }
 } else {
     // Not MT: perform only the tag check between the segment text and the suggestion.
     $check = new PostProcess($text, $suggestion);
     $check->performTagCheckOnly();
     //_TimeStampMsg( $check->getErrors() );
     if ($check->thereAreErrors()) {
         $err_json = $check->getErrorsJSON();
     } else {
         $err_json = '';
     }
 }
 // $mt_qe is the float value of the first match's sentence_confidence, or null when it is empty.
 !empty($matches[0]['sentence_confidence']) ? $mt_qe = floatval($matches[0]['sentence_confidence']) : ($mt_qe = null);
 //        _TimeStampMsg ( "--- (child $my_pid) : sid=$sid --- \$tm_match_type=$tm_match_type, \$fast_match_type=$fast_match_type, \$new_match_type=$new_match_type, \$equivalentWordMapping[\$new_match_type]=" . $equivalentWordMapping[ $new_match_type ] . ", \$raw_wc=$raw_wc,\$standard_words=$standard_words,\$eq_words=$eq_words" );
 // Start collecting the fields for this segment's record.
 // NOTE(review): where $tm_data is consumed is outside this excerpt.
 $tm_data = array();
 $tm_data['id_job'] = $jid;
 $tm_data['id_segment'] = $sid;
 $tm_data['suggestions_array'] = $suggestion_json;
 $tm_data['suggestion'] = $suggestion;
 /**
  * Collect TM and/or MT contributions for a segment (or for a user concordance search)
  * and store them in $this->result['data']['matches'].
  *
  * Steps, in order:
  *  1. validate the request fields, appending to $this->result['errors'] on failure;
  *  2. load the job row and check the password;
  *  3. choose source/target languages (optionally swapped for concordance searches);
  *  4. query the TM server and/or the custom MT engine according to the job settings;
  *  5. merge, sort and truncate the matches to $this->num_results;
  *  6. for a first match in the 90-99 range, try to realign the tag IDs and promote it;
  *  7. normalize every match for the client (MT space realignment, source label,
  *     confidence formatting, concordance highlighting).
  *
  * @return int|null -1 when validation or the password check fails; null otherwise
  *                  (including when the TM keys cannot be retrieved, where error -11 is recorded).
  */
 public function doAction()
 {
     if (!$this->concordance_search) {
         //execute these lines only in segment contribution search,
         //in case of user concordance search skip these lines
         //because segment can be optional
         if (empty($this->id_segment)) {
             $this->result['errors'][] = array("code" => -1, "message" => "missing id_segment");
         }
     }
     if (is_null($this->text) || $this->text === '') {
         $this->result['errors'][] = array("code" => -2, "message" => "missing text");
     }
     if (empty($this->id_job)) {
         $this->result['errors'][] = array("code" => -3, "message" => "missing id_job");
     }
     if (empty($this->num_results)) {
         $this->num_results = INIT::$DEFAULT_NUM_RESULTS_FROM_TM;
     }
     if (!empty($this->result['errors'])) {
         return -1;
     }
     //get Job Infos, we need only a row of jobs ( split )
     $this->jobData = getJobData($this->id_job, $this->password);
     $pCheck = new AjaxPasswordCheck();
     //check for Password correctness
     if (empty($this->jobData) || !$pCheck->grantJobAccessByJobData($this->jobData, $this->password)) {
         $this->result['errors'][] = array("code" => -10, "message" => "wrong password");
         return -1;
     }
     /*
      * string manipulation strategy
      */
     if (!$this->concordance_search) {
         $this->text = CatUtils::view2rawxliff($this->text);
         $this->source = $this->jobData['source'];
         $this->target = $this->jobData['target'];
     } else {
         // Search/replace patterns applied to each match's segment further below.
         $regularExpressions = $this->tokenizeSourceSearch();
         if ($this->switch_languages) {
             /*
              * switch languages from user concordances search on the target language value
              * Example:
              * Job is in
              *      source: it_IT,
              *      target: de_DE
              *
              * user perform a right click for concordance help on a german word or phrase
              * we want result in italian from german source
              */
             $this->source = $this->jobData['target'];
             $this->target = $this->jobData['source'];
         } else {
             $this->source = $this->jobData['source'];
             $this->target = $this->jobData['target'];
         }
     }
     $this->id_mt_engine = $this->jobData['id_mt_engine'];
     $this->id_tms = $this->jobData['id_tms'];
     $this->tm_keys = $this->jobData['tm_keys'];
     $config = array();
     if ($this->id_tms == 1) {
         /**
          * MyMemory Enabled
          */
         $config['get_mt'] = true;
         $config['mt_only'] = false;
         if ($this->id_mt_engine != 1) {
             /**
              * Don't get MT contribution from MyMemory ( Custom MT )
              */
             $config['get_mt'] = false;
         }
         $_TMS = $this->id_tms;
     } else {
         if ($this->id_tms == 0 && $this->id_mt_engine == 1) {
             /**
              * MyMemory disabled but MT Enabled and it is NOT a Custom one
              * So tell to MyMemory to get MT only
              */
             $config['get_mt'] = true;
             $config['mt_only'] = true;
             $_TMS = 1;
             /* MyMemory */
         }
     }
     /**
      * if No TM server and No MT selected $_TMS is not defined
      * so we want not to perform TMS Call
      */
     if (isset($_TMS)) {
         /**
          * @var $tms Engines_MyMemory
          */
         $tms = Engine::getInstance($_TMS);
         $config = array_merge($tms->getConfigStruct(), $config);
         $config['segment'] = $this->text;
         $config['source'] = $this->source;
         $config['target'] = $this->target;
         $config['email'] = INIT::$MYMEMORY_API_KEY;
         $config['id_user'] = array();
         $config['num_result'] = $this->num_results;
         $config['isConcordance'] = $this->concordance_search;
         //get job's TM keys
         $this->checkLogin();
         try {
             if (self::isRevision()) {
                 $this->userRole = TmKeyManagement_Filter::ROLE_REVISOR;
             }
             $tm_keys = TmKeyManagement_TmKeyManagement::getJobTmKeys($this->tm_keys, 'r', 'tm', $this->uid, $this->userRole);
             if (is_array($tm_keys) && !empty($tm_keys)) {
                 foreach ($tm_keys as $tm_key) {
                     $config['id_user'][] = $tm_key->key;
                 }
             }
         } catch (Exception $e) {
             $this->result['errors'][] = array("code" => -11, "message" => "Cannot retrieve TM keys info.");
             Log::doLog($e->getMessage());
             return;
         }
         $tms_match = $tms->get($config);
         $tms_match = $tms_match->get_matches_as_array();
     }
     if ($this->id_mt_engine > 1) {
         /**
          * @var $mt Engines_Moses
          */
         $mt = Engine::getInstance($this->id_mt_engine);
         $config = $mt->getConfigStruct();
         $config['segment'] = $this->text;
         $config['source'] = $this->source;
         $config['target'] = $this->target;
         $config['id_user'] = INIT::$MYMEMORY_API_KEY;
         $config['segid'] = $this->id_segment;
         $mt_result = $mt->get($config);
         if (isset($mt_result['error']['code'])) {
             // Report the MT failure to the client and drop the MT contribution.
             $mt_result['error']['created_by_type'] = 'MT';
             $this->result['errors'][] = $mt_result['error'];
             $mt_result = false;
         }
     }
     // $tms_match / $mt_result may be undefined here (branch not taken); empty() tolerates that.
     $matches = array();
     if (!empty($tms_match)) {
         $matches = $tms_match;
     }
     if (!empty($mt_result)) {
         $matches[] = $mt_result;
         usort($matches, array("getContributionController", "__compareScore"));
         //this is necessary since usort sorts is ascending order, thus inverting the ranking
         $matches = array_reverse($matches);
     }
     $matches = array_slice($matches, 0, $this->num_results);

     /* Tag ID realignment: only attempted when the first match is a ( 90 =< MATCH < 100 ) */
     $firstMatchVal = isset($matches[0]['match']) ? floatval($matches[0]['match']) : null;
     if ($firstMatchVal !== null && $firstMatchVal >= 90 && $firstMatchVal < 100) {
         // Compare the two sources without tags, with runs of spaces collapsed, case-insensitively.
         $srcSearch = strip_tags($this->text);
         $segmentFound = strip_tags($matches[0]['raw_segment']);
         $srcSearch = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $srcSearch));
         $segmentFound = mb_strtolower(preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $segmentFound));
         // Edit distance scaled by log10 of the combined length. The divisor is 0 only when both
         // strings are empty after tag stripping; they are then identical, so the score is 0
         // (dividing would raise DivisionByZeroError on PHP 8).
         $normalizer = log10(mb_strlen($srcSearch . $segmentFound) + 1);
         $fuzzy = $normalizer > 0 ? levenshtein($srcSearch, $segmentFound) / $normalizer : 0;
         //on PHP versions before 8.0 levenshtein handles max 255 chars per string and returns -1,
         //so the fuzzy var can be less than 0 !!
         if ($srcSearch == $segmentFound || $fuzzy < 2.5 && $fuzzy >= 0) {
             $qaRealign = new QA($this->text, html_entity_decode($matches[0]['raw_translation']));
             $qaRealign->tryRealignTagID();
             $log_prepend = "CLIENT REALIGN IDS PROCEDURE | ";
             if (!$qaRealign->thereAreErrors()) {
                 // Realignment succeeded: show the requested segment with the realigned
                 // translation and promote the match score.
                 $matches[0]['segment'] = CatUtils::rawxliff2view($this->text);
                 $matches[0]['translation'] = CatUtils::rawxliff2view($qaRealign->getTrgNormalized());
                 $matches[0]['match'] = $fuzzy == 0 ? '100%' : '99%';
             } else {
                 Log::doLog($log_prepend . 'Realignment Failed. Skip. Segment: ' . $this->__postInput['id_segment']);
             }
         }
     }
     if (!$this->concordance_search) {
         //execute these lines only in segment contribution search,
         //in case of user concordance search skip these lines
         $res = $this->setSuggestionReport($matches);
         if (is_array($res) and array_key_exists("error", $res)) {
             // error occurred: currently ignored, the matches are still returned to the client
         }
     }
     foreach ($matches as &$match) {
         if (strpos($match['created_by'], 'MT') !== false) {
             $match['match'] = 'MT';
             $QA = new PostProcess($match['raw_segment'], $match['raw_translation']);
             $QA->realignMTSpaces();
             //this should every time be ok because MT preserve tags, but we use the check on the errors
             //for logic correctness
             if (!$QA->thereAreErrors()) {
                 $match['raw_translation'] = $QA->getTrgNormalized();
                 $match['translation'] = CatUtils::rawxliff2view($match['raw_translation']);
             } else {
                 Log::doLog($QA->getErrors());
             }
         }
         if ($match['created_by'] == 'MT!') {
             $match['created_by'] = 'MT';
             //MyMemory returns MT!
         } else {
             $match['created_by'] = $this->__changeSuggestionSource($match);
         }
         if (!empty($match['sentence_confidence'])) {
             $match['sentence_confidence'] = round($match['sentence_confidence'], 0) . "%";
         }
         if ($this->concordance_search) {
             $match['segment'] = strip_tags(html_entity_decode($match['segment']));
             $match['segment'] = preg_replace('#[\\x{20}]{2,}#u', chr(0x20), $match['segment']);
             //Do something with &$match, tokenize strings and send to client
             $match['segment'] = preg_replace(array_keys($regularExpressions), array_values($regularExpressions), $match['segment']);
             $match['translation'] = strip_tags(html_entity_decode($match['translation']));
         }
     }
     // Break the reference left by the by-reference foreach, so the last element of $matches
     // is stored as a plain value and cannot be modified through $match afterwards.
     unset($match);
     $this->result['data']['matches'] = $matches;
 }
 /**
  * Run the release-forming pipeline over the collected binaries.
  *
  * Stages, as echoed to the console:
  *  1. apply each active group's regexes to new binaries and tag the ones that match;
  *  2. mark binaries whose files are all present as ready to release (with reqID name lookup);
  *  3. create a release and write an nzb for each ready group of binaries, discarding those
  *     outside the permitted size range;
  *  4. delete releases below the configured completion percentage;
  *  5. run post processing;
  *  6. aggregate the releasefiles count onto releases;
  *  7. delete old binaries and parts (optionally in chunks).
  * Finally the user request history is pruned.
  *
  * @return int number of releases added in stage 3, or -1 when the nzb directory does not exist
  */
 function processReleases()
 {
     require_once WWW_DIR . "/lib/binaries.php";
     $db = new DB();
     // Database clock at the start of the run; used for every age comparison below.
     $currTime_ori = $db->queryOneRow("SELECT NOW() as now");
     $cat = new Category();
     $nzb = new Nzb();
     $s = new Sites();
     $releaseRegex = new ReleaseRegex();
     $page = new Page();
     $groups = new Groups();
     $retcount = 0;
     echo $s->getLicense();
     echo "\n\nStarting release update process (" . date("Y-m-d H:i:s") . ")\n";
     if (!file_exists($page->site->nzbpath)) {
         echo "Bad or missing nzb directory - " . $page->site->nzbpath;
         return -1;
     }
     //
     // Get all regexes for all groups which are to be applied to new binaries
     // in order of how they should be applied
     //
     $releaseRegex->get();
     echo "Stage 1 : Applying regex to binaries\n";
     $activeCategories = $cat->get();
     // Lookup tables keyed by "ID", read in stage 3 for the min/max size limits.
     $catbasedsizes = $db->getLookupAsArray($activeCategories, "ID");
     $activeGroups = $groups->getActive(false);
     $groupbasedminsizes = $db->getLookupAsArray($groups->getAllNoReleases(), "ID");
     foreach ($activeGroups as $groupArr) {
         // Skip groups whose regexes have already been applied during update binaries
         if ($groupArr['regexmatchonly'] == 1) {
             continue;
         }
         $groupRegexes = $releaseRegex->getForGroup($groupArr['name']);
         echo "Stage 1 : Applying " . sizeof($groupRegexes) . " regexes to group " . $groupArr['name'] . "\n";
         // Get out all binaries of STAGE0 for current group
         $newUnmatchedBinaries = array();
         $ressql = sprintf("SELECT binaries.ID, binaries.name, binaries.date, binaries.totalParts, binaries.procstat, binaries.fromname from binaries where groupID = %d and procstat IN (%d,%d) and regexID IS NULL order by binaries.date asc", $groupArr['ID'], Releases::PROCSTAT_NEW, Releases::PROCSTAT_TITLENOTMATCHED);
         $resbin = $db->queryDirect($ressql);
         $matchedbins = 0;
         while ($rowbin = $db->getAssocArray($resbin)) {
             // The first regex that matches wins.
             $regexMatches = array();
             foreach ($groupRegexes as $groupRegex) {
                 $regexCheck = $releaseRegex->performMatch($groupRegex, $rowbin['name']);
                 if ($regexCheck !== false) {
                     $regexMatches = $regexCheck;
                     break;
                 }
             }
             if (!empty($regexMatches)) {
                 $matchedbins++;
                 // NOTE(review): assumes 'parts' has the form "current/total"; $relparts[1] is
                 // undefined when there is no "/" - confirm performMatch() guarantees this.
                 $relparts = explode("/", $regexMatches['parts']);
                 // NOTE(review): 'regcatid' is interpolated with %s without escaping - confirm it
                 // can only hold a numeric ID or NULL.
                 $db->exec(sprintf("update binaries set relname = replace(%s, '_', ' '), relpart = %d, reltotalpart = %d, procstat=%d, categoryID=%s, regexID=%d, reqID=%s where ID = %d", $db->escapeString($regexMatches['name']), $relparts[0], $relparts[1], Releases::PROCSTAT_TITLEMATCHED, $regexMatches['regcatid'], $regexMatches['regexID'], $db->escapeString($regexMatches['reqID']), $rowbin["ID"]));
             } else {
                 // Only binaries still in the NEW state need to be flagged as not matched.
                 if ($rowbin['procstat'] == Releases::PROCSTAT_NEW) {
                     $newUnmatchedBinaries[] = $rowbin['ID'];
                 }
             }
         }
         // Mark as not matched, in a single update per group
         if (!empty($newUnmatchedBinaries)) {
             $db->exec(sprintf("update binaries set procstat=%d where ID IN (%s)", Releases::PROCSTAT_TITLENOTMATCHED, implode(',', $newUnmatchedBinaries)));
         }
     }
     //
     // Move all binaries from releases which have the correct number of files on to the next stage.
     //
     echo "Stage 2 : Marking binaries where all parts are available";
     $result = $db->queryDirect(sprintf("SELECT relname, date, SUM(reltotalpart) AS reltotalpart, groupID, reqID, fromname, SUM(num) AS num, coalesce(g.minfilestoformrelease, s.minfilestoformrelease) as minfilestoformrelease FROM   ( SELECT relname, reltotalpart, groupID, reqID, fromname, max(date) as date, COUNT(ID) AS num FROM binaries     WHERE procstat = %s     GROUP BY relname, reltotalpart, groupID, reqID, fromname ORDER BY NULL ) x left outer join groups g on g.ID = x.groupID inner join ( select value as minfilestoformrelease from site where setting = 'minfilestoformrelease' ) s GROUP BY relname, groupID, reqID, fromname, minfilestoformrelease ORDER BY NULL", Releases::PROCSTAT_TITLEMATCHED));
     while ($row = $db->getAssocArray($result)) {
         // In this stage $retcount only drives the progress dots; it is reset before stage 3.
         $retcount++;
         //
         // Less than the site permitted number of files in a release. Dont discard it, as it may
         // be part of a set being uploaded.
         //
         if ($row["num"] < $row["minfilestoformrelease"]) {
             // Intentionally nothing to do: the binaries stay in the title-matched state.
             //echo "Number of files in release ".$row["relname"]." less than site/group setting (".$row['num']."/".$row["minfilestoformrelease"].")\n";
             //$db->exec(sprintf("update binaries set procattempts = procattempts + 1 where relname = %s and procstat = %d and groupID = %d and fromname = %s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"]) ));
         } elseif ($row["num"] >= $row["reltotalpart"]) {
             $incomplete = false;
             // Total part count of 0 and newest binary younger than 14400 seconds (four hours):
             // treat as incomplete for now.
             if ($row['reltotalpart'] == 0 && strtotime($currTime_ori['now']) - strtotime($row['date']) < 14400) {
                 $incomplete = true;
             } else {
                 // Check that the binary is complete
                 $binlist = $db->query(sprintf("SELECT binaries.ID, totalParts, date, COUNT(DISTINCT parts.messageID) AS num FROM binaries, parts WHERE binaries.ID=parts.binaryID AND binaries.relname = %s AND binaries.procstat = %d AND binaries.groupID = %d AND binaries.fromname = %s GROUP BY binaries.ID ORDER BY NULL", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                 foreach ($binlist as $rowbin) {
                     if ($rowbin['num'] < $rowbin['totalParts']) {
                         // Allow the binary to release if posted to usenet longer than four hours ago and we still don't have all the parts
                         if (!(strtotime($currTime_ori['now']) - strtotime($rowbin['date']) > 14400)) {
                             $incomplete = true;
                             break;
                         }
                     }
                 }
             }
             if (!$incomplete) {
                 //
                 // Right number of files, but see if the binary is a allfilled/reqid post, in which case it needs its name looked up
                 //
                 if ($row['reqID'] != '' && $page->site->reqidurl != "") {
                     //
                     // Try and get the name using the group
                     //
                     $binGroup = $db->queryOneRow(sprintf("SELECT name FROM groups WHERE ID = %d", $row["groupID"]));
                     $newtitle = $this->getReleaseNameForReqId($page->site->reqidurl, $page->site->newznabID, $binGroup["name"], $row["reqID"]);
                     //
                     // if the feed/group wasnt supported by the scraper, then just use the release name as the title.
                     //
                     if ($newtitle == "no feed") {
                         $newtitle = $row["relname"];
                     }
                     //
                     // Valid release with right number of files and title now, so move it on
                     //
                     if ($newtitle != "") {
                         $db->exec(sprintf("update binaries set relname = %s, procstat=%d where relname = %s and procstat = %d and groupID = %d and fromname=%s", $db->escapeString($newtitle), Releases::PROCSTAT_READYTORELEASE, $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                     } else {
                         //
                         // Item not found, if the binary was added to the index ages ago, then give up.
                         //
                         $maxaddeddate = $db->queryOneRow(sprintf("SELECT NOW() as now, MAX(dateadded) as dateadded FROM binaries WHERE relname = %s and procstat = %d and groupID = %d and fromname=%s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                         //
                         // If added to the index over 48 hours ago, give up trying to determine the title
                         //
                         if (strtotime($maxaddeddate['now']) - strtotime($maxaddeddate['dateadded']) > 60 * 60 * 48) {
                             $db->exec(sprintf("update binaries set procstat=%d where relname = %s and procstat = %d and groupID = %d and fromname=%s", Releases::PROCSTAT_NOREQIDNAMELOOKUPFOUND, $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                         }
                     }
                 } else {
                     // No reqID lookup needed: promote straight to ready-to-release.
                     $db->exec(sprintf("update binaries set procstat=%d where relname = %s and procstat = %d and groupID = %d and fromname=%s", Releases::PROCSTAT_READYTORELEASE, $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                 }
             }
         }
         // Progress indicator: one dot per 100 rows examined.
         if ($retcount % 100 == 0) {
             echo ".";
         }
     }
     // From here on $retcount counts the releases actually added (the return value).
     $retcount = 0;
     echo "\nStage 3 : Creating releases from complete binaries\n";
     //
     // Get out all distinct relname, group from binaries of STAGE2
     //
     $result = $db->queryDirect(sprintf("SELECT relname, groupID, g.name as group_name, fromname, max(categoryID) as categoryID, max(regexID) as regexID, max(reqID) as reqID, MAX(date) as date, count(binaries.ID) as parts, coalesce(sum(binaries.size),0) as size from binaries inner join groups g on g.ID = binaries.groupID where procstat = %d and relname is not null group by relname, g.name, groupID, fromname ORDER BY COUNT(binaries.ID) desc", Releases::PROCSTAT_READYTORELEASE));
     while ($row = $db->getAssocArray($result)) {
         $relguid = md5(uniqid());
         //
         // Get categoryID if one has been allocated to this
         //
         if ($row["categoryID"] != "") {
             $catId = $row["categoryID"];
         } else {
             $catId = $cat->determineCategory($row["group_name"], $row["relname"]);
         }
         //
         // Determine if size matches permitted boundaries and discard here if not.
         // Every limit that is empty or 0 is replaced by the release's own size, so that
         // it can never reject the release in the min()/max() comparison below.
         //
         $gsize = $groupbasedminsizes[$row["groupID"]][0]["minsizetoformrelease"];
         if ($gsize == "" || $gsize == 0) {
             $gsize = $row["size"];
         }
         $ssize = $page->site->minsizetoformrelease;
         if ($ssize == "" || $ssize == 0) {
             $ssize = $row["size"];
         }
         $csize = $catbasedsizes[$catId][0]["minsizetoformrelease"];
         if ($csize == "" || $csize == 0) {
             $csize = $row["size"];
         }
         $cpsize = $catbasedsizes[$catId][0]["parentminsizetoformrelease"];
         if ($cpsize == "" || $cpsize == 0) {
             $cpsize = $row["size"];
         }
         $cmaxsize = $catbasedsizes[$catId][0]["maxsizetoformrelease"];
         if ($cmaxsize == "" || $cmaxsize == 0) {
             $cmaxsize = $row["size"];
         }
         $cpmaxsize = $catbasedsizes[$catId][0]["parentmaxsizetoformrelease"];
         if ($cpmaxsize == "" || $cpmaxsize == 0) {
             $cpmaxsize = $row["size"];
         }
         // Strictest minimum (group, site, category, parent category) and strictest maximum.
         $overallminsize = max($gsize, $ssize, $csize, $cpsize);
         $overallmaxsize = min($cmaxsize, $cpmaxsize);
         if ($row["size"] < $overallminsize || $row["size"] > $overallmaxsize) {
             // A bound equal to the release size was either unset or exactly met, so it is not printed.
             echo sprintf("Stage 3 : Discarding - %s (Size %s outside permitted range of %s%s)\n", $row["relname"], formatBytes($row["size"]), $overallminsize != $row["size"] ? formatBytes($overallminsize) : "", $overallmaxsize != $row["size"] ? formatBytes($overallmaxsize) : "");
             $db->exec(sprintf("DELETE parts, binaries FROM parts JOIN binaries ON binaries.ID = parts.binaryID WHERE relname=%s and groupID=%d and fromname=%s and procstat=%d", $db->escapeString($row["relname"]), $row["groupID"], $db->escapeString($row["fromname"]), Releases::PROCSTAT_READYTORELEASE));
         } else {
             // Clean release name
             $cleanRelName = $this->cleanReleaseName($row['relname']);
             $relid = $this->insertRelease($cleanRelName, $row["parts"], $row["groupID"], $relguid, $catId, $row["regexID"], $row["date"], $row["fromname"], $row["reqID"], $page->site);
             //
             // Tag every binary for this release with its parent release id
             //
             $db->exec(sprintf("update binaries set procstat = %d, releaseID = %d where relname = %s and procstat = %d and groupID = %d and fromname=%s", Releases::PROCSTAT_RELEASED, $relid, $db->escapeString($row["relname"]), Releases::PROCSTAT_READYTORELEASE, $row["groupID"], $db->escapeString($row["fromname"])));
             //
             // Write the nzb to disk
             //
             $nzbfile = $nzb->getNZBPath($relguid, $page->site->nzbpath, true);
             $nzb->writeNZBforReleaseId($relid, $cleanRelName, $catId, $nzbfile);
             //
             // Remove used binaries
             // NOTE(review): this runs before the nzb is verified below, so a failed write
             // leaves neither the release nor its binaries - confirm this is intended.
             //
             $db->exec(sprintf("DELETE parts, binaries FROM parts JOIN binaries ON binaries.ID = parts.binaryID WHERE releaseID = %d ", $relid));
             //
             // If nzb successfully written, then load it and get size completion from it
             //
             $nzbInfo = new nzbInfo();
             if (!$nzbInfo->loadFromFile($nzbfile)) {
                 echo "Stage 3 : Failed to write nzb file (bad perms?) " . $nzbfile . "\n";
                 //copy($nzbfile, "./ERRORNZB_".$relguid);
                 $this->delete($relid);
             } else {
                 // Check if gid already exists
                 $dupes = $db->queryOneRow(sprintf("SELECT EXISTS(SELECT 1 FROM releases WHERE gid = %s) as total", $db->escapeString($nzbInfo->gid)));
                 if ($dupes['total'] > 0) {
                     echo "Stage 3 : Duplicate - " . $cleanRelName . " -" . $nzbInfo->gid . "-\n";
                     $this->delete($relid);
                 } else {
                     // Store the figures read back from the nzb on the release row.
                     $db->exec(sprintf("update releases set totalpart = %d, size = %s, completion = %d, GID=%s where ID = %d", $nzbInfo->filecount, $nzbInfo->filesize, $nzbInfo->completion, $db->escapeString($nzbInfo->gid), $relid));
                     echo "Stage 3 : Added release " . $cleanRelName . "\n";
                     //Increment new release count
                     $retcount++;
                 }
             }
         }
     }
     //
     // Delete any releases under the minimum completion percent.
     // Releases with completion = 0 are excluded by the query.
     //
     if ($page->site->completionpercent != 0) {
         echo "Stage 4 : Deleting releases less than " . $page->site->completionpercent . " complete\n";
         $result = $db->query(sprintf("select ID from releases where completion > 0 and completion < %d", $page->site->completionpercent));
         foreach ($result as $row) {
             $this->delete($row["ID"]);
         }
     }
     /*
      *Potentially leave this in to mop up release when the cat sizes change.
             //
             // Delete releases whos minsize is less than the site or group minimum
             //
             $result = $db->query("select releases.ID from releases left outer join (SELECT g.ID, coalesce(g.minsizetoformrelease, s.minsizetoformrelease) as minsizetoformrelease FROM groups g inner join ( select value as minsizetoformrelease from site where setting = 'minsizetoformrelease' ) s ) x on x.ID = releases.groupID where minsizetoformrelease != 0 and releases.size < minsizetoformrelease");
             if (count($result) > 0)
             {
                 echo "Stage 4 : Deleting ".count($result)." release(s) where size is smaller than minsize for site/group\n";
                 foreach ($result as $row)
                     $this->delete($row["ID"]);
             }

             $result = $db->query("select releases.ID, name, categoryID, size FROM releases JOIN (
                             select
                             catc.ID,
                             case when catc.minsizetoformrelease = 0 then catp.minsizetoformrelease else catc.minsizetoformrelease end as minsizetoformrelease,
                             case when catc.maxsizetoformrelease = 0 then catp.maxsizetoformrelease else catc.maxsizetoformrelease end as maxsizetoformrelease
                             from category catp join category catc on catc.parentID = catp.ID
                             where (catc.minsizetoformrelease != 0 or catc.maxsizetoformrelease != 0) or (catp.minsizetoformrelease != 0 or catp.maxsizetoformrelease != 0)
                             ) x on x.ID = releases.categoryID
                             where
                             (size < minsizetoformrelease and minsizetoformrelease != 0) or
                             (size > maxsizetoformrelease and maxsizetoformrelease != 0)");

             if(count($result) > 0)
             {
                 echo "Stage 4 : Deleting release(s) not matching category min/max size ...\n";
                 foreach ($result as $r){
                     $this->delete($r['ID']);
                 }
             }
     */
     echo "Stage 5 : Post processing started\n";
     $postprocess = new PostProcess(true);
     $postprocess->processAll();
     //
     // aggregate the releasefiles upto the releases.
     // Only releases whose rarinnerfilecount is still 0 are updated.
     //
     echo "Stage 6 : Aggregating Files\n";
     $db->exec("update releases INNER JOIN (SELECT releaseID, COUNT(ID) AS num FROM releasefiles GROUP BY releaseID) b ON b.releaseID = releases.ID and releases.rarinnerfilecount = 0 SET rarinnerfilecount = b.num");
     // Remove the binaries and parts used to form releases, or that are duplicates.
     // Rows older than rawretentiondays (converted to hours) relative to the start of the run are removed.
     //
     if ($page->site->partsdeletechunks > 0) {
         echo "Stage 7 : Chunk deleting unused binaries and parts";
         $query = sprintf("SELECT parts.ID as partsID,binaries.ID as binariesID FROM parts\r\n\t\t\t\t\t\tLEFT JOIN binaries ON binaries.ID = parts.binaryID\r\n\t\t\t\t\t\tWHERE binaries.dateadded < %s - INTERVAL %d HOUR LIMIT 0,%d", $db->escapeString($currTime_ori["now"]), ceil($page->site->rawretentiondays * 24), $page->site->partsdeletechunks);
         // $cc accumulates the affected-row counts; $dd remembers its value at the start of each pass.
         $cc = 0;
         $done = false;
         while (!$done) {
             $dd = $cc;
             $result = $db->query($query);
             if (count($result) > 0) {
                 $pID = array();
                 $bID = array();
                 foreach ($result as $row) {
                     $pID[] = $row['partsID'];
                     $bID[] = $row['binariesID'];
                 }
                 $pID = '(' . implode(',', $pID) . ')';
                 $bID = '(' . implode(',', $bID) . ')';
                 // Binaries are only deleted when at least one part row was deleted.
                 $fr = $db->exec("DELETE FROM parts WHERE ID IN {$pID}");
                 if ($fr > 0) {
                     $cc += $fr;
                     $cc += $db->exec("DELETE FROM binaries WHERE ID IN {$bID}");
                 }
                 unset($pID);
                 unset($bID);
                 // Nothing was deleted in this pass: stop, otherwise the same rows would be selected forever.
                 if ($cc == $dd) {
                     $done = true;
                 }
                 // NOTE(review): this prints a dot whenever $cc is NOT a multiple of 10000, i.e. on
                 // almost every pass - confirm whether a dot per 10000 rows was intended.
                 echo $cc % 10000 ? '.' : '';
             } else {
                 $done = true;
             }
         }
         echo "\nStage 7 : Complete - " . $cc . " rows affected\n";
     } else {
         echo "Stage 7 : Deleting unused binaries and parts\n";
         $db->exec(sprintf("DELETE parts, binaries FROM parts JOIN binaries ON binaries.ID = parts.binaryID\r\n\t\t\tWHERE binaries.dateadded < %s - INTERVAL %d HOUR", $db->escapeString($currTime_ori["now"]), ceil($page->site->rawretentiondays * 24)));
     }
     //
     // User/Request housekeeping, should ideally move this to its own section, but it needs to be done automatically.
     //
     $users = new Users();
     $users->pruneRequestHistory($page->site->userdownloadpurgedays);
     echo "Done    : Added " . $retcount . " releases\n\n";
     return $retcount;
 }
Example #4
0
    /**
     * realignMTSpaces() on a real-world segment: the MT target carries extra spaces inside
     * the <g> tags, and the source has a tab between the two tags.
     *
     * Expects no errors and no warnings, a single entry with outcome 0 in both the warning
     * and the error list, and a normalized target with the padding spaces removed.
     */
    public function testRealString1()
    {
        $source_seg = <<<SRC
<g id="1877">31-235</g>\t<g id="1878">The default PR upper alarm is120.</g>
SRC;
        $target_seg = <<<TRG
<g id="1877"> 31-235 </g><g id="1878"> L'impostazione predefinita PR IS120 allarme. </g>
TRG;
        $source_seg = CatUtils::view2rawxliff($source_seg);
        $target_seg = CatUtils::view2rawxliff($target_seg);

        $check = new PostProcess($source_seg, $target_seg);
        $check->realignMTSpaces();

        $this->assertFalse($check->thereAreErrors());
        $this->assertFalse($check->thereAreWarnings());

        // Each list is expected to hold exactly one entry, with outcome 0.
        $warnings = $check->getWarnings();
        $this->assertCount(1, $warnings);
        $this->assertEquals(0, $warnings[0]->outcome);

        $errors = $check->getErrors();
        $this->assertCount(1, $errors);
        $this->assertEquals(0, $errors[0]->outcome);

        $normalized = $check->getTrgNormalized();
        //trick strings are not exactly the same .. there's a tab between tags in source string
        $this->assertEquals('<g id="1877">31-235</g><g id="1878">L\'impostazione predefinita PR IS120 allarme.</g>', $normalized);
    }
Example #5
0
 /**
  * Persists the first entry of $this->_matches as the suggestion for the analysed
  * segment and then updates the per-project analysis counters.
  *
  * The suggestion is QA-checked first: MT matches get their spaces realigned,
  * every other match only goes through the tag check. Any QA errors are stored
  * alongside the suggestion.
  *
  * @param QueueElement $queueElement
  *
  * @throws Exception
  * @throws ReQueueException when the suggestion could not be stored (negative update result)
  */
 protected function _updateRecord(QueueElement $queueElement)
 {
     $params    = $queueElement->params;
     $bestMatch = $this->_matches[0];

     // A match created by an MT engine is classified as "MT" whatever its score says
     $matchType = stripos($bestMatch['created_by'], "MT") !== false ? "MT" : $bestMatch['match'];

     // Rewrite every <x ...> tag that was left unclosed inside the suggestion as a self-closed one
     $suggestion = preg_replace('|<x([^/]*?)>|', '<x\\1/>', \CatUtils::view2rawxliff($bestMatch['raw_translation']));

     $suggestionsJson = json_encode($this->_matches);
     $payableRates    = json_decode($params->payable_rates, true);
     $newMatchType    = $this->_getNewMatchType($matchType, $params->match_type, $payableRates, empty($bestMatch['memory_key']));
     $eqWords         = $payableRates[$newMatchType] * $params->raw_word_count / 100;
     $standardWords   = $eqWords;

     if ($newMatchType == 'MT') {
         // MT match: standard words use the NO_MATCH rate and the target goes through QA realignment
         $standardWords = $payableRates["NO_MATCH"] * $params->raw_word_count / 100;
         $check = new \PostProcess($bestMatch['raw_segment'], $suggestion);
         $check->realignMTSpaces();
         // MT is expected to preserve tags, so errors should not happen; still checked for logic correctness
         if ($check->thereAreErrors()) {
             $errJson = $check->getErrorsJSON();
         } else {
             $suggestion = \CatUtils::view2rawxliff($check->getTrgNormalized());
             $errJson = '';
         }
     } else {
         // Any other match: only the tag check is performed
         $check = new \PostProcess($params->segment, $suggestion);
         $check->performTagCheckOnly();
         $errJson = $check->thereAreErrors() ? $check->getErrorsJSON() : '';
     }

     $mtQe = !empty($bestMatch['sentence_confidence']) ? floatval($bestMatch['sentence_confidence']) : null;

     // Collapse a non-empty creator into the two source labels 'TM' / 'MT'
     $suggestionSource = $bestMatch['created_by'];
     if (!empty($suggestionSource)) {
         $suggestionSource = strpos($suggestionSource, "MT") === false ? 'TM' : 'MT';
     }

     $tmData = array(
         'id_job' => $params->id_job,
         'id_segment' => $params->id_segment,
         'suggestions_array' => $suggestionsJson,
         'suggestion' => $suggestion,
         'match_type' => $newMatchType,
         'eq_word_count' => $eqWords,
         'standard_word_count' => $standardWords,
         'translation' => $suggestion,
         'tm_analysis_status' => "DONE",
         'warning' => (int) $check->thereAreErrors(),
         'serialized_errors_list' => $errJson,
         'mt_qe' => $mtQe,
         'suggestion_source' => $suggestionSource,
         'suggestion_match' => $bestMatch['match'],
     );

     // A 100% match is stored as already translated when the job asks for pre-translation
     if ($tmData['suggestion_match'] == "100%" && $params->pretranslate_100) {
         $tmData['status'] = \Constants_TranslationStatus::STATUS_TRANSLATED;
     }

     $updateRes = setSuggestionUpdate($tmData);
     if ($updateRes < 0) {
         $failure = "**** Error occurred during the storing (UPDATE) of the suggestions for the segment {$tmData['id_segment']}";
         $this->_doLog($failure);
         throw new ReQueueException($failure, self::ERR_REQUEUE);
     }

     $rowLabel = $tmData['id_segment'] . "-" . $tmData['id_job'];
     if ($updateRes == 0) {
         // No row to update means there was no fast analysis for this segment, which should not happen
         $this->_doLog("No row found: " . $rowLabel);
     } else {
         $this->_doLog("Row found: " . $rowLabel . " - UPDATED.");
     }

     // Update the cached project counters and try to close the project
     $projectId = $params->pid;
     $this->_incrementAnalyzedCount($projectId, $eqWords, $standardWords);
     $this->_decSegmentsToAnalyzeOfWaitingProjects($projectId);
     $this->_tryToCloseProject($projectId);
 }
Example #6
0
 /**
  * Runs, one after another, the post-processing steps that require a single thread.
  *
  * Two processors are involved: a PProcess built from this object's settings and
  * CLI colour helper, and a global \PostProcess. Books and music are handled by
  * both, consoles, games and XXX only by the former.
  */
 private function processSingle()
 {
     $processorOptions = ['Settings' => $this->pdo, 'ColorCLI' => $this->_colorCLI];
     $primary = new PProcess($processorOptions);
     $secondary = new \PostProcess(true);

     // Anime processing ($primary->processAnime()) is currently switched off.

     // Books: both processors, in this order
     $primary->processBooks();
     $secondary->processBooks();

     $primary->processConsoles();
     $primary->processGames();

     // Music: both processors, in this order
     $primary->processMusic();
     $secondary->processMusic();

     $primary->processXXX();
 }
Example #7
0
 /**
  * Runs the whole release update process.
  *
  * Steps, in order:
  *  - apply every release regex to the new binaries and tag the ones that match
  *    with a release name, part numbers, category, regex and request id;
  *  - "Stage 2": move title-matched binaries that have enough files and parts on
  *    to the ready-to-release state (optionally resolving the title by request id);
  *  - "Stage 3": create a release (row, nfo, nzb file) for every ready group of binaries;
  *  - run the post processing and the housekeeping queries (retention, passworded releases).
  *
  * NOTE(review): the result sets returned by queryDirect() are read with
  * mysql_fetch_assoc(), i.e. the legacy ext/mysql API that was removed in PHP 7 -
  * confirm the DB wrapper before upgrading PHP.
  *
  * @return int|null number of release candidates seen in stage three, or null
  *                  when the configured nzb directory does not exist
  */
 function processReleases()
 {
     $db = new DB();
     $cat = new Category();
     $bin = new Binaries();
     $nzb = new Nzb();
     $s = new Sites();
     $relreg = new ReleaseRegex();
     $page = new Page();
     $nfo = new Nfo();
     $retcount = 0;
     echo $s->getLicense();
     echo "\n\nStarting release update process (" . date("Y-m-d H:i:s") . ")\n";
     if (!file_exists($page->site->nzbpath)) {
         echo "Bad or missing nzb directory - " . $page->site->nzbpath;
         return;
     }
     $this->checkRegexesUptoDate($page->site->latestregexurl, $page->site->latestregexrevision, $page->site->newznabID);
     //
     // Get all regexes for all groups which are to be applied to new binaries
     // in order of how they should be applied
     //
     $regexrows = $relreg->get();
     foreach ($regexrows as $regexrow) {
         echo "Applying regex " . $regexrow["ID"] . " for group " . ($regexrow["groupname"] == "" ? "all" : $regexrow["groupname"]) . "\n";
         $groupmatch = "";
         //
         // Groups ending in * need to be like matched when getting out binaries for groups and children
         //
         if (preg_match("/\\*\$/i", $regexrow["groupname"])) {
             $groupname = substr($regexrow["groupname"], 0, -1);
             $resgrps = $db->query(sprintf("select ID from groups where name like %s ", $db->escapeString($groupname . "%")));
             foreach ($resgrps as $resgrp) {
                 $groupmatch .= " groupID = " . $resgrp["ID"] . " or ";
             }
             // Terminates the trailing "or" and matches nothing when no group was found
             $groupmatch .= " 1=2 ";
         } elseif ($regexrow["groupname"] != "") {
             $resgrp = $db->queryOneRow(sprintf("select ID from groups where name = %s ", $db->escapeString($regexrow["groupname"])));
             //
             // if group not found, its a regex for a group we arent indexing.
             //
             if ($resgrp) {
                 $groupmatch = " groupID = " . $resgrp["ID"];
             } else {
                 $groupmatch = " 1=2 ";
             }
         } else {
             $groupmatch = " 1=1 ";
         }
         // Get current mysql time for date comparison checks in case php is in a different time zone
         $currTime = $db->queryOneRow("SELECT NOW() as now");
         // Get out all new binaries for current group
         $arrNoPartBinaries = array();
         $resbin = $db->queryDirect(sprintf("SELECT binaries.ID, binaries.name, binaries.date, binaries.totalParts from binaries where (%s) and procstat = %d order by binaries.date asc", $groupmatch, Releases::PROCSTAT_NEW));
         while ($rowbin = mysql_fetch_assoc($resbin)) {
             if (preg_match($regexrow["regex"], $rowbin["name"], $matches)) {
                 $matches = array_map("trim", $matches);
                 // A numeric request id stands in for a missing release name
                 if (isset($matches['reqid']) && ctype_digit($matches['reqid']) && (!isset($matches['name']) || empty($matches['name']))) {
                     $matches['name'] = $matches['reqid'];
                 }
                 // Check that the regex provided the correct parameters
                 if (!isset($matches['name']) || empty($matches['name'])) {
                     echo "regex applied which didnt return right number of capture groups - " . $regexrow["regex"] . "\n";
                     print_r($matches);
                     continue;
                 }
                 // If theres no number of files data in the subject, put it into a release if it was posted to usenet longer than five hours ago.
                 if ((!isset($matches['parts']) && strtotime($currTime['now']) - strtotime($rowbin['date']) > 18000) || isset($arrNoPartBinaries[$matches['name']])) {
                     //
                     // Take a copy of the name of this no-part release found. This can be used
                     // next time round the loop to find parts of this set, but which have not yet reached five hours.
                     //
                     $arrNoPartBinaries[$matches['name']] = "1";
                     $matches['parts'] = "01/01";
                 }
                 if (isset($matches['name']) && isset($matches['parts'])) {
                     if (strpos($matches['parts'], '/') === false) {
                         $matches['parts'] = str_replace(array('-', '~', ' of '), '/', $matches['parts']);
                     }
                     $regcatid = "null ";
                     if ($regexrow["categoryID"] != "") {
                         $regcatid = $regexrow["categoryID"];
                     }
                     //
                     // The request id comes from the usenet subject and is put unquoted into the
                     // update below, so only a purely numeric value is accepted; anything else
                     // (including an empty capture) is stored as NULL.
                     //
                     $reqid = " null ";
                     if (isset($matches['reqid']) && ctype_digit($matches['reqid'])) {
                         $reqid = $matches['reqid'];
                     }
                     //check if post is repost
                     if (preg_match('/(repost\\d?|re\\-?up)/i', $rowbin['name'], $repost) && !preg_match('/repost|re\\-?up/i', $matches['name'])) {
                         $matches['name'] .= ' ' . $repost[1];
                     }
                     $relparts = explode("/", $matches['parts']);
                     // A parts string without any separator has no total; store 0 for it
                     $reltotalpart = isset($relparts[1]) ? $relparts[1] : 0;
                     $db->query(sprintf("update binaries set relname = replace(%s, '_', ' '), relpart = %d, reltotalpart = %d, procstat=%d, categoryID=%s, regexID=%d, reqID=%s where ID = %d", $db->escapeString($matches['name']), $relparts[0], $reltotalpart, Releases::PROCSTAT_TITLEMATCHED, $regcatid, $regexrow["ID"], $reqid, $rowbin["ID"]));
                 }
             }
         }
     }
     //
     // Move all binaries from releases which have the correct number of files on to the next stage.
     //
     echo "Stage 2\n";
     //
     // Refresh the mysql time: the value fetched in the regex loop above does not exist
     // when there are no regexes, and stage two needs it for the incomplete binary check.
     //
     $currTime = $db->queryOneRow("SELECT NOW() as now");
     $result = $db->queryDirect(sprintf("SELECT relname, SUM(reltotalpart) AS reltotalpart, groupID, reqID, fromname, SUM(num) AS num, coalesce(g.minfilestoformrelease, s.minfilestoformrelease) as minfilestoformrelease FROM   ( SELECT relname, reltotalpart, groupID, reqID, fromname, COUNT(ID) AS num FROM binaries     WHERE procstat = %s     GROUP BY relname, reltotalpart, groupID, reqID, fromname    ) x left outer join groups g on g.ID = x.groupID inner join ( select value as minfilestoformrelease from site where setting = 'minfilestoformrelease' ) s GROUP BY relname, groupID, reqID, fromname", Releases::PROCSTAT_TITLEMATCHED));
     while ($row = mysql_fetch_assoc($result)) {
         $retcount++;
         //
         // Less than the site permitted number of files in a release. Dont discard it, as it may
         // be part of a set being uploaded.
         //
         if ($row["num"] < $row["minfilestoformrelease"]) {
             //echo "Number of files in release ".$row["relname"]." less than site/group setting (".$row['num']."/".$row["minfilestoformrelease"].")\n";
             $db->query(sprintf("update binaries set procattempts = procattempts + 1 where relname = %s and procstat = %d and groupID = %d and fromname = %s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
         } elseif ($row["num"] >= $row["reltotalpart"]) {
             // Check that the binary is complete
             $binlist = $db->query(sprintf("SELECT ID, totalParts, date from binaries where relname = %s and procstat = %d and groupID = %d and fromname = %s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
             $incomplete = false;
             foreach ($binlist as $rowbin) {
                 $binParts = $db->queryOneRow(sprintf("SELECT COUNT(ID) AS num FROM parts WHERE binaryID = %d", $rowbin['ID']));
                 if ($binParts['num'] < $rowbin['totalParts']) {
                     echo "binary " . $rowbin['ID'] . " from " . $row['relname'] . " has missing parts - " . $binParts['num'] . "/" . $rowbin['totalParts'] . " (" . number_format($binParts['num'] / $rowbin['totalParts'] * 100, 1) . "% complete)\n";
                     // Allow to binary to release if posted to usenet longer than four hours ago and we still don't have all the parts
                     if (strtotime($currTime['now']) - strtotime($rowbin['date']) > 14400) {
                         echo "allowing incomplete binary " . $rowbin['ID'] . "\n";
                     } else {
                         $incomplete = true;
                     }
                 }
             }
             if ($incomplete) {
                 echo "Incorrect number of parts " . $row["relname"] . "-" . $row["num"] . "-" . $row["reltotalpart"] . "\n";
                 $db->query(sprintf("update binaries set procattempts = procattempts + 1 where relname = %s and procstat = %d and groupID = %d and fromname = %s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
             } elseif ($row['reqID'] != '' && $page->site->reqidurl != "") {
                 //
                 // Try and get the name using the group
                 //
                 $binGroup = $db->queryOneRow(sprintf("SELECT name FROM groups WHERE ID = %d", $row["groupID"]));
                 echo "Looking up " . $row['reqID'] . " in " . $binGroup['name'] . "... ";
                 $newtitle = $this->getReleaseNameForReqId($page->site->reqidurl, $page->site->newznabID, $binGroup["name"], $row["reqID"]);
                 //
                 // if the feed/group wasnt supported by the scraper, then just use the release name as the title.
                 //
                 if ($newtitle == "no feed") {
                     $newtitle = $row["relname"];
                     echo "Group not supported\n";
                 }
                 //
                 // Valid release with right number of files and title now, so move it on
                 //
                 if ($newtitle != "") {
                     $db->query(sprintf("update binaries set relname = %s, procstat=%d where relname = %s and procstat = %d and groupID = %d and fromname=%s", $db->escapeString($newtitle), Releases::PROCSTAT_READYTORELEASE, $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                 } else {
                     //
                     // Item not found, if the binary was added to the index ages ago, then give up.
                     //
                     $maxaddeddate = $db->queryOneRow(sprintf("SELECT NOW() as now, MAX(dateadded) as dateadded FROM binaries WHERE relname = %s and procstat = %d and groupID = %d and fromname=%s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                     //
                     // If added to the index over 48 hours ago, give up trying to determine the title.
                     // Both values are mysql datetime strings, so both go through strtotime() before subtracting.
                     //
                     if (strtotime($maxaddeddate['now']) - strtotime($maxaddeddate['dateadded']) > 60 * 60 * 48) {
                         $db->query(sprintf("update binaries set procstat=%d where relname = %s and procstat = %d and groupID = %d and fromname=%s", Releases::PROCSTAT_NOREQIDNAMELOOKUPFOUND, $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
                         echo "Not found in 48 hours\n";
                     }
                 }
             } else {
                 $db->query(sprintf("update binaries set procstat=%d where relname = %s and procstat = %d and groupID = %d and fromname=%s", Releases::PROCSTAT_READYTORELEASE, $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
             }
         } else {
             //echo "Incorrect number of files for ".$row["relname"]." (".$row["num"]."/".$row["reltotalpart"].")\n";
             $db->query(sprintf("update binaries set procattempts = procattempts + 1 where relname = %s and procstat = %d and groupID = %d and fromname=%s", $db->escapeString($row["relname"]), Releases::PROCSTAT_TITLEMATCHED, $row["groupID"], $db->escapeString($row["fromname"])));
         }
         if ($retcount % 10 == 0) {
             echo "-processed " . $retcount . " binaries stage two\n";
         }
     }
     // The counter is reused: from here on it counts stage three release candidates
     $retcount = $nfocount = 0;
     echo "Stage 3\n";
     //
     // Get out all distinct relname, group from binaries which are ready to release
     //
     $result = $db->queryDirect(sprintf("SELECT relname, groupID, g.name as group_name, fromname, count(binaries.ID) as parts from binaries inner join groups g on g.ID = binaries.groupID where procstat = %d and relname is not null group by relname, g.name, groupID, fromname ORDER BY COUNT(binaries.ID) desc", Releases::PROCSTAT_READYTORELEASE));
     while ($row = mysql_fetch_assoc($result)) {
         $retcount++;
         //
         // Get the last post date and the poster name from the binary
         //
         $bindata = $db->queryOneRow(sprintf("select fromname, MAX(date) as date from binaries where relname = %s and procstat = %d and groupID = %d and fromname = %s group by fromname", $db->escapeString($row["relname"]), Releases::PROCSTAT_READYTORELEASE, $row["groupID"], $db->escapeString($row["fromname"])));
         //
         // Get all releases with the same name with a usenet posted date in a +1-1 date range.
         //
         $relDupes = $db->query(sprintf("select ID from releases where searchname = %s and (%s - INTERVAL 1 DAY < postdate AND %s + INTERVAL 1 DAY > postdate)", $db->escapeString($row["relname"]), $db->escapeString($bindata["date"]), $db->escapeString($bindata["date"])));
         if (count($relDupes) > 0) {
             $db->query(sprintf("update binaries set procstat = %d where relname = %s and procstat = %d and groupID = %d and fromname=%s ", Releases::PROCSTAT_DUPLICATE, $db->escapeString($row["relname"]), Releases::PROCSTAT_READYTORELEASE, $row["groupID"], $db->escapeString($row["fromname"])));
             continue;
         }
         //
         // Get total size of this release
         // Done in a big OR statement, not an IN as the mysql binaryID index on parts table
         // was not being used.
         //
         $totalSize = "0";
         $regexAppliedCategoryID = "";
         $regexIDused = "";
         $reqIDused = "";
         $relTotalParts = 0;
         $relCompletion = 0;
         $binariesForSize = $db->query(sprintf("select ID, categoryID, regexID, reqID, totalParts from binaries use index (ix_binary_relname) where relname = %s and procstat = %d and groupID = %d and fromname=%s", $db->escapeString($row["relname"]), Releases::PROCSTAT_READYTORELEASE, $row["groupID"], $db->escapeString($row["fromname"])));
         if (count($binariesForSize) > 0) {
             $sizeSql = "select sum(size) as totalSize, count(ID) as relParts from parts where (";
             foreach ($binariesForSize as $binSizeId) {
                 $sizeSql .= " binaryID = " . $binSizeId["ID"] . " or ";
                 //
                 // Get categoryID if one has been allocated to this
                 //
                 if ($binSizeId["categoryID"] != "" && $regexAppliedCategoryID == "") {
                     $regexAppliedCategoryID = $binSizeId["categoryID"];
                 }
                 //
                 // Get RegexID if one has been allocated to this
                 //
                 if ($binSizeId["regexID"] != "" && $regexIDused == "") {
                     $regexIDused = $binSizeId["regexID"];
                 }
                 //
                 // Get requestID if one has been allocated to this
                 //
                 if ($binSizeId["reqID"] != "" && $reqIDused == "") {
                     $reqIDused = $binSizeId["reqID"];
                 }
                 //
                 // Get number of expected parts
                 //
                 $relTotalParts += $binSizeId["totalParts"];
             }
             $sizeSql .= " 1=2) ";
             $temp = $db->queryOneRow($sizeSql);
             // Numeric value (0 when there are no parts) turned back into a string
             $totalSize = ($temp["totalSize"] + 0) . "";
             //
             // Completion stays 0 when no parts are expected (avoids a division by zero). No
             // thousands separator, so the value remains numeric for the comparison and %f below.
             //
             if ($relTotalParts > 0) {
                 $relCompletion = number_format($temp["relParts"] / $relTotalParts * 100, 1, '.', '');
             }
         }
         //
         // check the size of the release isnt less than the site/group minimum amount
         //
         $minfilesizeres = $db->queryOneRow(sprintf("SELECT coalesce(g.minsizetoformrelease, s.minsizetoformrelease) as minsizetoformrelease FROM groups g inner join ( select value as minsizetoformrelease from site where setting = 'minsizetoformrelease' ) s where g.ID = %d", $row["groupID"]));
         if ($minfilesizeres["minsizetoformrelease"] != 0 && $totalSize < $minfilesizeres["minsizetoformrelease"]) {
             echo "Skipping release - size of " . $totalSize . " bytes is smaller than site/group setting of " . $minfilesizeres["minsizetoformrelease"] . " bytes\n";
             $db->query(sprintf("update binaries set procstat = %d where relname = %s and procstat = %d and groupID = %d and fromname=%s ", Releases::PROCSTAT_MINRELEASESIZE, $db->escapeString($row["relname"]), Releases::PROCSTAT_READYTORELEASE, $row["groupID"], $db->escapeString($row["fromname"])));
             continue;
         }
         //
         // Insert the release
         //
         $relguid = md5(uniqid());
         if ($regexAppliedCategoryID == "") {
             $catId = $cat->determineCategory($row["group_name"], $row["relname"]);
         } else {
             $catId = $regexAppliedCategoryID;
         }
         // NOTE(review): the insert below formats regexID with %d, so " null " ends up as 0 - confirm this is intended
         if ($regexIDused == "") {
             $regexID = " null ";
         } else {
             $regexID = $regexIDused;
         }
         if ($reqIDused == "") {
             $reqID = " null ";
         } else {
             $reqID = $reqIDused;
         }
         //Clean release name
         $cleanArr = array('#', '@', '$', '%', '^', '§', '¨', '©', 'Ö');
         $cleanRelName = str_replace($cleanArr, '', $row['relname']);
         $relid = $db->queryInsert(sprintf("insert into releases (name, searchname, totalpart, groupID, adddate, guid, categoryID, regexID, rageID, postdate, fromname, size, reqID, passwordstatus, completion, haspreview) values (%s, %s, %d, %d, now(), %s, %d, %d, -1, %s, %s, %s, %s, %d, %f, %d)", $db->escapeString($cleanRelName), $db->escapeString($cleanRelName), $row["parts"], $row["groupID"], $db->escapeString($relguid), $catId, $regexID, $db->escapeString($bindata["date"]), $db->escapeString($bindata["fromname"]), $totalSize, $reqID, $page->site->checkpasswordedrar > 0 ? -1 : 0, $relCompletion > 100 ? 100 : $relCompletion, -1));
         echo "Added release " . $cleanRelName . "\n";
         //
         // Tag every binary for this release with its parent release id
         //
         $db->query(sprintf("update binaries set procstat = %d, releaseID = %d where relname = %s and procstat = %d and groupID = %d and fromname=%s", Releases::PROCSTAT_RELEASED, $relid, $db->escapeString($row["relname"]), Releases::PROCSTAT_READYTORELEASE, $row["groupID"], $db->escapeString($row["fromname"])));
         //
         // Find an .nfo in the release
         //
         $relnfo = $nfo->determineReleaseNfo($relid);
         if ($relnfo !== false) {
             $nfo->addReleaseNfo($relid, $relnfo['ID']);
             $nfocount++;
         }
         //
         // Write the nzb to disk
         //
         $nzb->writeNZBforReleaseId($relid, $relguid, $cleanRelName, $catId, $nzb->getNZBPath($relguid, $page->site->nzbpath, true));
         if ($retcount % 5 == 0) {
             echo "-processed " . $retcount . " releases stage three\n";
         }
     }
     echo "Found " . $nfocount . " nfos in " . $retcount . " releases\n";
     $postprocess = new PostProcess(true);
     $postprocess->processAll();
     //
     // Get the current datetime again, as using now() in the housekeeping queries prevents the index being used.
     //
     $currTime = $db->queryOneRow("SELECT NOW() as now");
     //
     // aggregate the releasefiles upto the releases.
     //
     $db->query("UPDATE releases INNER JOIN (SELECT releaseID, COUNT(ID) AS num FROM releasefiles GROUP BY releaseID) b ON b.releaseID = releases.ID and releases.rarinnerfilecount = 0 SET rarinnerfilecount = b.num");
     //
     // Tidy away any new binaries which could not be grouped into
     // a release within the configured number of days
     //
     echo "\nTidying away binaries which cant be grouped after " . $page->site->attemptgroupbindays . " days\n";
     $db->query(sprintf("update binaries set procstat = %d where procstat = %d and dateadded < %s - interval %d day ", Releases::PROCSTAT_WRONGPARTS, Releases::PROCSTAT_NEW, $db->escapeString($currTime["now"]), $page->site->attemptgroupbindays));
     //
     // Delete any parts and binaries which are older than the site's retention days
     //
     echo "Deleting parts which are older than " . $page->site->rawretentiondays . " days\n";
     $db->query(sprintf("delete from parts where dateadded < %s - interval %d day", $db->escapeString($currTime["now"]), $page->site->rawretentiondays));
     echo "Deleting binaries which are older than " . $page->site->rawretentiondays . " days\n";
     $db->query(sprintf("delete from binaries where dateadded < %s - interval %d day", $db->escapeString($currTime["now"]), $page->site->rawretentiondays));
     //
     // Delete any releases which are older than site's release retention days
     //
     if ($page->site->releaseretentiondays != 0) {
         echo "Determining any releases past retention to be deleted.\n\n";
         $result = $db->query(sprintf("select ID from releases where postdate < %s - interval %d day", $db->escapeString($currTime["now"]), $page->site->releaseretentiondays));
         foreach ($result as $row) {
             $this->delete($row["ID"]);
         }
     }
     //
     // Delete any passworded releases
     //
     if ($page->site->deletepasswordedrelease == 1) {
         echo "Determining any passworded releases to be deleted.\n\n";
         $result = $db->query("select ID from releases where passwordstatus > 0");
         foreach ($result as $row) {
             $this->delete($row["ID"]);
         }
     }
     echo "Processed " . $retcount . " releases\n\n";
     return $retcount;
 }