/**
 * Process images on S3 instead of from the images web server dir
 */
private static function processS3Images() {
	$s3 = new S3(WH_AWS_WIKIPHOTO_ACCESS_KEY, WH_AWS_WIKIPHOTO_SECRET_KEY);
	//$file = '/tmp/whp';
	//if (!file_exists($file)) {
		$articles = self::getS3Articles($s3, self::AWS_BUCKET);
		$processed = self::dbGetArticlesUpdatedAll();
		//$out = yaml_emit(array($articles, $processed));
		//file_put_contents($file, $out);
	//} else {
		//list($articles, $processed) = yaml_parse(file_get_contents($file));
	//}

	// process all articles
	foreach ($articles as $id => $details) {
		$debug = self::$debugArticleID;
		if ($debug && $debug != $id) {
			continue;
		}
		if (!empty($details['err'])) {
			if (!isset($processed[$id])) {
				self::dbSetArticleProcessed($id, $details['user'], $details['err'], '', '', 0, 0, 0);
			}
			continue;
		}

		// if the article needs to be processed again because new files
		// were uploaded, but the article has already been processed,
		// flag it as a retry attempt
		if (!$debug
			&& isset($processed[$id])
			&& !$processed[$id]['retry']
			&& $processed[$id]['processed'] < $details['time'])
		{
			if ($details['time'] >= self::REPROCESS_EPOCH) {
				$processed[$id]['retry'] = 1;
				$processed[$id]['error'] = '';
			} else {
				// don't reprocess uploads from before a certain point in time
				continue;
			}
		}

		// if this article was already processed, nothing about its
		// images has changed, and it's not set to be retried, don't
		// process it again
		if (!$debug
			&& isset($processed[$id])
			&& !$processed[$id]['retry']
			&& $processed[$id]['processed'] > $details['time'])
		{
			continue;
		}

		// skip the article if it is on the Wikiphoto exclude list
		if (WikiPhoto::checkExcludeList($id)) {
			$err = 'Article was found on Wikiphoto EXCLUDE list';
			self::dbSetArticleProcessed($id, $details['user'], $err, '', '', 0, 0, 0);
			continue;
		}

		// pull zip file into staging area
		$stageDir = '';
		$imageList = array();
		$title = null;
		if ($details['zip']) {
			$prefix = $details['user'] . '/';
			$zipFile = $id . '.zip';
			$files = array($zipFile);
			list($err, $stageDir) = self::pullFiles($id, $s3, $prefix, $files);
			if (!$err) {
				list($err, $files) = self::unzip($stageDir, $zipFile);
			}
			if (!$err) {
				foreach ($files as $file) {
					$imageList[] = array('name' => basename($file), 'filename' => $file);
				}
			}
		} else {
			// no zip -- ignore
			continue;
		}
		if (!$err && in_array($id, self::$excludeArticles)) {
			$err = 'Forced skipping of this article because there was a repeated error when processing it';
		}
		if (!$err) {
			$warning = isset($details['warning']) ? $details['warning'] : '';
			list($err, $title) = self::processImages($id, $details['user'], $imageList, $warning);
		} else {
			self::dbSetArticleProcessed($id, $details['user'], $err, '', '', 0, 0, 0);
		}
		if ($stageDir) {
			self::safeCleanupDir($stageDir);
		}

		$titleStr = $title ? ' (' . $title->getText() . ')' : '';
		$errStr = $err ? ' err=' . $err : '';
		$imageCount = count($imageList);
		print date('Y/M/d H:i') . " processed: {$details['user']}/{$id}{$titleStr} images={$imageCount}{$errStr}\n";
	}
}
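/**
 * Illustrative sketch only (not called above): the retry/reprocess decision
 * from processS3Images() factored into one helper, to make the two timestamp
 * checks in that loop easier to follow. The helper name is hypothetical, and
 * it assumes the same $processed row shape (keys 'retry', 'processed',
 * 'error') and upload $time values used in the loop. Slightly simplified:
 * the loop above would still process an article whose upload time exactly
 * equals its processed time.
 *
 * Returns true if the article should be (re)processed, flagging the row as a
 * retry when a newer upload postdates the last processing run.
 */
private static function shouldProcessArticle(&$processedRow, $uploadTime) {
	if (!$processedRow) {
		// never processed before -- always process
		return true;
	}
	if ($processedRow['retry']) {
		// an explicit retry was requested
		return true;
	}
	if ($processedRow['processed'] < $uploadTime) {
		// newer files were uploaded after the last run; retry only if the
		// upload is recent enough (see self::REPROCESS_EPOCH)
		if ($uploadTime >= self::REPROCESS_EPOCH) {
			$processedRow['retry'] = 1;
			$processedRow['error'] = '';
			return true;
		}
		return false;
	}
	// already processed, and nothing newer has been uploaded since
	return false;
}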
/**
 * Process media (photos and videos) on S3 instead of from the images web server dir
 */
private function processS3Media() {
	$s3 = new S3(WH_AWS_WIKIVISUAL_ACCESS_KEY, WH_AWS_WIKIVISUAL_SECRET_KEY);
	//$file = '/tmp/whp';
	//if (!file_exists($file)) {
		$articles = $this->getS3Articles($s3, self::AWS_BUCKET);
		$processed = $this->dbGetArticlesUpdatedAll();
		//$out = yaml_emit(array($articles, $processed));
		//file_put_contents($file, $out);
	//} else {
		//list($articles, $processed) = yaml_parse(file_get_contents($file));
	//}

	// process all articles
	$articlesProcessed = 0;
	foreach ($articles as $id => $details) {
		$debug = self::$debugArticleID;
		if ($debug && $debug != $id) {
			continue;
		}
		if (!empty($details['err'])) {
			if (!isset($processed[$id])) {
				self::dbSetArticleProcessed($id, $details['user'], $details['err'],
					'', '', 0, 0, 0, 0, self::STATUS_ERROR, 0, '');
			}
			continue;
		}

		// if the article needs to be processed again because new files
		// were uploaded, but the article has already been processed,
		// flag it as a retry attempt
		if (!$debug
			&& isset($processed[$id])
			&& !$processed[$id]['retry']
			&& $processed[$id]['processed'] < $details['time'])
		{
			if ($details['time'] >= self::REPROCESS_EPOCH) {
				$processed[$id]['retry'] = 1;
				$processed[$id]['error'] = '';
			} else {
				// don't reprocess uploads from before a certain point in time
				self::d("don't reprocess uploads from before a certain point in time: article id={$id}");
				continue;
			}
		}

		// if this article was already processed, nothing about its
		// images has changed, and it's not set to be retried, don't
		// process it again
		if (!$debug
			&& isset($processed[$id])
			&& !$processed[$id]['retry']
			&& $processed[$id]['processed'] > $details['time'])
		{
			self::d("already processed, unchanged, and not set to retry, skipping: id={$id}, " .
				"processed[id]['processed']=" . $processed[$id]['processed'] .
				" > details['time']=" . $details['time']);
			continue;
		}

		// skip the article if it is on the Wikiphoto exclude list
		if (WikiPhoto::checkExcludeList($id)) {
			$err = 'Article was found on Wikiphoto EXCLUDE list';
			self::dbSetArticleProcessed($id, $details['user'], $err,
				'', '', 0, 0, 0, 0, self::STATUS_ERROR, 0, '');
			continue;
		}

		// pull zip file into staging area
		$stageDir = '';
		$photoList = array();
		$videoList = array();
		$title = null;
		if ($details['zip']) {
			$prefix = $details['user'] . '/';
			$zipFile = $id . '.zip';
			$files = array($zipFile);
			list($err, $stageDir) = $this->pullFiles($id, $s3, $prefix, $files);
			if (!$err) {
				list($err, $files) = $this->unzip($stageDir, $zipFile);
			}
			if (!$err) {
				list($photoList, $videoList) = self::splitSrcMediaFileList($files);
			}
		} else {
			// no zip -- ignore
			continue;
		}
		if (!$err && in_array($id, self::$excludeArticles)) {
			$err = 'Forced skipping of this article because there was a repeated error when processing it';
		}
		self::d("PhotoList size " . count($photoList) .
			", VideoList size " . count($videoList) . " err={$err}");

		$isHybridMedia = false;
		$photoCnt = 0;
		$vidCnt = 0;
		if (!$err) {
			$warning = isset($details['warning']) ? $details['warning'] : '';
			$photoCnt = count($photoList);
			$vidCnt = count($videoList);
			self::dbSetArticleProcessed($id, $details['user'], $err, $warning,
				'', $vidCnt, $photoCnt, 0, 0, self::STATUS_PROCESSING_UPLOADS, 0, $stageDir);
			$isHybridMedia = $photoCnt > 0 && $vidCnt > 0;
			self::d("isHybridMedia={$isHybridMedia}");

			// start processing uploads; photo-only articles go through the
			// image transcoder, anything with videos goes through the mp4
			// transcoder
			if ($photoCnt > 0 && $vidCnt <= 0) {
				list($err, $title, $warning, $url, $photoCnt, $replaced) =
					$this->imageTranscoder->processMedia($id, $details['user'],
						$photoList, $warning, $isHybridMedia);
				$this->updateArticleStatusPhotoProcessed($id, $err, $warning,
					$url, $photoCnt, $replaced, true);
			} elseif ($vidCnt > 0) {
				self::d("Processing mp4Transcoder->processMedia");
				list($err, $url, $status) =
					$this->mp4Transcoder->processMedia($id, $details['user'],
						$videoList, $warning, $isHybridMedia);
				$this->updateArticleStatusVideoTranscoding($id, $err, $warning, $url, $status);
			}
			$articlesProcessed++;
		} else {
			self::dbSetArticleProcessed($id, $details['user'], $err,
				'', '', 0, 0, 0, 0, self::STATUS_ERROR, 0, '');
		}

		// don't clean up the staging dir for hybrid media, since the video
		// pass still needs the images from the zip file
		if (!empty($stageDir) && $isHybridMedia === false) {
			self::safeCleanupDir($stageDir);
		}

		$titleStr = $title ? ' (' . $title->getText() . ')' : '';
		$errStr = $err ? ', err=' . $err : '';
		$mediaCount = count($files);
		self::i("processed: {$details['user']}/{$id}{$titleStr} original mediaFilesCount={$mediaCount} {$errStr}");

		if (self::$DEBUG !== false
			&& self::$exitAfterNumArticles > 0
			&& $articlesProcessed >= self::$exitAfterNumArticles)
		{
			self::d("articlesProcessed {$articlesProcessed} >= self::\$exitAfterNumArticles " .
				self::$exitAfterNumArticles . ", stopping further processing of articles");
			break;
		}
	}
}