/**
  * @covers ::normalize
  */
 public function testNormalize()
 {
     // The page URL as it appears on the wiki
     $assertions[0][0] = 'WPD:Infrastructure/proposals/Site Map';
     // Desired sanitized URL
     $assertions[0][1] = 'WPD/Infrastructure/proposals/Site_Map';
     // The file name we would read from and write to
     $assertions[0][2] = 'out/foo/bar/WPD/Infrastructure/proposals/Site_Map/index.bazz';
     $assertions[1][0] = 'WPD:Doc Sprints';
     $assertions[1][1] = 'WPD/Doc_Sprints';
     $assertions[1][2] = 'out/foo/bar/WPD/Doc_Sprints/index.bazz';
     $assertions[2][0] = 'tutorials/What is CSS?';
     $assertions[2][1] = 'tutorials/What_is_CSS';
     $assertions[2][2] = 'out/foo/bar/tutorials/What_is_CSS/index.bazz';
     $assertions[3][0] = 'Tutorials/HTML forms - the basics';
     $assertions[3][1] = 'Tutorials/HTML_forms_-_the_basics';
     $assertions[3][2] = 'out/foo/bar/Tutorials/HTML_forms_-_the_basics/index.bazz';
     $assertions[4][0] = 'ja/concepts/programming/programming basics';
     $assertions[4][1] = 'ja/concepts/programming/programming_basics';
     $assertions[4][2] = 'out/foo/bar/ja/concepts/programming/programming_basics/index.bazz';
     $assertions[5][0] = 'concepts/Internet and Web/the history of the web/tr';
     $assertions[5][1] = 'concepts/Internet_and_Web/the_history_of_the_web/tr';
     $assertions[5][2] = 'out/foo/bar/concepts/Internet_and_Web/the_history_of_the_web/tr.bazz';
     $assertions[6][0] = 'tutorials/Raw WebGL 101 - Part 4: Textures';
     $assertions[6][1] = 'tutorials/Raw_WebGL_101_-_Part_4_Textures';
     $assertions[6][2] = 'out/foo/bar/tutorials/Raw_WebGL_101_-_Part_4_Textures/index.bazz';
     $assertions[7][0] = 'css/selectors/pseudo-classes/:optional';
     $assertions[7][1] = 'css/selectors/pseudo-classes/optional';
     $assertions[7][2] = 'out/foo/bar/css/selectors/pseudo-classes/optional/index.bazz';
     $assertions[8][0] = 'css/selectors/pseudo-classes/:nth-of-type(n)';
     $assertions[8][1] = 'css/selectors/pseudo-classes/nth-of-type';
     $assertions[8][2] = 'out/foo/bar/css/selectors/pseudo-classes/nth-of-type/index.bazz';
     $assertions[9][0] = 'css/selectors/pseudo-classes/:lang(c)';
     $assertions[9][1] = 'css/selectors/pseudo-classes/lang';
     $assertions[9][2] = 'out/foo/bar/css/selectors/pseudo-classes/lang/index.bazz';
     // False positive for translation (the tr HTML element happens to collide with the Turkish language code)
     $assertions[10][0] = 'html/elements/tr';
     $assertions[10][1] = 'html/elements/tr';
     $assertions[10][2] = 'out/foo/bar/html/elements/tr/index.bazz';
     // True positive: a translated document (the Turkish version of the tr HTML element page)
     $assertions[11][0] = 'html/elements/tr/tr';
     $assertions[11][1] = 'html/elements/tr/tr';
     $assertions[11][2] = 'out/foo/bar/html/elements/tr/tr.bazz';
     // Please, let's fix those too!
     $assertions[12][0] = 'html/attributes/align (Table, iframe elements)';
     $assertions[12][1] = 'html/attributes/align_Table_iframe_elements';
     $assertions[12][2] = 'out/foo/bar/html/attributes/align_Table_iframe_elements/index.bazz';
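     // Taken together, these assertions show what normalization does: the "WPD:"
     // namespace colon becomes a "/", spaces become "_", characters such as ?, (, ), :
     // are stripped (the parenthesized argument of a pseudo-class is dropped entirely),
     // and a trailing language code (e.g. ".../tr") becomes the file name itself
     // instead of getting an index file.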
     $mockDocument = '<page>
                 <title>overload me</title>
                 <revision>
                     <id>44</id>
                     <timestamp>2011-08-21T13:21:41Z</timestamp>
                     <contributor>
                         <username>Jdoe</username>
                         <id>42</id>
                     </contributor>
                     <text xml:space="preserve" bytes="20">Use me to overload title element!</text>
                 </revision>
             </page>';
     foreach ($assertions as $assertion) {
         $mock = new SimpleXMLElement($mockDocument);
         // Overload the title with the page URL we want to test
         $mock->title = $assertion[0];
         $document = new MediaWikiDocument($mock);
         $this->assertSame($assertion[0], $document->getTitle());
         $this->assertSame($assertion[1], $document->getName());
         $file = new GitCommitFileRevision($document, 'out/foo/bar/', '.bazz');
         $this->assertSame($assertion[2], $file->getName());
     }
 }
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $this->users = [];
     $this->filesystem = new Filesystem();
     $this->titleFilter = new TitleFilter();
     $displayIndex = $input->getOption('indexes');
     $displayAuthor = $input->getOption('display-author');
     // Maximum number of pages we go through
     $maxHops = (int) $input->getOption('max-pages');
     // Maximum number of revisions per page we go through
     $revMaxHops = (int) $input->getOption('max-revs');
     $listMissed = $input->getOption('missed');
     // Counts the number of pages we go through
     $counter = 0;
     $redirects = [];
     $pages = [];
     $urlParts = [];
     $urlPartsAll = [];
     $missedIndexes = [];
     $urlsWithContent = [];
     $moreThanHundredRevs = [];
     $translations = [];
     $sanity_redirs = [];
     $directlyOnRoot = [];
     // Revision count per page, so we can work out the average and median
     $rev_count = [];
     // Pages we have to make sure aren't duplicated in the CMS prior
     // to the final migration.
     $temporary_acceptable_duplicates = [];
     //$temporary_acceptable_duplicates[] = 'css/selectors/pseudo-classes/:lang'; // DONE
     if ($listMissed === true) {
         $output->writeln('We are going to try to give you XML indexes to use for --retry=...; we will therefore limit the revision loops to one.');
         $missed_file = DATA_DIR . '/missed.yml';
         if (realpath($missed_file) === false) {
             throw new Exception(sprintf('Could not find missed file at %s', $missed_file));
         }
         $missedFileContents = file_get_contents($missed_file);
         $parser = new Yaml\Parser();
         try {
             $missed = $parser->parse($missedFileContents);
         } catch (Exception $e) {
             throw new Exception(sprintf('Could not get file %s contents to be parsed as YAML. Is it in YAML format?', $missed_file), null, $e);
         }
         if (!isset($missed['missed'])) {
             throw new Exception('Please ensure missed.yml has a list of titles under a "missed:" top level key');
         }
         $revMaxHops = 1;
         $this->missed = $missed['missed'];
     }
     /**
      * Last minute redirects. Order matters.
      */
     $redirects['after'] = 'css/selectors/pseudo-elements/after';
     $redirects['tutorials/What_is_CSS'] = 'tutorials/learning_what_css_is';
     $redirects['html/attributes/type type (a, link, embed)'] = 'html/attributes/type';
     /* -------------------- Author --------------------
      *
      * Array of MediaWikiContributor author objects, kept as $this->users[$uid],
      * where $uid is the MediaWiki user_id.
      *
      * You may have to increase the memory_limit value,
      * but we'll load this only once.
      **/
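     /*
      * A sketch of the shape each entry of data/users.json is assumed to have.
      * Only user_id is read directly below; the other field names are illustrative
      * of what MediaWikiContributor exposes through getName()/getRealName():
      *
      *     {"user_id": "42", "user_name": "Jdoe", "user_real_name": "John Doe"}
      */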
     $users_file = DATA_DIR . '/users.json';
     $users_loop = json_decode(file_get_contents($users_file), 1);
     foreach ($users_loop as &$u) {
         $uid = (int) $u['user_id'];
         $this->users[$uid] = new MediaWikiContributor($u);
         // Don't fill too much memory, if that helps.
         unset($u);
     }
     /* -------------------- /Author -------------------- **/
     /* -------------------- XML source -------------------- **/
     $file = DATA_DIR . '/dumps/main_full.xml';
     $streamer = XmlStringStreamer::createStringWalkerParser($file);
     /* -------------------- /XML source -------------------- **/
     while ($node = $streamer->getNode()) {
         if ($maxHops > 0 && $maxHops === $counter) {
             $output->writeln(sprintf('Reached desired maximum of %d loops', $maxHops) . PHP_EOL . PHP_EOL);
             break;
         }
         $pageNode = new SimpleXMLElement($node);
         if (isset($pageNode->title)) {
             $wikiDocument = new MediaWikiDocument($pageNode);
             $persistable = new GitCommitFileRevision($wikiDocument, 'out/content/', '.md');
             $title = $wikiDocument->getTitle();
             $normalized_location = $wikiDocument->getName();
             $file_path = $this->titleFilter->filter($persistable->getName());
             $redirect_to = $this->titleFilter->filter($wikiDocument->getRedirect());
             // False if not a redirect, string if it is
             $is_translation = $wikiDocument->isTranslation();
             $language_code = $wikiDocument->getLanguageCode();
             $language_name = $wikiDocument->getLanguageName();
             $revs = $wikiDocument->getRevisions()->count();
             $output->writeln(sprintf('"%s":', $title));
             if ($displayIndex === true) {
                 $output->writeln(sprintf('  - index: %d', $counter));
             }
             $output->writeln(sprintf('  - normalized: %s', $normalized_location));
             $output->writeln(sprintf('  - file: %s', $file_path));
             if ($wikiDocument->hasRedirect() === true) {
                 $output->writeln(sprintf('  - redirect_to: %s', $redirect_to));
             } else {
                 $urlsWithContent[] = $title;
                 foreach (explode('/', $normalized_location) as $urlDepth => $urlPart) {
                     $urlPartKey = strtolower($urlPart);
                     $urlParts[$urlPartKey] = $urlPart;
                     $urlPartsAll[$urlPartKey][] = $urlPart;
                 }
             }
             if ($is_translation === true) {
                 $output->writeln(sprintf('  - lang: %s (%s)', $language_code, $language_name));
             }
             if ($listMissed === true && in_array($normalized_location, $this->missed)) {
                 $missedIndexes[$counter] = $title;
             }
             $output->writeln(sprintf('  - revs: %d', $revs));
             $output->writeln(sprintf('  - revisions:'));
             $revList = $wikiDocument->getRevisions();
             $revLast = $wikiDocument->getLatest();
             $revCounter = 0;
             /* ----------- REVISION --------------- **/
             for ($revList->rewind(); $revList->valid(); $revList->next()) {
                 if ($revMaxHops > 0 && $revMaxHops === $revCounter) {
                     $output->writeln(sprintf('    - stop: Reached maximum %d revisions', $revMaxHops) . PHP_EOL . PHP_EOL);
                     break;
                 }
                 $wikiRevision = $revList->current();
                 $revision_id = $wikiRevision->getId();
                 /* -------------------- Author -------------------- **/
                 // An edge case: MediaWiki may report the author as user_id 0 even though we don't
                 // have that user, so we'll use the first user instead.
                 $contributor_id = $wikiRevision->getContributorId() === 0 ? 1 : $wikiRevision->getContributorId();
                 if (isset($this->users[$contributor_id])) {
                     $contributor = clone $this->users[$contributor_id];
                     // We want a copy, because it's specific to here only anyway.
                     $wikiRevision->setContributor($contributor, false);
                 } else {
                     // In case we didn’t find data for $this->users[$contributor_id]
                     $contributor = clone $this->users[1];
                     // We want a copy, because it's specific to here only anyway.
                     $wikiRevision->setContributor($contributor, false);
                 }
                 /* -------------------- /Author -------------------- **/
                 $output->writeln(sprintf('    - id: %d', $revision_id));
                 if ($displayIndex === true) {
                     $output->writeln(sprintf('      index: %d', $revCounter));
                 }
                 $persistArgs = $persistable->setRevision($wikiRevision)->getArgs();
                 foreach ($persistArgs as $argKey => $argVal) {
                     if ($argKey === 'message') {
                         $argVal = trim(mb_strimwidth($argVal, strpos($argVal, ': ') + 2, 100));
                     }
                     if ($argKey === 'message' && empty($argVal)) {
                         // Let's not pollute the report with empty messages
                         continue;
                     }
                     if ($displayAuthor === false && $argKey === 'author') {
                         continue;
                     }
                     $output->writeln(sprintf('      %s: %s', $argKey, $argVal));
                 }
                 if ($revLast->getId() === $wikiRevision->getId() && $wikiDocument->hasRedirect()) {
                     $output->writeln('      is_last_and_has_redirect: True');
                 }
                 ++$revCounter;
             }
             /* ----------- REVISION --------------- */
             $rev_count[] = $revs;
             // Which pages are directly on /wiki/foo. Are there some we
             // should move elsewhere such as the glossary items?
             if (count(explode('/', $title)) == 1 && $wikiDocument->hasRedirect() === false) {
                 $directlyOnRoot[] = $title;
             }
             if ($revs > 99) {
                 $moreThanHundredRevs[] = sprintf('%s (%d)', $title, $revs);
             }
             if ($is_translation === true && $wikiDocument->hasRedirect() === false) {
                 $translations[] = $title;
             }
             // The ones with invalid URL characters that shouldn't be part of
             // a page name because they may be confused with their natural use (:, (, ), !, ?)
             if ($title !== $normalized_location && $wikiDocument->hasRedirect() === false) {
                 $sanity_redirs[$title] = $normalized_location;
             }
             // We have a number of pages; some of them have been
             // deleted or erased, with a redirect left behind.
             //
             // Since we want to write every page that currently has
             // content to a file, we have to generate a file name that
             // can be stored on a filesystem. We therefore have to
             // normalize the names.
             //
             // We don't want two entries with the same name.
             //
             // If a redirect (i.e. an empty file) exists, let's keep it
             // separate from the pages that still have content.
             //
             // Sanity check:
             // 1. Get the list of redirects
             // 2. Get the list of pages
             //
             // If we have a duplicate page, throw an exception!
             if ($wikiDocument->hasRedirect() === true) {
                 // Pages we know are redirects within MediaWiki; we won't
                 // add them to the $pages array because they would be
                 // empty content with only a redirect anyway.
                 if ($normalized_location !== $redirect_to) {
                     $redirects[str_replace('_', ' ', $normalized_location)] = $redirect_to;
                 }
             } elseif (!in_array($normalized_location, array_keys($pages))) {
                 // Pages we know have content, let's count them!
                 if ($wikiDocument->hasRedirect() === false) {
                     $pages[$normalized_location] = $title;
                 }
             } elseif (in_array($title, $temporary_acceptable_duplicates)) {
                 // Let's not throw; we've got that covered.
             } else {
                 // Hopefully we never encounter this.
                 $previous = $pages[$normalized_location];
                 $duplicatePagesExceptionText = 'We have a duplicate entry for %s; it would be stored in %s, which would override the content of %s';
                 throw new Exception(sprintf($duplicatePagesExceptionText, $title, $file_path, $previous));
             }
             $output->writeln(PHP_EOL . PHP_EOL);
             ++$counter;
         }
     }
     /*
      * Work out some numbers on the edit counts
      *
      * - Average
      * - Median
      */
     $total_edits = 0;
     sort($rev_count);
     $edit_average = array_sum($rev_count) / $counter;
     // Calculate median
     $value_in_middle = floor(($counter - 1) / 2);
     if ($counter % 2) {
         // odd number, middle is the median
         $edit_median = $rev_count[$value_in_middle];
     } else {
         // even number, take the average of the two middle values
         $low = $rev_count[$value_in_middle];
         $high = $rev_count[$value_in_middle + 1];
         $edit_median = ($low + $high) / 2;
     }
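     // Worked example (a sketch): with $rev_count = [1, 2, 4, 10] and $counter = 4,
     // $value_in_middle = 1, so $edit_median = (2 + 4) / 2 = 3 and $edit_average = 17 / 4 = 4.25.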
     $numbers = array('Numbers:');
     $numbers[] = sprintf('  - "iterations": %d', $counter);
     $numbers[] = sprintf('  - "content pages": %d', count($pages));
     $numbers[] = sprintf('  - "redirects": %d', count($redirects));
     $numbers[] = sprintf('  - "translated": %d', count($translations));
     $numbers[] = sprintf('  - "not in a directory": %d', count($directlyOnRoot));
     $numbers[] = sprintf('  - "redirects for URL sanity": %d', count($sanity_redirs));
     $numbers[] = sprintf('  - "edits average": %d', $edit_average);
     $numbers[] = sprintf('  - "edits median": %d', $edit_median);
     $this->filesystem->dumpFile('reports/numbers.txt', implode(PHP_EOL, $numbers));
     $this->filesystem->dumpFile('reports/hundred_revs.txt', implode(PHP_EOL, $moreThanHundredRevs));
     natcasesort($translations);
     $this->filesystem->dumpFile('reports/translations.txt', implode(PHP_EOL, $translations));
     natcasesort($directlyOnRoot);
     $this->filesystem->dumpFile('reports/directly_on_root.txt', implode(PHP_EOL, $directlyOnRoot));
     natcasesort($urlsWithContent);
     $this->filesystem->dumpFile('reports/url_all.txt', implode(PHP_EOL, $urlsWithContent));
     natcasesort($urlParts);
     $this->filesystem->dumpFile('reports/url_parts.txt', implode(PHP_EOL, $urlParts));
     // Creating list for https://github.com/webplatform/mediawiki-conversion/issues/2
     ksort($urlPartsAll);
     $urlPartsAllOut = array('All words that exist in a URL, and the different ways they are written (needs harmonizing!):');
     foreach ($urlPartsAll as $urlPartsAllKey => $urlPartsAllRow) {
         $urlPartsAllEntryUnique = array_unique($urlPartsAllRow);
         if (count($urlPartsAllEntryUnique) > 1) {
             $urlPartsAllOut[] = sprintf(' - %s', implode(', ', $urlPartsAllEntryUnique));
         }
     }
     $this->filesystem->dumpFile('reports/url_parts_variants.txt', implode(PHP_EOL, $urlPartsAllOut));
     ksort($redirects, SORT_NATURAL | SORT_FLAG_CASE);
     ksort($sanity_redirs, SORT_NATURAL | SORT_FLAG_CASE);
     $nginx_redirects = [];
     $nginx_redirects[] = 'rewrite ^/wiki/((Special|Template|User).*) /disabled?r=$1 permanent;';
     $nginx_redirects[] = 'rewrite ^/w/(.*) /disabled?r=$1 permanent;';
     $nginx_redirects[] = 'rewrite ^/$ /Main_Page permanent;';
     $nginx_redirects[] = 'rewrite ^/wiki/?$ /Main_Page permanent;';
     //                             /wiki/tutorials/canvas/canvas_tutorial
     //$nginx_redirects[] = 'rewrite ^/wiki/canvas/tutorial(.*)$ /wiki/tutorials/canvas$1 permanent;';
     $nginx_redirects[] = 'rewrite ^/wiki/WPD\\:Community$ /community permanent;';
     $nginx_redirects[] = 'rewrite ^/wiki/WPD\\:Contributors_Guide$ /contribute permanent;';
     $nginx_esc[':'] = '\\:';
     $nginx_esc['('] = '\\(';
     $nginx_esc[')'] = '\\)';
     $nginx_esc[','] = '\\,';
     $nginx_esc[' '] = '(\\ |_)';
     // Ordering matters; otherwise the () would get escaped too, and we want them kept as-is here!
     $prepare_nginx_redirects = array_merge($sanity_redirs, $redirects);
     foreach ($prepare_nginx_redirects as $url => $redirect_to) {
         // NGINX case-insensitive matching is done through (?i); this should be documented!
         $nginx_redirects[] = sprintf('rewrite (?i)^/wiki/%s$ /%s permanent;', str_replace(array_keys($nginx_esc), $nginx_esc, $url), $redirect_to);
     }
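     // As a sketch (not taken from an actual run), a sanity redirect such as
     // 'WPD:Doc Sprints' => 'WPD/Doc_Sprints' is expected to come out as:
     //
     //     rewrite (?i)^/wiki/WPD\:Doc(\ |_)Sprints$ /WPD/Doc_Sprints permanent;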
     $nginx_redirects[] = 'rewrite ^/wiki/(.*) /$1 permanent;';
     // Has to be the last!
     $this->filesystem->dumpFile('reports/nginx_redirects.map', implode(PHP_EOL, $nginx_redirects));
     $sanity_redirects_out = array('URLs to return new Location (from => to):');
     foreach ($sanity_redirs as $title => $sanitized) {
         $sanity_redirects_out[] = sprintf(' - "%s": "%s"', $title, $sanitized);
     }
     $this->filesystem->dumpFile('reports/sanity_redirects.txt', implode(PHP_EOL, $sanity_redirects_out));
     $redirects_out = array('Redirects (from => to):');
     foreach ($redirects as $url => $redirect_to) {
         $redirects_out[] = sprintf(' - "%s": "%s"', $url, $redirect_to);
     }
     $this->filesystem->dumpFile('reports/redirects.txt', implode(PHP_EOL, $redirects_out));
     if ($listMissed === true) {
         $yaml = new Yaml\Dumper();
         $yaml->setIndentation(2);
         try {
             $missed_out = $yaml->dump($missedIndexes, 3, 0, false, false);
         } catch (Exception $e) {
             $missed_out = sprintf('Could not create YAML out of missedIndexes array; Error was %s', $e->getMessage());
         }
         $this->filesystem->dumpFile('reports/missed_retry_argument.txt', 'app/console mediawiki:run 3 --retry=' . implode(',', array_keys($missedIndexes)));
         $this->filesystem->dumpFile('reports/missed_entries.yml', 'Missed:' . PHP_EOL . $missed_out);
         $output->writeln('Created missed_retry_argument.txt and missed_entries.yml in reports/; you can try to recover!');
     }
 }
 protected function execute(InputInterface $input, OutputInterface $output)
 {
     $this->users = [];
     $this->filesystem = new Filesystem();
     $this->titleFilter = new TitleFilter();
     $passNbr = (int) $input->getArgument('pass');
     $retries = explode(',', $input->getOption('retry'));
     $resumeAt = (int) $input->getOption('resume-at');
     // Maximum number of pages we go through
     $maxHops = (int) $input->getOption('max-pages');
     // Maximum number of revisions per page we go through
     $revMaxHops = (int) $input->getOption('max-revs');
     $listMissed = $input->getOption('missed');
     // Counts the number of pages we go through
     $counter = 0;
     $redirects = [];
     $pages = [];
     $urlParts = [];
     if (count($retries) >= 1 && $retries[0] !== '' && $passNbr !== 3) {
         throw new DomainException('Retry option is only supported at 3rd pass');
     }
     if ($listMissed === true && $passNbr === 3) {
         $missed_file = DATA_DIR . '/missed.yml';
         if (realpath($missed_file) === false) {
             throw new Exception(sprintf('Could not find missed file at %s', $missed_file));
         }
         $missedFileContents = file_get_contents($missed_file);
         $parser = new Yaml\Parser();
         try {
             $missed = $parser->parse($missedFileContents);
         } catch (Exception $e) {
             throw new Exception(sprintf('Could not get file %s contents to be parsed as YAML. Is it in YAML format?', $missed_file), null, $e);
         }
         if (!isset($missed['missed'])) {
             throw new Exception('Please ensure missed.yml has a list of titles under a "missed:" top level key');
         }
         $this->missed = $missed['missed'];
     } elseif ($listMissed === true && $passNbr !== 3) {
         throw new DomainException('Missed option is only supported at 3rd pass');
     }
     $repoInitialized = realpath(GIT_OUTPUT_DIR . '/.git') !== false;
     $this->git = new GitRepository(realpath(GIT_OUTPUT_DIR));
     if ($repoInitialized === false) {
         $this->git->init()->execute();
     }
     if ($passNbr === 3) {
         /*
          * Your MediaWiki API URL
          */
         $apiUrl = MEDIAWIKI_API_ORIGIN . '/w/api.php?format=json&action=parse&prop=text|links|templates|';
         $apiUrl .= 'images|externallinks|categories|sections|headitems|displaytitle|iwlinks|properties&pst=1';
         $apiUrl .= '&disabletoc=true&disablepp=true&disableeditsection=true&preview=true&page=';
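         // As a sketch, once the converter appends a page name such as "html/elements/tr",
         // the request is expected to look roughly like:
         //     {MEDIAWIKI_API_ORIGIN}/w/api.php?format=json&action=parse&prop=...&page=html/elements/tr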
         // We are at the conversion pass; instantiate our Converter!
         $this->converter = new MediaWikiToHtml();
         $this->converter->setApiUrl($apiUrl);
         sort($retries);
         if (count($retries) === 1 && $retries[0] === '') {
             unset($retries);
         }
     } else {
         unset($retries);
     }
     /* -------------------- Author --------------------
      *
      * Array of MediaWikiContributor author objects, kept as $this->users[$uid],
      * where $uid is the MediaWiki user_id.
      *
      * You may have to increase the memory_limit value,
      * but we'll load this only once.
      **/
     $users_file = DATA_DIR . '/users.json';
     $users_loop = json_decode(file_get_contents($users_file), 1);
     foreach ($users_loop as &$u) {
         $uid = (int) $u['user_id'];
         $this->users[$uid] = new MediaWikiContributor($u);
         // Don't fill too much memory, if that helps.
         unset($u);
     }
     /* -------------------- /Author -------------------- **/
     /* -------------------- XML source -------------------- **/
     $file = DATA_DIR . '/dumps/main_full.xml';
     $streamer = XmlStringStreamer::createStringWalkerParser($file);
     /* -------------------- /XML source -------------------- **/
     while ($node = $streamer->getNode()) {
         /**
          * 3rd pass, handle retries.
          *
          * This is useful if you went through all pages but some of them didn't work.
          * We can ask to re-run only specific ones by using --retry= and a comma-separated
          * list of index numbers (i.e. the $counter value we use for each page node).
          *
          * This set of cases handles three situations, only at the 3rd pass AND when the
          * command has --retry=n,n,n specified.
          *
          * 1. If current iteration ($counter) *matches* one of the $retries entries
          *
          *    We want to let the process be executed through and added as a revision
          *
          * 2. Current iteration ($counter) *isn’t listed* in $retries; go to next.
          *
          * 3. We have no entries in $retries anymore, exit.
          *
          * ... THIS IS BOGUS, USE --missed INSTEAD!
          */
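         // For example, reports/missed_retry_argument.txt generates a command of this
         // shape (the index numbers here are illustrative):
         //
         //     app/console mediawiki:run 3 --retry=2,14,53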
         if (isset($retries) && in_array($counter, $retries)) {
             $retryNodeIndex = array_search($counter, $retries);
             unset($retries[$retryNodeIndex]);
             $output->writeln(PHP_EOL . sprintf('Will work on %d', $counter) . PHP_EOL);
         } elseif (isset($retries) && count($retries) >= 1) {
             ++$counter;
             continue;
         } elseif (isset($retries) && count($retries) === 0) {
             $output->writeln('No more retries to work with' . PHP_EOL);
             break;
         }
         /*
          * 3rd pass, handle interruption by telling where to resume work.
          *
          * This is useful if the job stopped and you want to resume work at a specific point.
          */
         if ($counter < $resumeAt) {
             ++$counter;
             continue;
         }
         /*
          * Limit the number of pages we’ll work on.
          *
          * Useful if you want to test the conversion script without going through all the content.
          */
         if ($maxHops > 0 && $maxHops === $counter) {
             $output->writeln(sprintf('Reached desired maximum of %d documents', $maxHops) . PHP_EOL);
             break;
         }
         $pageNode = new SimpleXMLElement($node);
         if (isset($pageNode->title)) {
             $wikiDocument = new MediaWikiDocument($pageNode);
             $persistable = new GitCommitFileRevision($wikiDocument, 'out/content/', '.md');
             $title = $wikiDocument->getTitle();
             $normalized_location = $wikiDocument->getName();
             $file_path = $this->titleFilter->filter($persistable->getName());
             $redirect_to = $this->titleFilter->filter($wikiDocument->getRedirect());
             // False if not a redirect, string if it is
             $is_translation = $wikiDocument->isTranslation();
             $language_code = $wikiDocument->getLanguageCode();
             $language_name = $wikiDocument->getLanguageName();
             if ($listMissed === true && !in_array($normalized_location, $this->missed)) {
                 ++$counter;
                 continue;
             }
             if ($passNbr === 3 && $wikiDocument->hasRedirect() === false) {
                 $random = rand(5, 10);
                 $output->writeln(PHP_EOL . sprintf('--- sleep for %d to not break production ---', $random));
                 sleep($random);
             }
             $revs = $wikiDocument->getRevisions()->count();
             $output->writeln(sprintf('"%s":', $title));
             $output->writeln(sprintf('  - index: %d', $counter));
             $output->writeln(sprintf('  - normalized: %s', $normalized_location));
             $output->writeln(sprintf('  - file: %s', $file_path));
             if ($wikiDocument->hasRedirect() === true) {
                 $output->writeln(sprintf('  - redirect_to: %s', $redirect_to));
             }
             if ($is_translation === true) {
                 $output->writeln(sprintf('  - lang: %s (%s)', $language_code, $language_name));
             }
             /*
              * Merge deleted content history under current content.
              *
              * 1st pass: Only those with redirects (i.e. deleted pages). Should leave an empty out/ directory!
              * 2nd pass: Only those without redirects (i.e. current content).
              * 3rd pass: Only those without redirects; their latest version gets passed through the converter.
              */
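             // As a usage sketch (the console script name is the one the report pass writes out):
             //
             //     app/console mediawiki:run 1   # commit history of pages that are redirects (deleted pages)
             //     app/console mediawiki:run 2   # commit history of pages that still have content
             //     app/console mediawiki:run 3   # convert the latest revision through the MediaWiki API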
             if ($wikiDocument->hasRedirect() === false && $passNbr === 1) {
                 // Skip all NON redirects for pass 1
                 $output->writeln(sprintf('  - skip: Document %s WITHOUT redirect, at pass 1 (handling redirects)', $title) . PHP_EOL . PHP_EOL);
                 ++$counter;
                 continue;
             } elseif ($wikiDocument->hasRedirect() && $passNbr === 2) {
                 // Skip all redirects for pass 2
                 $output->writeln(sprintf('  - skip: Document %s WITH redirect, at pass 2 (handling non redirects)', $title) . PHP_EOL . PHP_EOL);
                 ++$counter;
                 continue;
             } elseif ($wikiDocument->hasRedirect() && $passNbr === 3) {
                 // Skip all redirects for pass 3
                 $output->writeln(sprintf('  - skip: Document %s WITH redirect, at pass 3', $title) . PHP_EOL . PHP_EOL);
                 ++$counter;
                 continue;
             }
             if ($passNbr < 1 || $passNbr > 3) {
                 throw new DomainException('This command has only three passes.');
             }
             foreach (explode('/', $normalized_location) as $urlDepth => $urlPart) {
                 $urlParts[strtolower($urlPart)] = $urlPart;
             }
             $revList = $wikiDocument->getRevisions();
             $revLast = $wikiDocument->getLatest();
             $revCounter = 0;
             if ($passNbr === 3) {
                 // Overwrite $revList: for the last pass we only
                 // convert the latest revision.
                 $revList = new SplDoublyLinkedList();
                 // Pass some data we already have so we can
                 // get it in the converted document.
                 if ($is_translation === true) {
                     $revLast->setFrontMatter(array('lang' => $language_code));
                 }
                 $revList->push($revLast);
             } else {
                 $output->writeln(sprintf('  - revs: %d', $revs));
                 $output->writeln(sprintf('  - revisions:'));
             }
             /* ----------- REVISIONS --------------- **/
             for ($revList->rewind(); $revList->valid(); $revList->next()) {
                 if ($revMaxHops > 0 && $revMaxHops === $revCounter) {
                     $output->writeln(sprintf('    - stop: Reached maximum %d revisions', $revMaxHops) . PHP_EOL . PHP_EOL);
                     break;
                 }
                 $wikiRevision = $revList->current();
                 /* -------------------- Author -------------------- **/
                 // An edge case: MediaWiki may report the author as user_id 0 even though we don't
                 // have that user, so we'll use the first user instead.
                 $contributor_id = $wikiRevision->getContributorId() === 0 ? 1 : $wikiRevision->getContributorId();
                 /*
                  * Fix duplicate user accounts and merge them into one.
                  *
                  * Please adjust to suit your own data.
                  *
                  * Queried using jq;
                  *
                  *     cat data/users.json | jq '.[]|select(.user_real_name == "Renoir Boulanger")'
                  */
                 //if (in_array($contributor_id, [172943, 173060])) {
                 //    $contributor_id = 10080;
                 //}
                 if (isset($this->users[$contributor_id])) {
                     $contributor = clone $this->users[$contributor_id];
                     // We want a copy, because it's specific to here only anyway.
                     $wikiRevision->setContributor($contributor, false);
                 } else {
                     // In case we didn’t find data for $this->users[$contributor_id]
                     $contributor = clone $this->users[1];
                     // We want a copy, because it's specific to here only anyway.
                     $wikiRevision->setContributor($contributor, false);
                 }
                 /* -------------------- /Author -------------------- **/
                 // Let's handle conversion only at the 3rd pass.
                 if ($passNbr === 3) {
                     try {
                         $revision = $this->converter->apply($wikiRevision);
                     } catch (Exception $e) {
                         $output->writeln(sprintf('    - ERROR: %s, left a note in errors/%d.txt', $e->getMessage(), $counter));
                         $this->filesystem->dumpFile(sprintf('errors/%d.txt', $counter), $e->getMessage());
                         ++$counter;
                         continue;
                     }
                     // user_id 10080 is Renoirb (yours truly)
                     $revision->setAuthor($this->users[10080]);
                     $revision_id = $revLast->getId();
                 } else {
                     $revision = $wikiRevision;
                     $revision_id = $wikiRevision->getId();
                     $output->writeln(sprintf('    - id: %d', $revision_id));
                     $output->writeln(sprintf('      index: %d', $revCounter));
                 }
                 $persistArgs = $persistable->setRevision($revision)->getArgs();
                 if ($passNbr < 3) {
                     foreach ($persistArgs as $argKey => $argVal) {
                         if ($argKey === 'message') {
                             $argVal = mb_strimwidth($argVal, strpos($argVal, ': ') + 2, 100);
                         }
                         $output->writeln(sprintf('      %s: %s', $argKey, $argVal));
                     }
                 }
                 $removeFile = false;
                 if ($passNbr < 3 && $revLast->getId() === $wikiRevision->getId() && $wikiDocument->hasRedirect()) {
                     $output->writeln('      is_last_and_has_redirect: True');
                     $removeFile = true;
                 }
                 $persistable->setRevision($revision);
                 $this->filesystem->dumpFile($file_path, (string) $persistable);
                 try {
                     $this->git->add()->execute(preg_replace('/^out\\//', '', $file_path));
                 } catch (GitException $e) {
                     $message = sprintf('Could not add file "%s" with title "%s" for revision %d', $file_path, $title, $revision_id);
                     throw new Exception($message, null, $e);
                 }
                 if ($passNbr < 3) {
                     // We won’t expose all WebPlatform user emails to the public. Instead,
                     // we’ll create a bogus email alias based on their MediaWiki username.
                     $real_name = $wikiRevision->getContributor()->getRealName();
                     $username = $wikiRevision->getContributor()->getName();
                     $email = sprintf('%s@%s', $username, COMMITER_ANONYMOUS_DOMAIN);
                     $author_overload = sprintf('%s <%s>', $real_name, $email);
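                     // A sketch of the override: a contributor named "Jdoe" with the (hypothetical)
                     // real name "John Doe" yields "John Doe <Jdoe@COMMITER_ANONYMOUS_DOMAIN>".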
                     try {
                         $this->git->commit()->message($persistArgs['message'])->author('"' . $author_overload . '"')->date('"' . $persistArgs['date'] . '"')->allowEmpty()->execute();
                     } catch (GitException $e) {
                         var_dump($this->git);
                         $message = sprintf('Could not commit for revision %d', $revision_id);
                         throw new Exception($message, null, $e);
                     }
                     if ($removeFile === true) {
                         try {
                             $this->git->rm()->execute(preg_replace('/^out\\//', '', $file_path));
                         } catch (GitException $e) {
                             $message = sprintf('Could not remove %s at revision %d', $file_path, $revision_id);
                             throw new Exception($message, null, $e);
                         }
                         $this->git->commit()->message('Remove file; ' . $persistArgs['message'])->author('"' . $author_overload . '"')->date('"' . $persistArgs['date'] . '"')->allowEmpty()->execute();
                         $this->filesystem->remove($file_path);
                     }
                 }
                 /* End of $passNbr < 3 */
                 ++$revCounter;
             }
             /* ----------- REVISIONS --------------- **/
             $output->writeln(PHP_EOL);
         }
         ++$counter;
     }
     if ($passNbr === 3) {
         $output->writeln('3rd pass. One. Commit.' . PHP_EOL . PHP_EOL);
         try {
             $this->git->commit()->message($revision->getComment())->execute();
         } catch (GitException $e) {
             var_dump($this->git);
             $message = sprintf('Could not commit for revision %d', $revision_id);
             throw new Exception($message, null, $e);
         }
     }
 }