/**
 * Parses one sequence from the given DOM node into a Sequence object.
 *
 * Steps, in order:
 *  1. remember the node's owner document in $this->doc,
 *  2. let parseHead() and parseBeginning() fill the initial sequence data,
 *  3. walk the detail rows (tr[5]/td/table/tr), read each row's type label
 *     from td[2]/font and its lines from every <tt> below td[3], and hand
 *     the lines to the parse*() method matching the type,
 *  4. wrap the collected data in a Sequence and attach the raw HTML of the
 *     whole node to it.
 *
 * Rows whose type is not recognised raise an E_USER_WARNING and their lines
 * are kept under $sequence['junk'][$type]. Rows without any <tt> line are
 * skipped silently.
 *
 * @param \DOMNode $node Node containing the markup of a single sequence
 *                       (NOTE(review): presumably the sequence's outer
 *                       table element - confirm against the caller).
 *
 * @return Sequence The parsed sequence, including the raw HTML of $node.
 */
public function parse(\DOMNode $node)
{
    $this->doc = $node->ownerDocument;

    $sequence = [];
    $sequence = $this->parseHead($sequence, $node);
    $sequence = $this->parseBeginning($sequence, $node);

    // detail rows contain all the tagged stuff (be extra strict with selecting those, as they can contain
    // a lot of different markup)
    $detailsRows = $this->xpath($node, 'tr[5]/td/table/tr');
    assert(count($detailsRows) > 0, 'sequence must have at least one detail row');

    foreach ($detailsRows as $detailsRow) {
        // the row's type label, lower-cased so it can be matched against the cases below
        $type = strtolower($this->text($detailsRow, 'td[2]/font', 1, true));

        $lineNodes = $this->xpath($detailsRow, 'td[3]//tt');
        $lines = [];
        foreach ($lineNodes as $lineNode) {
            $html = $this->toHTML($lineNode);

            // trim the surrounding <tt> tag; the "s" modifier lets "." span line breaks, so content
            // that is serialized over several lines gets unwrapped as well
            $trimmed = preg_replace('#^<tt>(.*?)</tt>$#s', '$1', $html);
            if ($trimmed !== null) {
                // preg_replace() yields null on a PCRE error; in that case keep the untrimmed HTML
                $html = $trimmed;
            }

            $lines[] = ['text' => $this->toText($lineNode), 'html' => $html, 'node' => $lineNode];
        }

        // just in case we hit something that has no valid lines
        if (count($lines) === 0) {
            continue;
        }

        switch ($type) {
            case 'offset':
                $sequence = $this->parseOffset($sequence, $lines);
                break;
            case 'status':
                $sequence = $this->parseStatus($sequence, $lines);
                break;
            case 'author':
                $sequence = $this->parseAuthor($sequence, $lines);
                break;
            case 'keyword':
                $sequence = $this->parseKeyword($sequence, $lines);
                break;
            case 'comments':
                $sequence = $this->parseComments($sequence, $lines);
                break;
            case 'references':
                $sequence = $this->parseReferences($sequence, $lines);
                break;
            case 'links':
                $sequence = $this->parseLinks($sequence, $lines);
                break;
            case 'formula':
                $sequence = $this->parseFormula($sequence, $lines);
                break;
            case 'example':
                $sequence = $this->parseExample($sequence, $lines);
                break;
            case 'maple':
                $sequence = $this->parseProgram($sequence, $lines, 'maple');
                break;
            case 'mathematica':
                $sequence = $this->parseProgram($sequence, $lines, 'mathematica');
                break;
            case 'prog':
                // generic program rows are stored under the 'other' label
                $sequence = $this->parseProgram($sequence, $lines, 'other');
                break;
            case 'crossrefs':
                $sequence = $this->parseCrossrefs($sequence, $lines);
                break;
            case 'extensions':
                $sequence = $this->parseExtensions($sequence, $lines);
                break;
            default:
                // unknown row type: warn, but keep the lines so nothing gets lost
                trigger_error("Unknown {$type} found in {$sequence['identification']}[OEIS].", E_USER_WARNING);
                $sequence['junk'][$type] = $lines;
        }
    }

    $sequence = new Sequence($sequence);

    // backup, in case we improve parsing later on and want to skip crawling everything again
    $sequence->setRawHTML($this->toHTML($node));

    return $sequence;
}