/**
 * A page whose replacements section is malformed must make the parser
 * return null (not merely an "empty" value) and record at least one error.
 */
public function testIncorrect()
{
    $dom = SimpleHtmlDom::fromUrl(__DIR__ . '/incorrect.html');
    $parser = new ReplacementsParser();

    $result = $parser->getReplacements($dom);

    // assertEquals(null, ...) compares loosely, so an empty array would also
    // pass; assertNull pins the documented "returns null" contract exactly.
    $this->assertNull($result);
    $this->assertGreaterThan(
        0,
        count($parser->getErrors()),
        'parser should report at least one error for incorrect.html'
    );
}
/**
 * Parses replacements out of the given document and stores every entry that
 * is missing locally or whose value differs from the stored one.
 *
 * Parser errors are always forwarded to logErrors(), even when parsing
 * failed completely. One line is echoed for each entry that gets written.
 *
 * @param SimpleHtmlDom $dom parsed page handed to ReplacementsParser
 */
private function updateReplacements($dom)
{
    $repository = new ReplacementsRepository();
    $replacementsParser = new ReplacementsParser();

    $parsed = $replacementsParser->getReplacements($dom);
    $this->logErrors('ReplacementsParser', $replacementsParser->getErrors());

    // null means the page could not be parsed at all - nothing to sync
    if ($remoteList = $parsed, $remoteList === null) {
        return;
    }

    foreach ($remoteList as $entry) {
        $stored = $repository->getByClassAndDate($entry->class, $entry->date);

        // already stored with an identical value - leave it untouched
        if ($stored !== null && $entry->value === $stored->value) {
            continue;
        }

        $repository->setValue($entry->class, $entry->date, $entry->value);
        echo "updated replacements/{$entry->date}/{$entry->class}\n";
    }
}
/**
 * Extracts the replacements from a parsed HTML page.
 *
 * The first `div[class=item-page]` is located, and inside it the first `h4`
 * (holding the date) and the first `table` (holding the entries). Null is
 * returned when any of these elements is missing or when the date cannot be
 * parsed; only the latter case records an error.
 *
 * Table rows are interpreted by their cell count:
 *  - one cell starts a new Replacements entry named after the cell text,
 *  - two cells add "hour number => text" to the entry started most recently,
 *  - any other row is ignored.
 * Rows that cannot be used are skipped and reported through $this->errors.
 *
 * @param SimpleHtmlDom $dom
 * @return Replacements[]|null
 */
public function getReplacements($dom)
{
    $container = $dom->root()->find('div[class=item-page]', 0);
    if ($container === null) {
        return null;
    }

    $heading = $container->find('h4', 0);
    $table = $container->find('table', 0);
    if ($heading === null || $table === null) {
        return null;
    }

    // the heading carries the date the whole table applies to
    $headingText = $heading->text();
    $date = ReplacementsParser::parseDate(trim($headingText));
    if ($date === null) {
        $this->errors[] = "incorrect date format: " . $headingText;
        return null;
    }

    /** @var Replacements[] $result */
    $result = [];
    /** @var Replacements|null $entry entry that following two-cell rows belong to */
    $entry = null;

    foreach ($table->find('tr') as $rowIndex => $tableRow) {
        $cells = $tableRow->find('th, td');
        $cellCount = count($cells);

        // class name row, e.g. | 2a |
        if ($cellCount === 1) {
            $entry = new Replacements();
            $entry->date = $date;
            $entry->class = $cells[0]->text();
            $entry->value = [];
            $result[] = $entry;
            continue;
        }

        // anything that is not a two-cell replacement row is ignored
        if ($cellCount !== 2) {
            continue;
        }

        // replacement row, e.g. | 1 | j. niemiecki, mgr T. Wajdzik |
        if ($entry === null) {
            $this->errors[] = "row: {$rowIndex}, no class name occurred before replacement text";
            continue;
        }

        $hourText = $cells[0]->text();
        $text = $cells[1]->text();
        if (!is_numeric($hourText)) {
            $this->errors[] = "row: {$rowIndex}, invalid hour no.: '{$hourText}'";
            continue;
        }

        // $entry is the same object already stored in $result
        $entry->value[intval($hourText)] = $text;
    }

    return $result;
}