/**
 * Parses AIS html into a DOM document.
 *
 * libxml parse diagnostics are collected internally and discarded instead of
 * being raised as PHP warnings; the previous libxml error-handling mode is
 * restored before the method continues.
 *
 * @param Trace $trace trace object used to log progress
 * @param string $html html source to parse
 *
 * @return DOMDocument parsed DOM
 * @throws ParseException on failure, including an empty $html
 */
public static function createDomFromHtml(Trace $trace, $html)
{
    Preconditions::checkIsString($html);
    $dom = new DOMDocument();
    $trace->tlog("Loading html to DOM");

    // An empty source can never be parsed. Older PHP versions only warn and
    // return false, while PHP 8 throws ValueError from loadHTML(); report the
    // failure through the documented exception type in both cases.
    if ($html === '') {
        throw new ParseException("Problem parsing html to DOM.");
    }

    // Real-world html is rarely well-formed; keep libxml's complaints out of
    // the PHP error stream without resorting to blanket @-suppression.
    $previousMode = libxml_use_internal_errors(true);
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previousMode);

    if (!$loaded) {
        throw new ParseException("Problem parsing html to DOM.");
    }

    $trace->tlog('Fixing id attributes in the DOM');
    ParserUtils::fixIdAttributes($trace, $dom);

    return $dom;
}
/**
 * Builds an array of values parsed from the html of an information list.
 *
 * The first <b> element of the document is processed on its own, then every
 * <tr> except the first one is scanned: each <b> or <div> element that is a
 * direct child of a <td> is parsed and its result merged into the output
 * (array_merge semantics, so later string keys overwrite earlier ones).
 *
 * Resets $this->attribute_names to an empty array before parsing.
 *
 * @param Trace $trace trace object used to log progress
 * @param string $aisResponseHtml html response to parse
 *
 * @return array complete array with parsed data from html
 * @throws ParseException on failure of creating DOM from html
 */
public function parseHtmlIntoTable(Trace $trace, $aisResponseHtml)
{
    $this->attribute_names = array();
    Preconditions::checkIsString($aisResponseHtml);

    $document = ParserUtils::createDomFromHtml(
        $trace,
        self::fixBr($trace, $aisResponseHtml)
    );
    // NOTE(review): this flag is set after the document has already been
    // loaded - confirm it still has the intended effect.
    $document->preserveWhiteSpace = false;

    // The school name is the only value we want that lives outside the table.
    $boldElements = $document->getElementsByTagName("b");
    $trace->tlog("Finding first element with tag name 'b'");
    $result = $this->spracujB($trace, $boldElements->item(0));

    $rows = $document->getElementsByTagName("tr");
    $trace->tlog("Getting all elements with tag name 'tr'");

    // Walk through every <tr>; the very first one is skipped because the <b>
    // in it only holds the title of the information list.
    $headerRowSkipped = false;
    foreach ($rows as $row) {
        if (!$headerRowSkipped) {
            $headerRowSkipped = true;
            continue;
        }

        $trace->tlog("Getting all elements with tag name 'td'");
        foreach ($row->getElementsByTagName("td") as $cell) {
            if ($cell->hasChildNodes()) {
                $trace->tlog("Getting all child nodes of element 'td'");

                foreach ($cell->childNodes as $child) {
                    // Only element nodes carry a tag name worth dispatching on.
                    if ($child->nodeType === \XML_ELEMENT_NODE) {
                        switch ($child->tagName) {
                            case 'b':
                                $trace->tlog("Parsing node with tag name 'b'");
                                $result = array_merge($result, $this->spracujB($trace, $child));
                                break;
                            case 'div':
                                $trace->tlog("Parsing node with tag name 'div'");
                                $result = array_merge($result, $this->parseDiv($trace, $child));
                                break;
                        }
                    }
                }
            }
        }
    }

    return $result;
}