$atribut = $atribut->plaintext; $atribut = substr($atribut, 0, -2); //print $atribut; for ($xxx = 0; $postual->find("option", $xxx) != ""; $xxx++) { $option_curent = $postual->find("option", $xxx); $option_number = $postual->find("option", $xxx); $pos = strrpos($option_curent, "selected"); if ($pos === false) { $dodatok = 0; } else { $dodatok = 1; } $option_curent = $option_curent->plaintext; $option_number = $option_number->value; $moznosti[$xxx] = $option_number . "|" . $atribut . ": " . $option_curent . "|" . $dodatok; $moznosti[$xxx] = traducirHTML($moznosti[$xxx]); $moznostix = explode("|", $moznosti[$xxx]); //print $moznostix[2] . "\n"; $ulozm[$f] = $moznosti[$xxx]; $f++; } } } //postual end $a = "// Combinations"; $b = "// Colors"; $go = get_between($html, $a, $b); $gox = explode(";", $go); $nr = count(explode(";", $go)); for ($gox_pom = 0; $gox_pom < $nr - 1; $gox_pom++) { $medzikus = get_between($gox[$gox_pom], "('", "')") . "|" . get_between2($gox[$gox_pom], "'), 1, ", ",");
// Discounted price: only looked up when a discount percentage was found.
// The loose comparison is kept so that null and "" both mean "no discount".
if ($perc_zlava != "") {
    foreach ($html->find("span#our_price_display") as $price_node) {
        // Keep only the text before the first space of the displayed price.
        // If several nodes match, the last one wins.
        $test2 = explode(" ", $price_node->plaintext);
        $zlavnena_cena = $test2[0];
    }
}

// Manufacturer: text of the first link inside span.navigation_end.
// find() with an index yields no object when nothing matches, so the
// value stays an empty string instead of reading a property on null.
$vyrobca = "";
$vyrobca_node = $html->find("span.navigation_end a", 0);
if (is_object($vyrobca_node)) {
    $vyrobca = traducirHTML($vyrobca_node->plaintext);
}

// Attribute label: text of the last label in div#attributes with every
// colon removed; '_' is the fallback when the page has no such label.
$atribut = '_';
foreach ($html->find("div#attributes label") as $label_node) {
    $atribut = str_replace(':', '', $label_node->plaintext);
}

// All option texts of div#attributes joined with "|".
// implode() gives an empty string when there are no options, whereas
// trimming the trailing separator with substr() could yield false.
$option_texts = array();
foreach ($html->find("div#attributes option") as $option_node) {
    $option = $option_node->plaintext;
    $option_texts[] = $option;
}
$option_all = implode("|", $option_texts);
/**
 * Scrapes the congreso.es weekly agenda page for one week and stores every
 * event found through scraperwiki::save_sqlite().
 *
 * The page is read through containers with ids agenda1 .. agenda7; each one
 * carries a "prog_dia" header (the day text) and any number of "parrilla"
 * elements (one per event).
 *
 * NOTE(review): the event id is a counter that restarts at 1 on every call,
 * and "id" is the unique key handed to save_sqlite(), so rows saved for
 * different weeks presumably overwrite each other - confirm whether the
 * composite id ($num . "-" . $sem . "-" . $year) should be used instead.
 *
 * @param mixed $sem  Week value, concatenated into the "semana" query parameter.
 * @param mixed $year Year value, appended to the week after a dash.
 * @return void
 */
function scrapSemana($sem, $year)
{
    $url = "http://www.congreso.es/portal/page/portal/Congreso/GenericPopUp?_piref73_2138150_73_2138147_2138147.next_page=/wc/agendaCompleta&semana=" . $sem . "-" . $year;

    $html_content = scraperwiki::scrape($url);
    $html_content = traducirHTML($html_content);
    $agendaHTML = str_get_html($html_content);

    // str_get_html() does not return a DOM object when parsing fails;
    // stop here instead of dying on ->find() further down.
    if (!is_object($agendaHTML)) {
        trigger_error("scrapSemana: could not parse agenda page for " . $sem . "-" . $year, E_USER_WARNING);
        return;
    }

    $num = 1;
    for ($i = 1; $i < 8; $i++) {
        $agendaDia = $agendaHTML->find('div[id=agenda' . $i . ']', 0);
        if (!is_object($agendaDia)) {
            // No container for this day on the page: nothing to scrape.
            continue;
        }

        // Day header text; an empty string is used when the header is missing
        // so the helpers are not handed the result of a property read on null.
        $cabecera = $agendaDia->find('div[class=prog_dia]', 0);
        $fechadia = sinTNS(is_object($cabecera) ? $cabecera->plaintext : '');

        $dia = array();
        $dia["fecha"] = fechaNumerica($fechadia);
        $dia["nombre"] = substrHasta($fechadia, " ");

        foreach ($agendaDia->find('div[class=parrilla]') as $element) {
            $evento = array();
            $evento["dia"] = $dia["nombre"];
            $evento["fecha"] = $dia["fecha"];
            $evento["id"] = $num;
            $evento["hora"] = getHora($element);
            $evento["url"] = getURL($element);
            $evento["lugar"] = getLugar($element);
            // The event text depends on the place, and the info on both.
            $evento["event"] = getEvento($element, $evento["lugar"]);
            $evento["info"] = getInfo($element, $evento["event"], $evento["lugar"]);

            print_r(". scrapeado evento " . $evento["id"]);
            print_r(". evento: " . $evento["event"]);

            scraperwiki::save_sqlite(
                array("id"),
                array(
                    "id" => $evento["id"],
                    "dia" => $evento["dia"],
                    "fecha" => $evento["fecha"],
                    "hora" => $evento["hora"],
                    "url" => $evento["url"],
                    "lugar" => $evento["lugar"],
                    "info" => $evento["info"],
                    "event" => $evento["event"],
                )
            );
            $num++;
        }
    }
}