/**
 * Scrape the thesaurus.com result table for a word into a nested array.
 *
 * Every row of `table.the_content` is read as a (label, value) pair: the first
 * cell is the label (tags and colons stripped), the second is the value (tags
 * stripped, newlines turned into spaces, trimmed). The label is translated to
 * a property name with label_to_property().
 *
 * Rows whose property is `main_entry`, `part_of_speech` or `definition` update
 * the current context and are not stored themselves. Every other row is stored
 * under the context in effect when it is encountered.
 *
 * @param string $word Word to look up; appended to the browse URL as-is.
 *                     NOTE(review): no URL-encoding is applied here - confirm
 *                     that callers pass a URL-safe value.
 *
 * @return array Nested as
 *               $entries[main_entry][part_of_speech][definition][property] = value.
 */
function get_thesaurus_entry($word)
{
    $scraper = new Scrapt();
    $scraper->setURL('http://thesaurus.com/browse/' . $word);
    // Return value is unused; the call is kept so the original call sequence
    // before getPage() is preserved (presumably it performs the fetch - confirm).
    $scraper->cache();
    $page = $scraper->getPage();

    // Explicit whitelist of context fields. The previous implementation used a
    // variable variable (${$prop}), which would overwrite ANY local variable
    // whose name happened to match a property name.
    $context = array(
        'main_entry' => '',
        'part_of_speech' => '',
        'definition' => '',
    );
    $entries = array();

    foreach ($page->findBySelector('table.the_content tr') as $row) {
        $labelCell = $row->find('td', 0);
        $valueCell = $row->find('td', 1);

        // A row without both cells cannot be read as a (label, value) pair;
        // skip it instead of dereferencing a missing node.
        if (!$labelCell || !$valueCell) {
            continue;
        }

        $name = str_replace(':', '', strip_tags($labelCell->innertext));
        $value = trim(str_replace("\n", " ", strip_tags($valueCell->innertext)));
        $prop = label_to_property($name);

        if (is_string($prop) && isset($context[$prop])) {
            $context[$prop] = $value;
            continue;
        }

        // Intermediate levels are created automatically by the nested assignment.
        $entries[$context['main_entry']][$context['part_of_speech']][$context['definition']][$prop] = $value;
    }

    return $entries;
}
/**
 * Scrape the bookmark list from the delicious.com front page.
 *
 * Each `#bookmarklist li` item that contains a `.data h4>a` link produces one
 * entry; items without such a link are ignored.
 *
 * @return array List of arrays with the keys `url`, `name`, `faves` and
 *               `tweets`. `faves` and `tweets` are integers and default to 0
 *               when the corresponding counter element is missing.
 */
function scrape_delicious()
{
    $scraper = new Scrapt();
    $scraper->setURL('http://www.delicious.com/');
    // Return value is unused; the call is kept so the original call sequence
    // before getPage() is preserved (presumably it performs the fetch - confirm).
    $scraper->cache();
    $page = $scraper->getPage();

    $marks = array();

    foreach ($page->findBySelector('#bookmarklist li') as $item) {
        $link = $item->find('.data h4>a', 0);
        if (!($link instanceof simple_html_dom_node)) {
            continue;
        }

        // The counter elements are looked up separately so that a bookmark
        // lacking one of them yields 0 instead of dereferencing a missing node.
        $favesNode = $item->find('span.delNavCount', 0);
        $tweetsNode = $item->find('h5.num-tweets', 0);

        $marks[] = array(
            'url' => $link->href,
            'name' => $link->innertext,
            'faves' => $favesNode ? intval($favesNode->innertext) : 0,
            'tweets' => $tweetsNode ? intval($tweetsNode->innertext) : 0,
        );
    }

    return $marks;
}
/**
 * Submit a form through the shared agent and wrap the response as a page.
 *
 * The form's action is resolved against the URL of the page the form came
 * from, validated with self::validateURL(), and then requested with the
 * form's method and payload.
 *
 * @param Scrapt_Component_Form $form       Form to submit.
 * @param mixed                 $withButton Currently unused by this method.
 * @param bool                  $cache      Currently unused by this method.
 *
 * @return mixed Result of Scrapt_Webpage::fromData() for the response's
 *               'data' element and the resolved action URL.
 */
public static function submit(Scrapt_Component_Form $form, $withButton = null, $cache = false)
{
    $payload = $form->getPayload();
    $rawAction = $form->getAction();
    $method = $form->getMethod();

    $action = Scrapt::resolveURL($form->getPageURL(), $rawAction);
    self::validateURL($action);

    $data = self::$agent->request($method, $action, $payload);

    // Leftover debugging (print_r + exit) used to terminate the script here,
    // which made the return below unreachable; it has been removed.
    return Scrapt_Webpage::fromData($data['data'], $action);
}
/**
 * Build an instance from a URL.
 *
 * Creates a Scrapt for the URL, calls its cache() method and hands the result,
 * together with the URL, to self::fromData().
 *
 * @param string $url Address passed to the Scrapt constructor and to fromData().
 *
 * @return mixed Whatever self::fromData() returns.
 */
public static function fromURL($url)
{
    $fetcher = new Scrapt($url);
    // presumably the fetched page content - confirm against Scrapt::cache()
    $contents = $fetcher->cache();

    return self::fromData($contents, $url);
}