/**
 * Scrape the thesaurus.com page for a word and return its entries as a nested array.
 *
 * Each row matched by `table.the_content tr` is read as a label cell followed by a
 * value cell. The label (tags and colons stripped) is mapped to a property name by
 * label_to_property(). Rows whose property is `main_entry`, `part_of_speech` or
 * `definition` update the current context; every other row is stored as
 * $entries[main_entry][part_of_speech][definition][property] = value.
 *
 * @param string $word Word to look up. It is appended to the browse URL as-is.
 *                     NOTE(review): no URL-encoding is applied here; confirm that
 *                     callers pass a URL-safe value.
 * @return array Nested array keyed by main entry, part of speech, definition and
 *               property name. Empty when no property rows were found.
 */
function get_thesaurus_entry($word)
{
    $scraper = new Scrapt();
    $scraper->setURL('http://thesaurus.com/browse/' . $word);
    // The return value of cache() is not used; the call is kept ahead of getPage()
    // exactly as in the original flow.
    $scraper->cache();
    $page = $scraper->getPage();
    $rows = $page->findBySelector('table.the_content tr');

    // Only these three properties act as "current context". Keeping them in an
    // explicit array (rather than testing arbitrary local variables by name)
    // means a label that happens to map to another name can never overwrite
    // this function's working state.
    $context = array(
        'main_entry' => '',
        'part_of_speech' => '',
        'definition' => '',
    );
    $entries = array();

    foreach ($rows as $row) {
        $label_cell = $row->find('td', 0);
        $value_cell = $row->find('td', 1);
        // Rows without both a label and a value cell carry no property data;
        // skip them instead of dereferencing a missing node.
        if (!is_object($label_cell) || !is_object($value_cell)) {
            continue;
        }

        $name = str_replace(':', '', strip_tags($label_cell->innertext));
        $value = trim(str_replace("\n", " ", strip_tags($value_cell->innertext)));
        $prop = label_to_property($name);

        if (isset($context[$prop])) {
            $context[$prop] = $value;
            continue;
        }

        // Intermediate levels are created on assignment.
        $entries[$context['main_entry']][$context['part_of_speech']][$context['definition']][$prop] = $value;
    }

    return $entries;
}
/**
 * Scrape http://www.delicious.com/ and return the bookmarks listed on the page.
 *
 * Every `#bookmarklist li` item that contains a `.data h4>a` link produces one
 * entry. Items without such a link are ignored.
 *
 * @return array List of associative arrays with the keys:
 *               'url'    => the link's href,
 *               'name'   => the link's inner text,
 *               'faves'  => integer parsed from `span.delNavCount` (0 if absent),
 *               'tweets' => integer parsed from `h5.num-tweets` (0 if absent).
 */
function scrape_delicious()
{
    $scraper = new Scrapt();
    $scraper->setURL('http://www.delicious.com/');
    // The return value of cache() is not used; the call is kept ahead of getPage()
    // exactly as in the original flow.
    $scraper->cache();
    $page = $scraper->getPage();

    $marks = array();
    foreach ($page->findBySelector('#bookmarklist li') as $item) {
        $link = $item->find('.data h4>a', 0);
        if (!($link instanceof simple_html_dom_node)) {
            continue;
        }

        // The counters get the same guard as the link: an item that lacks one
        // of them reports 0 instead of failing on a non-object access.
        $fave_node = $item->find('span.delNavCount', 0);
        $tweet_node = $item->find('h5.num-tweets', 0);

        $faves = 0;
        if ($fave_node instanceof simple_html_dom_node) {
            $faves = intval($fave_node->innertext);
        }

        $tweets = 0;
        if ($tweet_node instanceof simple_html_dom_node) {
            $tweets = intval($tweet_node->innertext);
        }

        $marks[] = array(
            'url' => $link->href,
            'name' => $link->innertext,
            'faves' => $faves,
            'tweets' => $tweets,
        );
    }

    return $marks;
}