<?php
/*
 * Walks the table of contents of the book hosted under
 * http://homeoint.org/books2/boenchar/ and prints every distinct page URL
 * it links to, fetching each of those pages once along the way.
 *
 * url_get() and fromtxt2txt() come from fun.php.
 * NOTE(review): presumably url_get() downloads a URL and fromtxt2txt()
 * returns the text between two marker strings — confirm in fun.php.
 */
include __DIR__ . '/fun.php';

$url = 'http://homeoint.org/books2/boenchar/index.htm';

// Restrict the index page to the region delimited by the two markers.
$html = url_get($url);
$html = fromtxt2txt($html, 'CONTENTS', 'by H.A. Roberts');

// All relative links are resolved against the directory of the index page.
$baseDir = dirname($url);

// Capture group 1 holds the raw href values; an empty list when nothing matched.
$matches = [];
$hrefs = preg_match_all('~<a href="([^"]+)"~', $html, $matches) ? $matches[1] : [];

// URLs already handled, used as a set (key = URL).
$seen = [];

foreach ($hrefs as $href) {
    // Links that climb out of the book directory are ignored.
    if (strpos($href, '..') !== false) {
        continue;
    }

    $pageUrl = $baseDir . '/' . $href;

    // Strip the fragment so anchors into the same page collapse to one URL.
    $hashAt = strpos($pageUrl, '#');
    if ($hashAt !== false && $hashAt !== 0) {
        $pageUrl = substr($pageUrl, 0, $hashAt);
    }

    if (isset($seen[$pageUrl])) {
        continue;
    }
    $seen[$pageUrl] = true;

    echo $pageUrl . "\n";

    // The page is fetched, but its content is not used any further here.
    $html = url_get($pageUrl);
}
<?php include __DIR__ . '/fun.php'; $book_name = 'MATERIA MEDICA - By William BOERICKE'; $book = new bookModel(); $book->find_one_by_name($book_name); if (!$book->id) { $book->name = $book_name; } $book->author = 'William Boericke'; $book->save(); $url = 'http://homeoint.org/books/boericmm/remedies.htm'; $html = url_get($url); $html = fromtxt2txt($html, 'Presented by Médi-T', 'Copyright'); $a = []; $urls = []; if (preg_match_all('~<a href="([^"]+)"[^>]*>([^<]+)</a> ------> ([^<]+)<~', $html, $a)) { foreach ($a[1] as $i => $href) { if (strstr($href, '..')) { continue; } $url2 = dirname($url) . '/' . $href; //if ($pos=strpos($url2,'#')) $url2=substr($url2,0,$pos); //if (isset($urls[$url2])) continue; //$urls[$url2]=true; $remedium_name = strtolower(trim($a[3][$i])); $remedium_abbr = strtolower(trim($a[2][$i])); $remedium_name = preg_replace('/\\s+/', ' ', $remedium_name); if (isset($argv[1]) && $argv[1] != $remedium_abbr) { continue; }