/**
 * Fetches a page via the parent implementation and dumps what the browser
 * received to disk, both as raw HTML and as a plain-text rendition.
 *
 * When $url matches the "name:name" style pattern below (and the prefix is
 * not "mailto") it is first run through page_link_decode() and _build_url()
 * and the resulting URL is fetched instead; otherwise $url is fetched as-is.
 * The dump files are always named after the $url that was passed in.
 *
 * Dumps are written to <file base>/_tests/html_dump/<class name>/ as
 * "<url_to_filename($url)>.htm" and "<url_to_filename($url)>.txt".
 * Writing the dumps is best-effort: if a file cannot be opened it is skipped.
 *
 * @param  string $url        URL (or colon-separated link) to fetch.
 * @param  mixed  $parameters Optional additional data, handed to parent::get() untouched.
 * @return mixed              Whatever parent::get() returned.
 */
function get($url, $parameters = NULL)
{
    $parts = array();
    if (preg_match('#([\\w-]*):([\\w-]+|[^/]|$)((:(.*))*)#', $url, $parts) != 0 && $parts[1] != 'mailto') {
        list($zone_name, $vars, $hash) = page_link_decode($url);
        $real_url = _build_url($vars, $zone_name, NULL, false, false, false, $hash);
        $ret = parent::get($real_url, $parameters);
    } else {
        $ret = parent::get($url, $parameters);
    }

    // Save, so we can run validation on it later
    $path = get_file_base() . '/_tests/html_dump/' . get_class($this);
    if (!file_exists($path)) {
        // Recursive, so a missing intermediate directory does not make the dump fail
        mkdir($path, 0777, true);
    }
    $file_stub = $path . '/' . url_to_filename($url);

    $content = $this->_browser->getContent();
    $html_file = $file_stub . '.htm';
    $outfile = fopen($html_file, 'wb');
    if ($outfile !== false) {
        fwrite($outfile, $content);
        fclose($outfile);
        sync_file($html_file);
        fix_permissions($html_file);
    }

    // Save the text so we can run through Word's grammar checker
    $text_content = $content;
    // Replace any tag carrying a title/alt attribute by that attribute's text.
    // (The closing quote is followed by the rest of the tag, not by a "<".)
    $text_content = preg_replace('#<[^>]* title="([^"]+)"[^>]*>#U', '\\1', $text_content);
    $text_content = preg_replace('#<[^>]* alt="([^"]+)"[^>]*>#U', '\\1', $text_content);
    // Drop style/script blocks including their contents, then all remaining tags
    $text_content = preg_replace('#<style[^>]*>.*</style>#Us', '', $text_content);
    $text_content = preg_replace('#<script[^>]*>.*</script>#Us', '', $text_content);
    $text_content = preg_replace('#<[^>]*>#U', '', $text_content);
    // Runs of whitespace become sentence breaks
    $text_content = preg_replace('#\\s\\s+#', '. ', $text_content);
    // Flatten typographic punctuation to plain ASCII
    $text_content = str_replace('–', '-', $text_content);
    $text_content = str_replace('—', '-', $text_content);
    $text_content = str_replace('…', '...', $text_content);
    $text_content = @html_entity_decode($text_content, ENT_QUOTES);

    $outfile = fopen($file_stub . '.txt', 'wb');
    if ($outfile !== false) {
        fwrite($outfile, $text_content);
        fclose($outfile);
    }

    return $ret;
}
/**
 * Loads a page into the page buffer.
 *
 * A URL without a base is resolved against the current base URL. Once the
 * fetch has completed, the base URL reflects the new location.
 *
 * If the requested URL points at a resource inside the './site/protected/'
 * part of the test site, then the appropriate .htaccess fixup page is
 * requested first and checked to have made all the right noises on return.
 * This prevents stumbling into a rain of 500 Internal Server Errors caused
 * by a site-specific part of .htaccess that has not been set up yet.
 *
 * For more information, read ./site/fix_protected_access.php
 *
 * @param string $url          URL to fetch.
 * @param hash $parameters     Optional additional GET data.
 * @return boolean/string      Raw page on success.
 * @access public
 */
function get($url, $parameters = false)
{
    // The protected-zone fixup has to happen before the real request goes out.
    $this->fix_protected_zone($url);

    $page = parent::get($url, $parameters);

    return $page;
}