/**
 * Attaches a file to this object's internal "files" field.
 *
 * Sources starting with http:// or https:// are downloaded through HTTP_Request;
 * anything else is read with file_get_contents(). The contents are stored
 * base64-encoded next to a filename derived from the last path segment.
 *
 * @param string $url Absolute http(s) URL, or a path readable by file_get_contents().
 * @return void
 */
function addFile($url)
{
    if (!isset($this->internal_fields['files'])) {
        $this->internal_fields['files'] = [];
    }

    // Require a full scheme prefix. A bare "http" prefix test would also treat
    // local paths such as "httpd.conf" as remote URLs.
    if (preg_match('#^https?://#i', $url) === 1) {
        $http = new \HTTP_Request();
        $contents = $http->request($url);

        // Drop the query string and the fragment before taking the last path
        // segment, so "report.pdf?v=2#page=3" is stored as "report.pdf".
        $withoutQuery = preg_split('/[?#]/', $url, 2);
        $segments = explode('/', $withoutQuery[0]);
        $filename = end($segments);
    } else {
        $contents = file_get_contents($url);
        $filename = pathinfo($url, PATHINFO_BASENAME);
    }

    // A failed read yields false. The stored payload stays an empty string as
    // before, but no longer depends on implicit bool-to-string coercion.
    if ($contents === false) {
        $contents = '';
    }

    $this->internal_fields['files'][] = [
        "filename" => $filename,
        "contents" => base64_encode($contents),
    ];
}
/**
 * Replaces PHP's file_get_contents in URLs, to get around the allow_url_fopen limitation.
 * Still loads regular files using file_get_contents.
 *
 * Resolution order:
 *  1. An empty $url returns ''.
 *  2. A $url containing FCPATH is read directly from disk.
 *  3. A $url under the first configured theme location, or under BASE_URL . 'uploads/',
 *     is mapped to its file on disk and read locally (no external request).
 *  4. Any other http:// or https:// URL is fetched with HTTP_Request and trimmed.
 *  5. Everything else is handed to file_get_contents().
 *
 * @param string $url
 * @param bool $redirect Forwarded to deal_with_no_internet() when the HTTP request throws.
 * @return string|false File or response contents; '' for an empty URL or a failed
 *                      HTTP request; false when file_get_contents() fails.
 */
function get_url_contents($url, $redirect = true)
{
    if (empty($url)) {
        return '';
    }

    # First, let's check whether this is a local file.
    if (stristr($url, FCPATH) !== false) {
        return file_get_contents($url);
    }

    # This is for PDFs, to bypass the need for an external request.
    $config = array();
    include APPPATH . 'config/template.php';
    $theme_location = $config['theme_locations'][0];

    # Maps a URL under $url_prefix to a file under $local_root and reads it.
    # Returns null when the URL is not under the prefix or no such file exists.
    # NOTE(review): ".." segments in the URL are not rejected here - confirm that
    # callers only pass trusted URLs.
    $read_mapped_file = function ($url_prefix, $local_root) use ($url) {
        if (substr($url, 0, strlen($url_prefix)) != $url_prefix) {
            return null;
        }

        $relative = (string) substr($url, strlen($url_prefix));
        $parts = explode('?', $relative);

        # Decode before checking the disk, so that the path being tested is the same
        # path being read ("my%20file.png" lives on disk as "my file.png").
        $path = $local_root . urldecode($parts[0]);

        # is_file() rather than file_exists(): a directory is not readable content.
        if (!is_file($path)) {
            return null;
        }

        return file_get_contents($path);
    };

    # Check if it's in third_party/themes.
    $theme_url = BASE_URL . str_ireplace(FCPATH, '', $theme_location);
    $contents = $read_mapped_file($theme_url, $theme_location);
    if ($contents !== null) {
        return $contents;
    }

    # Check if it's in uploads.
    $contents = $read_mapped_file(BASE_URL . 'uploads/', FCPATH . 'uploads/');
    if ($contents !== null) {
        return $contents;
    }

    # Anything that is not an http(s) URL is left to file_get_contents.
    if (preg_match('#^https?://#i', $url) !== 1) {
        return file_get_contents($url);
    }

    # https:// is routed here as well, so it does not depend on allow_url_fopen either.
    include_once APPPATH . 'libraries/HTTP_Request.php';
    $http = new HTTP_Request();

    try {
        $result = $http->request($url);
    } catch (Exception $e) {
        deal_with_no_internet($redirect, $url);
        return '';
    }

    return trim($result);
}
</head>
<body>
<?php
// Example listing used to pre-fill the form until the visitor submits a URL.
$defaultUrl = 'http://philadelphia.craigslist.org/apa/5299825266.html';

// Accept only a string value: "?url[]=x" would otherwise arrive as an array.
$submittedUrl = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : '';
$fieldValue = $submittedUrl !== '' ? $submittedUrl : $defaultUrl;
?>
<form>
    <p>Input Url, for example http://philadelphia.craigslist.org/apa/5299825266.html</p>
    <input type="text" name="url" value="<?php echo htmlspecialchars($fieldValue, ENT_QUOTES, 'UTF-8'); ?>">
    <input type="submit" name="parse" value="parse">
</form>
<?php
if ($submittedUrl !== '') {
    $scheme = strtolower((string) parse_url($submittedUrl, PHP_URL_SCHEME));
    $isHttpUrl = filter_var($submittedUrl, FILTER_VALIDATE_URL) !== false
        && ($scheme === 'http' || $scheme === 'https');

    if (!$isHttpUrl) {
        // The URL comes straight from the query string; refuse anything that is
        // not a well-formed http(s) URL before handing it to the HTTP client.
        l('Please enter a valid http(s) URL.');
    } else {
        require_once "request.php";
        require_once "simple_html_dom.php";

        $http = new HTTP_Request();
        $html = new simple_html_dom();
        $html->load($http->request($submittedUrl));

        // find() yields no elements when the page has no contact link, so check
        // before dereferencing the first match.
        $contactLinks = $html->find('a.showcontact');
        if (empty($contactLinks)) {
            l('No contact link found on that page.');
        } else {
            $contactInfoLink = "http://philadelphia.craigslist.org" . $contactLinks[0]->href;

            // NOTE(review): the remote response is printed verbatim (unescaped).
            print_r($http->request($contactInfoLink));
        }
    }
}

/**
 * Prints a string followed by a newline.
 *
 * @param string $str Text to print.
 * @return void
 */
function l($str)
{
    echo $str . "\n";
}
?>
</body>
</html>
} }; $pool = new Pool($client, $requests($urls), ['concurrency' => 2, 'fulfilled' => function ($response, $index) { echo "{$index} loaded\n"; parseExhibitorsLinks((string) $response->getBody(), $index); }, 'rejected' => function ($reason, $index) { // this is delivered each failed request echo "{$index} rejected\n"; }]); foreach ($urls as $ind => $url) { $stream = fopen($url, 'r'); if (!$stream) { continue; } while ($line = fgets($stream)) { $link = parseExhibitorsLinks($http->request($url), $ind); if ($link) { echo "{$link}\n"; } } } function parseExhibitorsLinks($text, $index) { //echo $text; $aind = strpos($text, 'exhibitorName" href="', $aind + 1); $exhibitorLink = FALSE; if ($aind !== false) { $endind = strpos($text, '"', $aind + 21); echo "aind={$aind} endind={$endind}\n"; $exhibitorLink = substr($text, $aind + 21, $endind); }