/**
 * Download the page at $url and submit its HTML to Mijireh as a slurp.
 *
 * The URL is first validated against an http/https URL pattern. The page is
 * then fetched with a plain REST client and posted, together with its URL,
 * to the "slurps" resource of the API at self::$url, authenticated with
 * self::$access_key.
 *
 * @param string $url Absolute http or https URL of the page to slurp.
 *
 * @return mixed The "job_id" entry of the API response.
 *
 * @throws Mijireh_NotFound     If $url does not look like a valid URL, or a
 *                              REST request reports "not found".
 * @throws Mijireh_Unauthorized If a REST request is rejected as unauthorized.
 * @throws Mijireh_ClientError  If a REST request reports a client error.
 * @throws Mijireh_ServerError  If a REST request reports a server error.
 * @throws Mijireh_Exception    If a REST request yields an unknown response.
 */
public static function slurp($url)
{
    // Pattern pieces, in order: scheme, optional user[:password]@,
    // host name or dotted IPv4 address, optional port, optional path,
    // optional query string, optional fragment. Matched case-insensitively.
    $url_format =
        '/^(https?):\\/\\/' .
        '(([a-z0-9$_\\.\\+!\\*\'\\(\\),;\\?&=-]|%[0-9a-f]{2})+' .
        '(:([a-z0-9$_\\.\\+!\\*\'\\(\\),;\\?&=-]|%[0-9a-f]{2})+)?' .
        '@)?(?#' .
        ')((([a-z0-9][a-z0-9-]*[a-z0-9]\\.)*' .
        '[a-z][a-z0-9-]*[a-z0-9]' .
        '|((\\d|[1-9]\\d|1\\d{2}|2[0-4][0-9]|25[0-5])\\.){3}' .
        '(\\d|[1-9]\\d|1\\d{2}|2[0-4][0-9]|25[0-5])' .
        ')(:\\d+)?' .
        ')(((\\/+([a-z0-9$_\\.\\+!\\*\'\\(\\),;:@&=-]|%[0-9a-f]{2})*)*' .
        '(\\?([a-z0-9$_\\.\\+!\\*\'\\(\\),;:@&=-]|%[0-9a-f]{2})*)' .
        '?)?)?' .
        '(#([a-z0-9$_\\.\\+!\\*\'\\(\\),;:@&=-]|%[0-9a-f]{2})*)?' .
        '$/i';

    if (!preg_match($url_format, $url)) {
        // Concatenate the URL: a single-quoted string would emit the
        // literal text "$url" instead of the offending value.
        throw new Mijireh_NotFound('Unable to slurp invalid URL: ' . $url);
    }

    try {
        // Step 1: fetch the raw HTML of the page being slurped.
        $rest = new Mijireh_Rest($url);
        $html = $rest->get('');
        $data = array('url' => $url, 'html' => $html);

        // Step 2: hand the page over to the Mijireh API. $rest is reused so
        // the NotFound handler below reports whichever client failed last.
        $rest = new Mijireh_RestJSON(self::$url);
        $rest->setupAuth(self::$access_key, '');
        $result = $rest->post('slurps', $data);

        return $result['job_id'];
    } catch (Mijireh_Rest_Unauthorized $e) {
        throw new Mijireh_Unauthorized("Unauthorized. Please check your api access key");
    } catch (Mijireh_Rest_NotFound $e) {
        throw new Mijireh_NotFound("Mijireh resource not found: " . $rest->last_request['url']);
    } catch (Mijireh_Rest_ClientError $e) {
        throw new Mijireh_ClientError($e->getMessage());
    } catch (Mijireh_Rest_ServerError $e) {
        throw new Mijireh_ServerError($e->getMessage());
    } catch (Mijireh_Rest_UnknownResponse $e) {
        // Same interpolation fix as above: include the real URL.
        throw new Mijireh_Exception('Unable to slurp the URL: ' . $url);
    }
}
/**
 * Add JSON content-negotiation headers to the cURL options, then let the
 * parent class finish preparing the request.
 *
 * @param array  $opts cURL options; CURLOPT_HTTPHEADER is appended to.
 * @param string $url  Request URL, passed through to the parent unchanged.
 *
 * @return mixed Whatever the parent prepRequest() returns.
 */
protected function prepRequest($opts, $url)
{
    $json_headers = array(
        'Accept: application/json',
        'Content-Type: application/json',
    );

    // Append in order, keeping any headers already present in $opts.
    foreach ($json_headers as $header_line) {
        $opts[CURLOPT_HTTPHEADER][] = $header_line;
    }

    return parent::prepRequest($opts, $url);
}