/**
 * @brief PHP's json_decode which throws an exception on error
 *
 * The arguments $json, $assoc, $depth and $options are handed to the native
 * json_decode() unchanged. When json_last_error() reports a failure, a
 * JsonDecodeException is thrown whose message is "JSON decode error: <reason>".
 *
 * @param string $json JSON text to decode
 * @param bool $assoc forwarded to json_decode()
 * @param int $depth forwarded to json_decode()
 * @param int $options forwarded to json_decode()
 * @param bool $logJson if true, the exception data will contain the JSON (key 'json')
 * @param bool $lint if true, the JSON is additionally run through Seld JsonLint and
 *                   the lint message (or null) is stored under key 'errDetail'
 * @return mixed whatever json_decode() produced (object, array, or a scalar for scalar JSON)
 * @throws \Keboola\Utils\Exception\JsonDecodeException
 */
function jsonDecode($json, $assoc = false, $depth = 512, $options = 0, $logJson = false, $lint = false)
{
    $data = json_decode($json, $assoc, $depth, $options);

    $errorCode = json_last_error();
    if ($errorCode === JSON_ERROR_NONE) {
        return $data;
    }

    switch ($errorCode) {
        case JSON_ERROR_DEPTH:
            $error = 'Maximum stack depth exceeded';
            break;
        case JSON_ERROR_STATE_MISMATCH:
            $error = 'Underflow or the modes mismatch';
            break;
        case JSON_ERROR_CTRL_CHAR:
            $error = 'Unexpected control character found';
            break;
        case JSON_ERROR_SYNTAX:
            $error = 'Syntax error, malformed JSON';
            break;
        case JSON_ERROR_UTF8:
            $error = 'Malformed UTF-8 characters, possibly incorrectly encoded';
            break;
        default:
            // Codes not listed above (e.g. JSON_ERROR_UTF16) still have a
            // description in PHP itself; use it instead of hiding the cause.
            // The function_exists() guard keeps the old text on runtimes
            // that lack json_last_error_msg().
            $error = function_exists('json_last_error_msg') ? json_last_error_msg() : 'Unknown error';
            break;
    }

    $e = new Exception\JsonDecodeException("JSON decode error: {$error}");

    $errData = [];
    if ($logJson) {
        $errData['json'] = $json;
    }
    if ($lint) {
        $jsonLint = new \Seld\JsonLint\JsonParser();
        $errLint = $jsonLint->lint($json);
        // lint() may return something other than a ParsingException;
        // in that case the detail is recorded as null.
        $errData['errDetail'] = $errLint instanceof \Seld\JsonLint\ParsingException
            ? $errLint->getMessage()
            : null;
    }
    if (!empty($errData)) {
        $e->setData($errData);
    }

    throw $e;
}
/**
 * Drives the scrapers built for $this->engines[0] through [3] through a fixed
 * sequence of scrape / scrapeAll / addKeywords / serialize / save calls,
 * asserting the boolean result of each call and the page/keyword counts
 * after each step. Every file written by save() is then read back from the
 * scraper's output directory and checked: JSON files must lint cleanly,
 * XML files must be reported valid by XMLReader.
 *
 * The steps are order dependent; each count assertion relies on the calls
 * made before it.
 */
public function testScrape()
{
    $linter = new \Seld\JsonLint\JsonParser();

    // ---- engines[0] / 'google' -------------------------------------------
    $google = Builder::create($this->engines[0], array(array('foo', 'baz'), 'google'));
    $targetDir = $google->getOutDir();

    // Calls expected to return false.
    $this->assertFalse($google->scrape('bar'));
    $this->assertFalse($google->scrape('baz', 100));
    $this->assertFalse($google->scrape('baz', 1, 'baz'));
    $this->assertFalse($google->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($google->scrape('baz', 1, true, 'UTC', 'faz'));
    $this->assertFalse($google->serialize('json'));

    // Scrape the two initial keywords one at a time.
    $this->assertTrue($google->scrape('foo', 2, true, 'Europe/Berlin'));
    $this->assertCount(2, $google->getFetchedPages());
    $this->assertCount(1, $google->getKeywords());
    $this->assertTrue($google->scrape('baz', 2, true));
    $this->assertCount(4, $google->getFetchedPages());
    $this->assertCount(0, $google->getKeywords());

    // scrapeAll() with no keywords left, then with two new ones.
    $this->assertFalse($google->scrapeAll());
    $this->assertTrue($google->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($google->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $google->getFetchedPages());
    $this->assertCount(0, $google->getKeywords());

    // Serialize to JSON, save, and lint every written file.
    $this->assertFalse($google->serialize('baz'));
    $this->assertTrue($google->serialize('json', true));
    $this->assertCount(0, $google->getFetchedPages());
    $this->assertCount(8, $google->getSerializedPages());
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($google->getSerializedPages())
    );
    $this->assertTrue($google->save(true));
    foreach ($fileNames as $fileName) {
        $contents = file_get_contents($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $this->assertNull($linter->lint($contents));
    }

    // Second round: one more keyword, serialized to XML.
    $this->assertTrue($google->addKeywords(array('foo bad')));
    $this->assertTrue($google->scrapeAll(3, true));
    $this->assertCount(3, $google->getFetchedPages());
    $this->assertTrue($google->serialize('xml', true));
    $this->assertCount(0, $google->getFetchedPages());
    $this->assertCount(3, $google->getSerializedPages());
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($google->getSerializedPages())
    );
    $this->assertTrue($google->save(true));
    foreach ($fileNames as $fileName) {
        $reader = new \XMLReader();
        $reader->open($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $reader->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($reader->isValid());
    }

    // ---- engines[1] / 'ask' ----------------------------------------------
    $ask = Builder::create($this->engines[1], array(array('foo', 'baz'), 'ask'));
    $targetDir = $ask->getOutDir();

    // Calls expected to return false.
    $this->assertFalse($ask->scrape('bar'));
    $this->assertFalse($ask->scrape('baz', 100));
    $this->assertFalse($ask->scrape('baz', 1, 'baz'));
    $this->assertFalse($ask->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($ask->scrape('baz', 1, true, 'UTC', 'faz'));

    // Scrape the two initial keywords one at a time.
    $this->assertTrue($ask->scrape('foo', 2, true, 'Europe/Rome'));
    $this->assertCount(2, $ask->getFetchedPages());
    $this->assertCount(1, $ask->getKeywords());
    $this->assertTrue($ask->scrape('baz', 2, true));
    $this->assertCount(4, $ask->getFetchedPages());
    $this->assertCount(0, $ask->getKeywords());

    // scrapeAll() with no keywords left, then with two new ones.
    $this->assertFalse($ask->scrapeAll());
    $this->assertTrue($ask->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($ask->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $ask->getFetchedPages());
    $this->assertCount(0, $ask->getKeywords());

    // Serialize to XML, save, and validate every written file.
    $this->assertFalse($ask->serialize('baz'));
    $this->assertTrue($ask->serialize('xml', true));
    $this->assertCount(0, $ask->getFetchedPages());
    $this->assertCount(8, $ask->getSerializedPages());
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($ask->getSerializedPages())
    );
    $this->assertTrue($ask->save(true));
    $this->assertCount(0, $ask->getSerializedPages());
    foreach ($fileNames as $fileName) {
        $reader = new \XMLReader();
        $reader->open($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $reader->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($reader->isValid());
    }

    // Second round: one more keyword, serialized to JSON.
    $this->assertTrue($ask->addKeywords(array('foobaz')));
    $this->assertTrue($ask->scrapeAll(3, true));
    $this->assertTrue($ask->serialize('json', true));
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($ask->getSerializedPages())
    );
    $this->assertTrue($ask->save(true));
    foreach ($fileNames as $fileName) {
        $contents = file_get_contents($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $this->assertNull($linter->lint($contents));
    }

    // ---- engines[2] / 'bing' ---------------------------------------------
    $bing = Builder::create($this->engines[2], array(array('foo', 'baz'), 'bing'));
    $targetDir = $bing->getOutDir();

    // Calls expected to return false.
    $this->assertFalse($bing->scrape('bar'));
    $this->assertFalse($bing->scrape('baz', 100));
    $this->assertFalse($bing->scrape('baz', 1, 'baz'));
    $this->assertFalse($bing->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($bing->scrape('baz', 1, true, 'UTC', 'faz'));
    $this->assertFalse($bing->serialize('json'));

    // Scrape the two initial keywords one at a time.
    $this->assertTrue($bing->scrape('foo', 2, true, 'Europe/Berlin'));
    $this->assertCount(2, $bing->getFetchedPages());
    $this->assertCount(1, $bing->getKeywords());
    $this->assertTrue($bing->scrape('baz', 2, true));
    $this->assertCount(4, $bing->getFetchedPages());
    $this->assertCount(0, $bing->getKeywords());

    // scrapeAll() with no keywords left, then with two new ones.
    $this->assertFalse($bing->scrapeAll());
    $this->assertTrue($bing->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($bing->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $bing->getFetchedPages());
    $this->assertCount(0, $bing->getKeywords());

    // Serialize to JSON, save, and lint every written file.
    $this->assertFalse($bing->serialize('baz'));
    $this->assertTrue($bing->serialize('json', true));
    $this->assertCount(0, $bing->getFetchedPages());
    $this->assertCount(8, $bing->getSerializedPages());
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($bing->getSerializedPages())
    );
    $this->assertTrue($bing->save(true));
    foreach ($fileNames as $fileName) {
        $contents = file_get_contents($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $this->assertNull($linter->lint($contents));
    }

    // Second round: one more keyword, serialized to XML.
    $this->assertTrue($bing->addKeywords(array('foo bad')));
    $this->assertTrue($bing->scrapeAll(2, true));
    $this->assertCount(2, $bing->getFetchedPages());
    $this->assertTrue($bing->serialize('xml', true));
    $this->assertCount(0, $bing->getFetchedPages());
    $this->assertCount(2, $bing->getSerializedPages());
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($bing->getSerializedPages())
    );
    $this->assertTrue($bing->save(true));
    foreach ($fileNames as $fileName) {
        $reader = new \XMLReader();
        $reader->open($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $reader->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($reader->isValid());
    }

    // ---- engines[3] / 'yahoo' --------------------------------------------
    $yahoo = Builder::create($this->engines[3], array(array('foo', 'baz'), 'yahoo'));
    $targetDir = $yahoo->getOutDir();

    // Calls expected to return false.
    $this->assertFalse($yahoo->scrape('bar'));
    $this->assertFalse($yahoo->scrape('baz', 100));
    $this->assertFalse($yahoo->scrape('baz', 1, 'baz'));
    $this->assertFalse($yahoo->scrape('baz', 1, true, 'foobad'));
    $this->assertFalse($yahoo->scrape('baz', 1, true, 'UTC', 'faz'));

    // Scrape the two initial keywords one at a time.
    $this->assertTrue($yahoo->scrape('foo', 2, true, 'Europe/Rome'));
    $this->assertCount(2, $yahoo->getFetchedPages());
    $this->assertCount(1, $yahoo->getKeywords());
    $this->assertTrue($yahoo->scrape('baz', 2, true));
    $this->assertCount(4, $yahoo->getFetchedPages());
    $this->assertCount(0, $yahoo->getKeywords());

    // scrapeAll() with no keywords left, then with two new ones.
    $this->assertFalse($yahoo->scrapeAll());
    $this->assertTrue($yahoo->addKeywords(array('foobaz', 'foobar')));
    $this->assertTrue($yahoo->scrapeAll(2, true, 'America/Los_Angeles'));
    $this->assertCount(8, $yahoo->getFetchedPages());
    $this->assertCount(0, $yahoo->getKeywords());

    // Serialize to XML, save, and validate every written file.
    $this->assertFalse($yahoo->serialize('baz'));
    $this->assertTrue($yahoo->serialize('xml', true));
    $this->assertCount(0, $yahoo->getFetchedPages());
    $this->assertCount(8, $yahoo->getSerializedPages());
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($yahoo->getSerializedPages())
    );
    $this->assertTrue($yahoo->save(true));
    $this->assertCount(0, $yahoo->getSerializedPages());
    foreach ($fileNames as $fileName) {
        $reader = new \XMLReader();
        $reader->open($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $reader->setParserProperty(\XMLReader::VALIDATE, true);
        $this->assertTrue($reader->isValid());
    }

    // Second round: one more keyword, serialized to JSON.
    $this->assertTrue($yahoo->addKeywords(array('foobaz')));
    $this->assertTrue($yahoo->scrapeAll(3, true));
    $this->assertTrue($yahoo->serialize('json', true));
    $fileNames = array_map(
        'Franzip\\SerpScraper\\Helpers\\FileSystemHelper::generateFileName',
        array_keys($yahoo->getSerializedPages())
    );
    $this->assertTrue($yahoo->save(true));
    foreach ($fileNames as $fileName) {
        $contents = file_get_contents($targetDir . DIRECTORY_SEPARATOR . $fileName);
        $this->assertNull($linter->lint($contents));
    }
}
/**
 * Lint the JSON text first and decode it only when the linter is satisfied.
 *
 * When the linter returns a problem, its message is raised through
 * trigger_error() and null is returned; otherwise the result of
 * json_decode($json) is returned.
 *
 * @author Jack
 * @date Sat Feb 21 10:40:14 2015
 */
function parse_json($json)
{
    $linter = new \Seld\JsonLint\JsonParser();
    $problem = $linter->lint($json);

    // A falsy lint result means nothing was reported: decode normally.
    if (!$problem) {
        return json_decode($json);
    }

    trigger_error($problem->getMessage());

    return null;
}
/**
 * Lints the contents returned by getContents() with Seld's JsonParser.
 *
 * @throws \RuntimeException         when the Seld\JsonLint\JsonParser class cannot be found
 * @throws DecodeException           when the linter reports a problem
 * @throws \UnexpectedValueException when the linter reports nothing but
 *                                   json_last_error() equals JSON_ERROR_UTF8
 *
 * @return bool true when none of the above is thrown
 */
protected function validateSyntax()
{
    $linterAvailable = class_exists('\\Seld\\JsonLint\\JsonParser');
    if (!$linterAvailable) {
        throw new \RuntimeException('If you want to validate JSON syntax using lint, you must require package "seld/jsonlint".');
    }

    $linter = new \Seld\JsonLint\JsonParser();
    $problem = $linter->lint($this->getContents());

    // The linter found something: surface its message and details.
    if ($problem !== null) {
        $context = ['message' => $problem->getMessage(), 'details' => $problem->getDetails()];

        throw DecodeException::create($this->getPath(), $context);
    }

    // NOTE(review): json_last_error() reflects the most recent json_* call made
    // before this point; presumably the caller attempted a decode first — confirm.
    if (json_last_error() === JSON_ERROR_UTF8) {
        throw new \UnexpectedValueException('"' . $this->getPath() . '" is not encoded in UTF-8, could not parse as JSON');
    }

    return true;
}
#!/usr/bin/php
<?php

// Configuration for the jsonlint package: where to fetch it from, how its
// namespace maps to a directory, and a small usage example.
$config = [
    'git_urls' => ['https://github.com/Seldaek/jsonlint.git' => 'jsonlint/'],
    'autoload_config' => ['jsonlint/src/Seld/JsonLint/' => 'Seld\\JsonLint'],
    'example' => function () {
        $parser = new Seld\JsonLint\JsonParser();
        $json = '{"Hello":"World"}';
        $out = $parser->parse($json);
        var_dump($out);
    },
];

// $return_config is never set in this file; presumably an including script
// defines it to obtain the configuration without running the autoloader.
// empty() keeps the same truthiness test while avoiding an "Undefined
// variable" notice/warning when the script is executed directly.
if (!empty($return_config)) {
    return $config;
}

require_once __DIR__ . '/_yf_autoloader.php';
new yf_autoloader($config);