<?php

/**
 * Command-line helper: download a page and dump the records the DOM parser
 * extracts from it.
 *
 * Arguments:
 *   $argv[1]  URL of the page to fetch (required, must pass FILTER_VALIDATE_URL).
 *   $argv[2]  Optional selector handed to DOMParser::setRootSelector().
 *
 * On any failure a message is written to STDERR and the script exits with
 * status 1, so calling shells and CI jobs can detect the error.
 */

if (!isset($argv[1])) {
    fwrite(STDERR, 'Please provide a url as argument.' . PHP_EOL);
    exit(1);
}

$url = $argv[1];

if (false === filter_var($url, FILTER_VALIDATE_URL)) {
    fwrite(STDERR, "{$url} is not a valid URL" . PHP_EOL);
    exit(1);
}

require_once __DIR__ . '/../vendor/autoload.php';

// file_get_contents() returns false (and emits a warning) when the download
// fails; stop here instead of handing `false` to the parser.
$content = file_get_contents($url);
if (false === $content) {
    fwrite(STDERR, "Unable to fetch {$url}" . PHP_EOL);
    exit(1);
}

$parser = new \Algolia\DOMParser();

// Command-line arguments are always strings, so a presence check is enough.
if (isset($argv[2])) {
    $parser->setRootSelector($argv[2]);
}

$records = $parser->parse($content);

var_dump($records);
<?php

/**
 * Example: fetch an Algolia blog article, parse it with the DOM parser and
 * print the resulting records as pretty-printed JSON.
 *
 * On a download or encoding failure a message is written to STDERR and the
 * script exits with status 1.
 */

require_once __DIR__ . '/../vendor/autoload.php';

$articleUrl = 'https://blog.algolia.com/how-we-re-invented-our-office-space-in-paris/';

// file_get_contents() returns false (and emits a warning) when the download
// fails; stop here instead of handing `false` to the parser.
$article = file_get_contents($articleUrl);
if (false === $article) {
    fwrite(STDERR, "Unable to fetch {$articleUrl}" . PHP_EOL);
    exit(1);
}

$parser = new \Algolia\DOMParser();

// Selectors passed to the parser as exclusions for this page.
$parser->setExcludeSelectors(['pre', '.entry-meta', 'div.rp4wp-related-posts']);

// Root selector for this page.
$parser->setRootSelector('article.post');

$records = $parser->parse($article);

// json_encode() returns false on failure (e.g. malformed UTF-8 in the page);
// report that instead of silently echoing an empty string.
$json = json_encode($records, JSON_PRETTY_PRINT);
if (false === $json) {
    fwrite(STDERR, 'Unable to encode records as JSON (json error code ' . json_last_error() . ')' . PHP_EOL);
    exit(1);
}

echo $json;
<?php

/**
 * Example: fetch a Medium engineering article, parse it with the DOM parser
 * and print the resulting records as pretty-printed JSON.
 *
 * On a download or encoding failure a message is written to STDERR and the
 * script exits with status 1.
 */

require_once __DIR__ . '/../vendor/autoload.php';

$articleUrl = 'https://medium.engineering/the-stack-that-helped-medium-drive-2-6-millennia-of-reading-time-e56801f7c492';

// file_get_contents() returns false (and emits a warning) when the download
// fails; stop here instead of handing `false` to the parser.
$article = file_get_contents($articleUrl);
if (false === $article) {
    fwrite(STDERR, "Unable to fetch {$articleUrl}" . PHP_EOL);
    exit(1);
}

$parser = new \Algolia\DOMParser();

// Root selector for this page.
$parser->setRootSelector('main');

$records = $parser->parse($article);

// json_encode() returns false on failure (e.g. malformed UTF-8 in the page);
// report that instead of silently echoing an empty string.
$json = json_encode($records, JSON_PRETTY_PRINT);
if (false === $json) {
    fwrite(STDERR, 'Unable to encode records as JSON (json error code ' . json_last_error() . ')' . PHP_EOL);
    exit(1);
}

echo $json;