/** * setting process like DSL of Web::Scraper * Sample&Demo is : demos/Diggin/Scraper/ * * @params mixed args1, args2, args3,,, * $thisObejct->process('expresssion', 'key => val, filter, filter,', 'key => val) * like * $thisObejct->process('//div[@class="post-content"]/ul/li/a', 'title => TEXT') * * expression : (depend on)Strategy * [Default] Css Or Xpath * key : results's key. * $scraper->results['key']; * can access as this class's property (by __get method) * $scraper->key * val : (depend on)Strategy * filter : * filtering by * user_func , Zend_Filter_*, Your_Filter_*(implements Zend_Filter_Interface) * * @see Diggin_Scraper_Filter * @return Diggin_Scraper_Process_Aggregate Provides a fluent interface */ public function process($args) { $args = func_get_args(); if ($args[0] instanceof Process) { $this->_processes[] = $args[0]; return $this; } $expression = array_shift($args); foreach ($args as $nametype) { if (is_string($nametype)) { if (strpos($nametype, '=>') !== false) { list($name, $types) = explode('=>', $nametype); } else { throw new Exception\InvalidArgumentException("invalid argument. none with \\'->\\'"); } if (substr(trim($name), -2) == '[]') { $name = substr(trim($name), 0, -2); $arrayflag = true; } else { $arrayflag = false; } $types = trim($types, " '\""); //types to array if (strpos($types, ',') !== false) { $types = explode(',', $types); } //filter exists? if (count($types) === 1) { //none filter $process = new Process(); $process->setExpression($expression); $process->setName(trim($name)); $process->setArrayflag($arrayflag); $process->setType($types); $process->setFilters(false); } else { //filters foreach ($types as $count => $type) { if ($count !== 0) { $filters[] = trim($type, " []'\""); } } $process = new Process(); $process->setExpression($expression); $process->setName(trim($name)); $process->setArrayflag($arrayflag); $process->setType(trim($types[0], " []'\"")); $process->setFilters($filters); } $this->_processes[] = $process; } elseif (is_array($nametype)) { if (!is_int(key($nametype))) { foreach ($nametype as $name => $nm) { if (substr($name, -2) == '[]') { $name = substr($name, 0, -2); $arrayflag = true; } else { $arrayflag = false; } $process = new Process(); $process->setExpression($expression); $process->setName($name); $process->setArrayFlag($arrayflag); $process->setType($nm); $process->setFilters(false); $this->_processes[] = $process; } } else { $process = new Process(); $process->setExpression($expression); $process->setName($nametype[0]); $process->setArrayflag($nametype[1]); $process->setType($nametype[2]); $process->setFilters(is_array($nametype[3]) ? $nametype[3] : false); $this->_processes[] = $process; } } } return $this; }
/** * $scraper->process($expression, $key, $value_type, $filter1, $filter2,,,) */ public function process($args) { $args = func_get_args(); if ($args[0] instanceof Process) { $this->_processes[] = $args[0]; return $this; } // fallback old-style process passed if (count($args) >= 2 and is_string($args[1]) and preg_match('#.+=>.*#', $args[1])) { return call_user_func_array(array('parent', 'process'), $args); } // Validate, process arguments if (1 === count($args)) { throw new Exception\BadMethodCallException("Process requires over 2 arguments"); } elseif (2 === count($args)) { if (is_array($args[1]) and !current($args[1]) instanceof ProcessAggregate) { throw new Exception\InvalidArgumentException("Child Process's value shold be instanceof Diggin\\Scraper\\ProcessAggregate"); } } $expression = array_shift($args); // check child process if (is_array($args[0])) { $name = key($args[0]); if (substr($name, -2) == '[]') { $name = substr($name, 0, -2); $arrayflag = true; } else { $arrayflag = false; } $childprocess = current($args[0]); $process = new Process(); $process->setExpression($expression); $process->setName($name); $process->setArrayflag($arrayflag); $process->setType($childprocess); $process->setFilters(false); $this->_processes[] = $process; } else { $name = array_shift($args); $types = array_shift($args); $filters = count($args > 0) ? $args : false; if (substr($name, -2) == '[]') { $name = substr($name, 0, -2); $arrayflag = true; } else { $arrayflag = false; } $process = new Process(); $process->setExpression($expression); $process->setName(trim($name)); $process->setArrayflag($arrayflag); $process->setType($types); $process->setFilters($filters); $this->_processes[] = $process; } return $this; }