/**
 * Extracts the parts of this fragment's HTML that match a regular expression.
 *
 * The "u" (UTF-8) modifier is appended to $regexp before matching, so the
 * pattern must be passed together with its delimiters (and may carry its own
 * trailing modifiers).
 *
 * When a recipient is given, the text captured by the first group of every
 * match is joined with newlines, wrapped in a new fragment and appended to the
 * recipient. If the pattern has no capturing group, the full matches are used
 * instead.
 *
 * @param string $regexp  delimited PCRE pattern (without the "u" modifier)
 * @param nc_search_document_parser_html_fragment|null $recipient  fragment that receives the extracted text
 * @param boolean $remove  when true, every matched substring is removed from this fragment's HTML
 * @return boolean success  false when nothing matched or the pattern could not be applied
 */
public function extract_regexp($regexp, self $recipient = null, $remove = false) {
    $html = $this->get_html();

    // preg_match_all() yields 0 on "no match" and false on error; both mean failure here
    if (!preg_match_all($regexp . "u", $html, $matches)) {
        return false;
    }

    if ($recipient) {
        // $matches[1] exists only if the pattern has at least one capturing group;
        // otherwise fall back to the full matches to avoid joining an undefined value
        $extracted = isset($matches[1]) ? $matches[1] : $matches[0];
        $recipient->append_fragment(new self(join("\n", $extracted)));
    }

    if ($remove) {
        // it's easier to dismiss the old DOMDocument than to delete all nodes properly
        $this->set_html(str_replace($matches[0], "", $html));
    }

    return true;
}
/**
 * Runs a list of extraction queries against a source fragment.
 *
 * The query string is interpreted in one of two ways:
 *  - if it consists only of word characters, hyphens and spaces, it is treated
 *    as a space-delimited list of tag names;
 *  - otherwise each line is a separate query.
 *
 * Each non-empty query is dispatched by its form:
 *  - starts with "#"           => regular expression (extract_regexp);
 *  - contains "/"              => XPath expression (extract_xpath);
 *  - word characters/hyphens   => tag name (extract_tag);
 *  - anything else is ignored.
 *
 * @param string $query_string
 * @param nc_search_document_parser_html_fragment $source  fragment the queries are executed on
 * @param nc_search_document_parser_html_fragment|null $recipient  passed through to the extract_* methods
 * @param boolean $remove  passed through to the extract_* methods
 * @param boolean $only_first  stop after the first query that matched
 * @return boolean success  true if at least one query matched
 */
protected function execute_queries($query_string, nc_search_document_parser_html_fragment $source, $recipient, $remove, $only_first) {
    if (!trim($query_string)) {
        return false;
    }

    if (preg_match("/^[\\w \\-]+\$/", $query_string)) {
        // tag names delimited with a space
        $queries = explode(" ", $query_string);
    }
    else {
        $queries = explode("\n", $query_string);
    }

    $matched = false;

    foreach ($queries as $query) {
        $query = trim($query);

        // blank lines or repeated spaces produce empty entries; reading
        // $query[0] on them would trigger an "uninitialized string offset" error
        if ($query === "") {
            continue;
        }

        $query_matched = false;

        if ($query[0] == "#") {
            $query_matched = $source->extract_regexp($query, $recipient, $remove);
        }
        elseif (strpos($query, "/") !== false) {
            $query_matched = $source->extract_xpath($query, $recipient, $remove);
        }
        elseif (preg_match("/^[\\w\\-]+\$/", $query)) {
            $query_matched = $source->extract_tag($query, $recipient, $remove);
        }

        if ($query_matched) {
            // remember the success so that a later non-matching query
            // does not turn the overall result back into a failure
            $matched = true;

            if ($only_first) {
                break;
            }
        }
    }

    return $matched;
}