/**
 * Collects the strings found in the XML files referenced by the given values.
 *
 * Every value is wrapped in a core_kernel_file_File whose content is read from
 * its absolute path. Content that parses as XML is handed to the XML item
 * content tokenizer; unreadable, empty or non-XML content is logged and skipped.
 * A common_Exception raised while handling one file is logged as a warning and
 * does not interrupt the processing of the remaining files.
 *
 * @param array $values values accepted by the core_kernel_file_File constructor
 *                      (presumably file resource URIs -- confirm against callers)
 * @return array strings returned by the XML tokenizer, merged over all files
 */
public function getStrings($values)
{
    $contentStrings = array();
    $xmlTokenizer = new taoItems_models_classes_search_XmlItemContentTokenizer();

    foreach ($values as $valueUri) {
        $file = new core_kernel_file_File($valueUri);
        try {
            $path = $file->getAbsolutePath();
            $content = file_get_contents($path);
            if ($content === false) {
                common_Logger::w('File ' . $path . ' not found for item');
                continue;
            }

            // An empty source makes DOMDocument::loadXML() throw a ValueError on
            // PHP 8 (neither silenced by "@" nor caught below), so an empty file
            // is treated as non XML content up front.
            $dom = new DOMDocument('1.0', 'UTF-8');
            // "@" silences the libxml warnings emitted for malformed content;
            // the boolean result is what decides whether the file is tokenized.
            if ($content === '' || @$dom->loadXML($content) !== true) {
                common_Logger::d('Skipped non XML content for ' . $file->getUri());
                continue;
            }

            $contentStrings = array_merge($contentStrings, $xmlTokenizer->getStrings($dom));
        } catch (common_Exception $exc) {
            common_Logger::w('Invalid file ' . $valueUri . ' for ItemContentTokenizer: ' . $exc->getMessage());
        }
    }

    return $contentStrings;
}
/**
 * Verifies the tokens extracted from a small XML document.
 *
 * The document holds one paragraph element followed by free text; the first
 * two tokens returned by the tokenizer are compared with those two texts.
 */
public function testSimpleXmlContent()
{
    $xml = ' <myTag attribute="blabla"> <p> A paragraph </p> with some other text... </myTag> ';

    $document = new DOMDocument('1.0', 'UTF-8');
    $document->loadXML($xml);

    $contentTokenizer = new taoItems_models_classes_search_XmlItemContentTokenizer();
    $strings = $contentTokenizer->getStrings($document);

    // Expected token text keyed by its position in the tokenizer output.
    $expectedByPosition = array(
        0 => 'A paragraph',
        1 => 'with some other text...',
    );
    foreach ($expectedByPosition as $position => $expectedText) {
        $this->assertEquals($expectedText, $strings[$position]);
    }
}
/**
 * Collects the strings found in the files referenced by the given values.
 *
 * Every value is wrapped in a core_kernel_file_File whose content is read from
 * its absolute path. Content that parses as XML is handed to the XML item
 * content tokenizer; any other readable content (including an empty file) is
 * handed to the RawValue tokenizer. Unreadable files are logged and skipped.
 *
 * @param array $values values accepted by the core_kernel_file_File constructor
 *                      (presumably file resource URIs -- confirm against callers)
 * @return array strings returned by the tokenizers, merged over all files
 */
public function getStrings($values)
{
    $contentStrings = array();
    $xmlTokenizer = new taoItems_models_classes_search_XmlItemContentTokenizer();
    $rawTokenizer = new RawValue();

    foreach ($values as $valueUri) {
        $file = new core_kernel_file_File($valueUri);
        $path = $file->getAbsolutePath();
        $content = file_get_contents($path);
        if ($content === false) {
            // The original message dereferenced an undefined $itemContent, which
            // turned this warning into a fatal error; the file's own URI is
            // reported instead.
            common_Logger::w('File ' . $path . ' not found for fileressource ' . $file->getUri());
            continue;
        }

        // An empty source makes DOMDocument::loadXML() throw a ValueError on
        // PHP 8, so XML parsing is only attempted on non-empty content.
        // "@" silences the libxml warnings emitted for malformed content.
        $dom = new DOMDocument('1.0', 'UTF-8');
        if ($content !== '' && @$dom->loadXML($content) === true) {
            $tokens = $xmlTokenizer->getStrings($dom);
        } else {
            $tokens = $rawTokenizer->getStrings($content);
        }

        $contentStrings = array_merge($contentStrings, $tokens);
    }

    return $contentStrings;
}