/**
 * Runs a single hard-coded sample document through the processing pipeline.
 *
 * The steps, in order: decomposition, multi-label classification (after which
 * every resulting tag is echoed followed by " <br>"), clustering, duplicate
 * detection, and finally saving to the index.
 *
 * NOTE(review): the placeholder comments indicate this is meant to iterate
 * over several sources and the documents found in each; at present only one
 * sample document is built and processed.
 */
function findDocuments()
{
    // Placeholder for the real discovery step: find documents and metadata
    // from various sources, looping over each source and over each document
    // found in that source.
    $document = new document();

    // Example of how a document found in the current source is populated.
    $document->add_url("http://eventful.com/event/1");
    $document->name = "dancefestival";
    $document->add_artist("who");
    $document->description = "This is a test, for the decompose-test. Decomposition available availability. It didn't work filing";

    // Each stage receives the previous stage's result and returns the
    // document that the next stage works on.
    $decomposed = decomposeDocument($document);
    $classified = multiLabelClassification($decomposed);

    // Print the tags present on the document after classification.
    foreach ($classified->tags as $label) {
        echo $label, " <br>";
    }

    $clustered = clusterDocument($classified);
    $deduplicated = findDuplicate($clustered);

    saveToIndex($deduplicated);
}
$html_content = getContent($url_list[$i]); $html = phpQuery::newDocumentHTML($html_content); $description = $html->find("meta[name='description']")->attr('content'); if ($description != '') { $result['description'][$url_list[$i]] = $description; } $title = $html->find("title")->text(); if ($title != '') { $result['title'][$url_list[$i]] = $title; } } if (isset($result['description'])) { $duplicates_desc = findDuplicate($result['description']); } if (isset($result['title'])) { $duplicates_title = findDuplicate($result['title']); } $duplicates = array_combine($duplicates_desc, $duplicates_title); foreach ($duplicates as $key => $val) { $str .= "<tr><td>{$key}</td><td>{$val}</td></tr>"; } if ($str == '') { $str = "<tr><td>Duplicates descriptions not found</td><td>Duplicates descriptions not found</td></tr>"; } echo $str; } } } if ($_POST['action'] == 'checkRobots') { $domains = $_POST['domains']; $robots = $_POST['robots'];