/**
 * Serialises a nested feed-style array into an XML fragment.
 *
 * The key of each item decides how it is written:
 *  - integer key + array value: an <entry> element whose body is produced by a
 *    recursive call on that array. The entry is dropped when
 *    filterContent($entry["title"], $allowedContent) returns exactly true.
 *  - string key + array value: one self-closing tag per item of that array; the
 *    item's key/value pairs (or those of its "@attributes" sub-array, when
 *    present) become the tag's attributes.
 *  - any key + non-array value: <key>value</key> (an integer key is written as
 *    "entry").
 *
 * Element text and attribute values are XML-escaped so that characters such as
 * &, < and " cannot produce malformed markup.
 * NOTE(review): this assumes the incoming values are plain (already decoded)
 * text; if a caller passes pre-escaped entities they will be escaped again.
 *
 * @param array $array           Items to serialise.
 * @param bool  $passedViaEntry  True when called for the inside of an entry;
 *                               scalar tags are then indented with a tab.
 * @param mixed $allowedContent  Passed through unchanged to filterContent().
 * @return string XML fragment; every element is preceded by a newline.
 */
function arrayToXML($array, $passedViaEntry, $allowedContent)
{
    $xmlString = "";

    // ENT_SUBSTITUTE keeps htmlspecialchars() from returning an empty string
    // when a value contains an invalid UTF-8 sequence.
    $textFlags = ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE;
    $attributeFlags = ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE;

    // Scalar tags are indented only while rendering the inside of an entry.
    $tabChar = ($passedViaEntry == true) ? "\t" : "";

    foreach ($array as $arrayKey => $arrayContent) {
        // Integer keys denote feed entries; string keys are used as tag names.
        $isEntry = is_int($arrayKey);
        $linkType = $isEntry ? "entry" : $arrayKey;

        if (!is_array($arrayContent)) {
            // Plain value: <tag>escaped text</tag>
            $xmlString .= "\n" . $tabChar . "<" . $linkType . ">"
                . htmlspecialchars((string) $arrayContent, $textFlags, 'UTF-8')
                . "</" . $linkType . ">";
            continue;
        }

        if ($isEntry) {
            // An entry without a title is still offered to the filter (as null)
            // instead of raising an undefined-index warning.
            $entryTitle = isset($arrayContent["title"]) ? $arrayContent["title"] : null;

            // Only an exact boolean true from the filter suppresses the entry.
            if (filterContent($entryTitle, $allowedContent) !== true) {
                $tagContent = arrayToXML($arrayContent, true, $allowedContent);
                $xmlString .= "\n<" . $linkType . ">" . $tagContent . "\n</" . $linkType . ">";
            }
            continue;
        }

        // Non-entry array: each item becomes one self-closing tag.
        // NOTE(review): items are expected to be arrays; a scalar item still
        // fails in array_key_exists()/foreach exactly as before - confirm
        // whether repeated text-only elements need to be supported here.
        foreach ($arrayContent as $thisTagAttributes) {
            if (array_key_exists("@attributes", $thisTagAttributes)) {
                $thisTagAttributes = $thisTagAttributes["@attributes"];
            }

            $tagContent = "";
            foreach ($thisTagAttributes as $attribute => $attributeValue) {
                $tagContent .= $attribute . '="'
                    . htmlspecialchars((string) $attributeValue, $attributeFlags, 'UTF-8')
                    . '" ';
            }

            $xmlString .= "\n\t<" . $linkType . " " . $tagContent . "/>";
        }
    }

    return $xmlString;
}
/**
 * Checks a search term against a set of keyed content values.
 *
 * The search term is trimmed first. For every value two checks may run, and
 * the first one that filterContent() reports as truthy ends the scan:
 *  1. only for the "title" and "subtitle" keys, and only when the value is at
 *     least 3 bytes long: filterContent($value, $search);
 *  2. for every key (title/subtitle included): filterContent($search, $value).
 *
 * @param string $search    Search term; surrounding whitespace is ignored.
 * @param array  $contents  Content values indexed by field name.
 * @return bool True as soon as one check succeeds, false otherwise.
 */
function stringFilter($search, $contents)
{
    $needle = trim($search);

    foreach ($contents as $field => $value) {
        // Loose comparison on purpose: it matches keys the same way a
        // switch/case on the key would.
        $isHeading = ($field == "title" || $field == "subtitle");

        // Headings are also tested in the reverse direction; very short
        // values (a subtitle can be a single character) are skipped.
        if ($isHeading && strlen($value) >= 3 && filterContent($value, $needle)) {
            return true;
        }

        // Every field, headings included, is tested against the search term.
        if (filterContent($needle, $value)) {
            return true;
        }
    }

    return false;
}
/**
 * Runs the content through filterContent() and getPrettyXml(), then removes a
 * leading XML declaration line (everything from "<?xml" at the very start of
 * the string up to and including the first newline).
 *
 * @param string $content
 * @return string
 */
function prepareXmlContent($content)
{
    $filtered = filterContent($content);
    $pretty = getPrettyXml($filtered);

    // The declaration is only removed when it sits at the start of the string
    // and is terminated by a newline.
    return preg_replace('/^<\\?xml.*\\n/', '', $pretty);
}
// Content was obtained so update or add in mysql $query = 'SELECT * from dnint_url_contents WHERE url="' . addslashes($url) . '"'; $stm2 = $dbh->execute($query); if ($stmt->num_row() == 0) { //this URL is new and its contents are not fetched yet $query = "INSERT INTO dnint_url_contents \n SET url='" . addslashes($url) . "', \n url_content='" . addslashes($content) . "', \n fk_dnint_url_id=" . $id; $dbh->execute($query); } else { //this URL exists so just overwrite its contents (and also its FK to make sure we are in sync) $query = "UPDATE dnint_url_contents \n SET url_content='" . addslashes($content) . "', \n fk_dnint_url_id=" . addslashes($id) . " \n WHERE url='" . $url . "'"; $stmt = $dbh->execute($query); } // Semantic parsing phase $sql = $dbh->execute("SELECT id FROM feed_url_contents ORDER BY id DESC LIMIT 1"); list($last_id) = $sql->fetch_array(); $filtered = filterContent($content, $wordsAfterFilter); $numKeywords = 0; $sumDimX = 0; $sumDimY = 0; $negAverage = 0; evaluateFilteredContent($dbh, $filtered, $numKeywords, $sumDimX, $sumDimY); $dimxAverage = round($sumDimX / $numKeywords, 2); $dimyAverage = round($sumDimY / $numKeywords, 2); $query = "INSERT INTO dnint_contents_parsed \n SET url='" . addslashes($url) . "', \n parsed_content='" . addslashes($filtered) . "',\n fk_dnint_url_contents_id=" . addslashes($last_id); $stmt = $dbh->execute($query); // Get the id of the inserted record $newID = mysql_insert_id(); // Now also add a record in the feed_parsed_results table $query = 'INSERT INTO dnint_parsed_results SET dimx_avg=' . addslashes($dimxAverage) . ', dimy_avg=' . addslashes($dimyAverage) . ' ,