print "<rss version='2.0'>\n"; print " <channel>\n"; print " <title>{$title} - via rsshose.com</title>\n"; print " <link>{$htmlurl}</link>\n"; print " <description>{$title} - from http://rsshose.com</description>\n"; # RSS items $result = mysql_query("select a.id,a.rss_title,a.rss_url,a.rss_desc,a.readability_title,a.readability_content,unix_timestamp(a.crawl_date) as crawldate from articles as a, user_feed as uf, feeds as f where uf.feed_id = f.id and a.feed_id = f.id and uf.id = '{$user_feed_id}' and f.last_fetch > date_sub(now(),interval 24 HOUR) order by a.crawl_date desc"); while ($row = mysql_fetch_assoc($result)) { $id = $row['id']; $rss_title = htmlspecialchars(clean_rss($row['rss_title'])); $rss_url = $row['rss_url']; $crawldate = $row['crawldate']; $ts = date("D, d M Y H:i:s T", $crawldate); $rss_desc = clean_rss($row['rss_desc']); $readability_title = htmlspecialchars(clean_rss($row['readability_title'])); $readability_content = clean_rss($row['readability_content']); print " <item>\n"; if (($readability_content != "<div></div>" or $readability_content != "") and $flag_sanitize == 1) { print " <title>{$rss_title}</title>\n"; # $readability_content = "<h2>Original Article title: $readability_title</h2>" . $readability_content; } else { print " <title>{$rss_title}</title>\n"; } print " <link>{$rss_url}</link>\n"; $guid = hash('sha1', $rss_url); print " <guid>{$guid}</guid>\n"; print " <pubDate>{$ts}</pubDate>\n"; if (($readability_content != "<div></div>" or $readability_content != "") and $flag_sanitize == 1) { print " <description>\n <![CDATA[ {$readability_content} ]]> \n</description>\n"; # print (" <description>\n <![CDATA[ $readability_content <br> <h3>Original RSS content:</h3><br> $rss_desc ]]> \n</description>\n"); } else {
/**
 * Handle one feed-fetch message taken from the queue.
 *
 * The message body is JSON carrying xmlurl, feed_id and feed_type (a
 * crawl_articles field is also sent but is not used here). The feed is
 * downloaded with cURL and parsed with SimpleXML, then:
 *   - download failed: nothing is reclassified or inserted;
 *   - no items found and feed_type is 'rss': the feed row is switched to
 *     type 'atom' with alturl pointing at the atom2rss converter;
 *   - no items found and feed_type is 'atom': the feed row is set to type
 *     'unknown' and alturl is cleared;
 *   - items found: each one is inserted into `articles`, and every insert
 *     that succeeds is published to $out_exchange for sanitizing.
 * Afterwards the message is acknowledged and feeds.last_fetch is set to now().
 * A message whose body is not usable JSON is acknowledged and dropped without
 * touching the database.
 *
 * Relies on the globals $debug, $out_ch, $out_exchange and $useragent, and on
 * the implicit mysql_* connection already being open.
 *
 * @param object $msg Incoming AMQP message; ->body and ->delivery_info are read.
 * @return void
 */
function process_message($msg) {
    global $debug;
    global $out_ch;
    global $out_exchange;
    global $useragent;

    $ob = json_decode($msg->body);
    if (!is_object($ob) || !isset($ob->xmlurl, $ob->feed_id)) {
        # Nothing sensible can be done with this message; ack it so it is not
        # left unacknowledged, and leave the feeds table alone.
        print "BAD MESSAGE: {$msg->body}\n";
        $msg->delivery_info['channel']->basic_ack($msg->delivery_info['delivery_tag']);
        return;
    }

    $url = $ob->xmlurl;
    $feed_id = $ob->feed_id;
    $type = isset($ob->feed_type) ? $ob->feed_type : null;

    # Escaped once up front; every query below is keyed on the feed id.
    $feed_id_sql = mysql_real_escape_string((string) $feed_id);

    print "FEED: {$url}\n";

    $crl = curl_init();
    curl_setopt($crl, CURLOPT_URL, $url);
    curl_setopt($crl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($crl, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($crl, CURLOPT_MAXCONNECTS, 30);
    curl_setopt($crl, CURLOPT_TIMEOUT, 5);
    curl_setopt($crl, CURLOPT_USERAGENT, $useragent);
    $rss_xml = curl_exec($crl);
    # curl_exec() returns false on transport failure; grab the reason before
    # the handle is closed.
    $fetch_failed = ($rss_xml === false);
    $fetch_error = $fetch_failed ? curl_error($crl) : '';
    curl_close($crl);

    if ($debug) {
        # Keep a copy of the raw response for inspection.
        file_put_contents("rss.txt", (string) $rss_xml);
    }

    $xml = $fetch_failed ? false : simplexml_load_string($rss_xml);

    # Items are looked for under <channel> first, then directly under the root.
    $item_arr = null;
    if ($xml !== false) {
        if (isset($xml->channel->item)) {
            $item_arr = $xml->channel->item;
        } elseif (isset($xml->item)) {
            $item_arr = $xml->item;
        }
    }

    if ($fetch_failed) {
        # A timeout or connection error says nothing about the feed format, so
        # the feed must not be reclassified because of it.
        print "\tFETCH FAILED: {$fetch_error}\n";
    } elseif ($item_arr === null && $type === 'rss') {
        # couldn't parse the RSS file - try atom2rss converter
        $alturl = "http://rsshose.com/atom2rss.php?url=" . urlencode($url);
        $alturl_sql = mysql_real_escape_string($alturl);
        mysql_query("update feeds set type = 'atom', alturl = '{$alturl_sql}' where id = '{$feed_id_sql}'");
    } elseif ($item_arr === null && $type === 'atom') {
        # doesn't parse through the atom2rss converter either - set to unknown
        mysql_query("update feeds set type = 'unknown', alturl = NULL where id = '{$feed_id_sql}'");
    } elseif ($item_arr !== null) {
        # rss is happy
        # Every new article is queued for sanitizing regardless of any
        # per-user flag_sanitize setting, so that flag is not looked up here.
        foreach ($item_arr as $item) {
            $orig_title = clean_rss((string) $item->title);
            $orig_link = (string) $item->link;
            $orig_desc = clean_rss((string) $item->description);

            $title = mysql_real_escape_string($orig_title);
            $link = mysql_real_escape_string($orig_link);
            $desc = mysql_real_escape_string($orig_desc);

            # A failed insert is skipped silently - presumably a unique key
            # rejects articles that are already stored (confirm against schema).
            $inserted = mysql_query("insert into articles values ('','{$feed_id_sql}','{$title}','{$link}','{$desc}',now(),'','')");
            if ($inserted === false) {
                continue;
            }

            print "\tNew Article: {$orig_title}\n";

            $task = array('article_id' => mysql_insert_id(), 'url' => $orig_link);
            $msg_body = json_encode($task);
            # delivery_mode 2 is set on the outgoing message (persistent in AMQP).
            $out_msg = new AMQPMessage($msg_body, array('content_type' => 'text/plain', 'delivery_mode' => 2));
            $out_ch->basic_publish($out_msg, $out_exchange);
            print "\tTO SANITIZE QUEUE: {$msg_body}\n";
        }
    }
    # Any other case (no items and an unrecognised feed_type) leaves the feed untouched.

    $msg->delivery_info['channel']->basic_ack($msg->delivery_info['delivery_tag']);
    mysql_query("update feeds set last_fetch = now() where id = '{$feed_id_sql}'");
}