Пример #1
0
 # Emit the RSS 2.0 envelope and the channel header.
 # NOTE(review): $title and $htmlurl are set outside this fragment; this code
 # assumes they are already safe to embed in XML - confirm at the caller.
 print "<rss version='2.0'>\n";
 print "  <channel>\n";
 print "    <title>{$title} - via rsshose.com</title>\n";
 print "    <link>{$htmlurl}</link>\n";
 print "    <description>{$title} - from http://rsshose.com</description>\n";
 # RSS items: every article of the feed behind this user_feed row, newest
 # first, limited to feeds whose last_fetch is within the past 24 hours.
 # NOTE(review): $user_feed_id is interpolated as-is; its origin is not visible
 # here - make sure it is escaped/validated before it reaches this query.
 $result = mysql_query("select a.id,a.rss_title,a.rss_url,a.rss_desc,a.readability_title,a.readability_content,unix_timestamp(a.crawl_date) as crawldate from articles as a, user_feed as uf, feeds as f where uf.feed_id = f.id and a.feed_id = f.id and uf.id = '{$user_feed_id}' and f.last_fetch > date_sub(now(),interval 24 HOUR) order by a.crawl_date desc");
 while ($row = mysql_fetch_assoc($result)) {
     # All row-derived locals are kept: the loop body continues past this
     # fragment and may still use them.
     $id = $row['id'];
     $rss_title = htmlspecialchars(clean_rss($row['rss_title']));
     $rss_url = $row['rss_url'];
     $crawldate = $row['crawldate'];
     # DATE_RSS ("D, d M Y H:i:s O") always yields a numeric UTC offset; the
     # previous "T" format could emit zone abbreviations RFC 822 does not allow.
     $ts = date(DATE_RSS, (int) $crawldate);
     $rss_desc = clean_rss($row['rss_desc']);
     $readability_title = htmlspecialchars(clean_rss($row['readability_title']));
     $readability_content = clean_rss($row['readability_content']);

     # Use the readability extract only when sanitizing is enabled AND the
     # extract is non-empty. The old test joined the two emptiness checks with
     # "or", which is always true, so empty extracts were never replaced by the
     # fallback branch below.
     $has_readability = ($readability_content != "<div></div>" && $readability_content != "");
     $use_readability = ($has_readability && $flag_sanitize == 1);

     print "    <item>\n";
     # The title is the original RSS title in either mode.
     print "      <title>{$rss_title}</title>\n";
     # Escape the URL for XML: a raw "&" in a query string would make the
     # whole feed ill-formed.
     $link_xml = htmlspecialchars($rss_url);
     print "      <link>{$link_xml}</link>\n";
     # The guid is an opaque hash of the raw URL, not a URL itself, so it must
     # be flagged isPermaLink="false" (RSS 2.0 defaults the attribute to true).
     $guid = hash('sha1', $rss_url);
     print "      <guid isPermaLink=\"false\">{$guid}</guid>\n";
     print "      <pubDate>{$ts}</pubDate>\n";
     if ($use_readability) {
         # A literal "]]>" in the content would end the CDATA section early;
         # split it across two adjacent CDATA sections instead.
         $cdata_body = str_replace(']]>', ']]]]><![CDATA[>', $readability_content);
         print "      <description>\n <![CDATA[ {$cdata_body} ]]> \n</description>\n";
     } else {
Пример #2
0
/**
 * Handle one feed-fetch queue message: download the feed, store its items as
 * articles and publish a follow-up message for every newly inserted article.
 *
 * The message body is JSON; the fields read here are xmlurl, feed_id and
 * feed_type. When no items can be extracted, the feed row is reclassified
 * ('rss' -> 'atom' with an atom2rss alturl, 'atom' -> 'unknown'). The message
 * is always acknowledged and feeds.last_fetch is updated afterwards.
 *
 * Uses the globals $debug (dump the raw download to rss.txt), $out_ch and
 * $out_exchange (outgoing AMQP channel/exchange) and $useragent, plus the
 * already-open mysql_* connection and the clean_rss() helper.
 *
 * @param object $msg Incoming AMQP message; ->body is the JSON payload and
 *                    ->delivery_info provides the channel and delivery tag
 *                    used for the acknowledgement.
 * @return void
 */
function process_message($msg)
{
    global $debug;
    global $out_ch;
    global $out_exchange;
    global $useragent;

    $ob = json_decode($msg->body);
    if (!is_object($ob) || !isset($ob->xmlurl, $ob->feed_id)) {
        # Undecodable or incomplete payload: there is nothing to fetch and no
        # feed row to touch. Acknowledge it (as every message is) and stop,
        # instead of reading properties from null.
        print "BAD MESSAGE: {$msg->body}\n";
        $msg->delivery_info['channel']->basic_ack($msg->delivery_info['delivery_tag']);
        return;
    }
    $url = $ob->xmlurl;
    $feed_id = $ob->feed_id;
    $type = isset($ob->feed_type) ? $ob->feed_type : '';

    print "FEED: {$url}\n";
    $crl = curl_init();
    curl_setopt_array($crl, array(
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_MAXCONNECTS => 30,
        CURLOPT_TIMEOUT => 5,
        CURLOPT_USERAGENT => $useragent,
    ));
    $rss_xml = curl_exec($crl);
    curl_close($crl);

    if ($debug) {
        # Keep the raw download around for inspection (empty on fetch failure).
        file_put_contents("rss.txt", (string) $rss_xml);
    }

    # curl_exec() returns false on a transport error; do not hand that (or an
    # empty body) to the XML parser.
    # NOTE(review): a failed download is still treated like an unparseable
    # feed below, exactly as before - consider whether a transient network
    # error should really reclassify the feed.
    if ($rss_xml === false || $rss_xml === '') {
        $xml = false;
    } else {
        $xml = simplexml_load_string($rss_xml);
    }

    # Items live under <channel> in RSS 2.0; some feeds put them at the root.
    if (isset($xml->channel->item)) {
        $item_arr = $xml->channel->item;
    } elseif (isset($xml->item)) {
        $item_arr = $xml->item;
    }

    # mysql_real_escape_string() honours the connection charset, which
    # addslashes() does not, and feed_id comes straight from the message.
    $feed_id_sql = mysql_real_escape_string((string) $feed_id);

    if (!isset($item_arr)) {
        if ($type == 'rss') {
            # couldn't parse the RSS file - try atom2rss converter
            $alturl = "http://rsshose.com/atom2rss.php?url=" . urlencode($url);
            $alturl_sql = mysql_real_escape_string($alturl);
            mysql_query("update feeds set type = 'atom', alturl = '{$alturl_sql}' where id = '{$feed_id_sql}'");
        } elseif ($type == 'atom') {
            # doesn't parse through the atom2rss converter either - set to unknown
            mysql_query("update feeds set type = 'unknown', alturl = NULL where id = '{$feed_id_sql}'");
        }
        # Any other feed type: nothing was parsed and there is nothing to
        # reclassify. (Previously this case fell into the item loop and
        # iterated an undefined variable.)
    } else {
        # rss is happy
        # The per-feed flag_sanitize lookup was dropped: its result was unused
        # because every new article is queued for sanitizing for now.
        foreach ($item_arr as $items) {
            $orig_title = clean_rss((string) $items->title);
            $orig_link = (string) $items->link;
            $orig_desc = clean_rss((string) $items->description);

            $title = mysql_real_escape_string($orig_title);
            $link = mysql_real_escape_string($orig_link);
            $desc = mysql_real_escape_string($orig_desc);

            # insert article into article table if doesn't already exist
            mysql_query("insert into articles values ('','{$feed_id_sql}','{$title}','{$link}','{$desc}',now(),'','')");
            if (mysql_errno()) {
                # Deliberately quiet: a failed insert is taken to mean the
                # article is already stored (presumably a unique key rejects
                # duplicates - verify against the schema).
                continue;
            }

            print "\tNew Article: {$orig_title}\n";
            # Queue the freshly inserted article for sanitizing.
            $last_id = mysql_insert_id();
            $task = array('article_id' => $last_id, 'url' => $orig_link);
            $msg_body = json_encode($task);
            # delivery_mode 2 marks the message as persistent in AMQP.
            $out_msg = new AMQPMessage($msg_body, array('content_type' => 'text/plain', 'delivery_mode' => 2));
            $out_ch->basic_publish($out_msg, $out_exchange);
            print "\tTO SANITIZE QUEUE: {$msg_body}\n";
        }
    }

    # Acknowledge regardless of outcome, then record the fetch time.
    $msg->delivery_info['channel']->basic_ack($msg->delivery_info['delivery_tag']);
    mysql_query("update feeds set last_fetch = now() where id = '{$feed_id_sql}'");
}