Example #1
0
/**
 * Fetch the entry shown on a friend's DiaryLand page.
 *
 * A cached entry (get_cached()) is returned as-is. Otherwise the page is
 * downloaded with get(), the entry's date, time and body are extracted with a
 * regular expression, and the result is stored through put_cache().
 *
 * @param string $uri Address of the friend's DiaryLand page.
 *
 * @return array Map of "date time" => entry (keys: date, data, uri,
 *               friend_uri); empty when the page is empty or cannot be parsed.
 */
function get_friend_diaryland($uri)
{
    $entry = get_cached($uri);
    if ($entry) {
        return array($entry['date'] => $entry);
    }

    $page = get($uri);
    if ((string) $page === '') {
        print "No data at {$uri}\n";
        return array();
    }

    $pattern = '!(\\d+-\\d+-\\d+)\\s*-\\s*(\\d+:\\d+)\\s*(p\\.m\\.|a\\.m\\.).*?(<P>.*?)<P><A[^>]*>previous!ms';
    if (!preg_match($pattern, $page, $matches)) {
        print "Couldn't extract entry from {$uri}\n";
        // Report the page that failed to parse (the original passed an undefined variable here).
        mail_entry("*****@*****.**", "DiaryLand Entry Breaks Parser... more at 10.", $uri, "{$page}");
        return array();
    }

    // Convert the 12-hour clock to 24-hour: 12 p.m. stays 12, 12 a.m. becomes 00,
    // every other p.m. hour gains 12. Other a.m. hours keep their original text.
    $time = explode(':', $matches[2]);
    $hour = (int) $time[0];
    if ($matches[3] == 'p.m.') {
        if ($hour < 12) {
            $time[0] = $hour + 12;
        }
    } elseif ($hour == 12) {
        $time[0] = '00';
    }

    $date = $matches[1] . " " . join(":", $time);
    $entry = array(
        'date' => $date,
        'data' => $matches[4],
        // The entry gets its own address: the friend page plus the entry date.
        'uri' => $uri . "/{$date}",
        'friend_uri' => $uri,
    );
    put_cache(array($entry));

    return array($date => $entry);
}
Example #2
0
 /**
  * Prepare the route tables.
  *
  * When caching is enabled, the route list and the named routes are taken
  * from get_cached() (defaulting to a pair of nulls). If no route list is
  * available afterwards, the routes are built with init_routes().
  */
 public function init()
 {
     $cachingEnabled = (bool) $this->cache;
     if ($cachingEnabled) {
         list($this->_routes_list, $this->_named_routes) = get_cached($this->cache_id, [null, null]);
     }

     // A route list is already available: nothing left to build.
     if (null !== $this->_routes_list) {
         return;
     }

     $this->init_routes();
 }
 // NOTE(review): this is an excerpt from inside a loop body (see the `continue`
 // statements); the enclosing loop and the closing braces of the last blocks
 // are outside the excerpt.
 //
 // Reuse an already-set string $permalink, otherwise take it from the feed
 // item; either way it is recorded on the new item.
 if (isset($permalink) && is_string($permalink)) {
     $newitem->setLink($permalink);
 } else {
     $permalink = $item->get_permalink();
     $newitem->setLink($permalink);
 }
 // Skip items whose permalink is shorter than 9 characters
 // (presumably too short to be a usable URL - confirm).
 if (isset($permalink) && is_string($permalink) && strlen($permalink) < 9) {
     continue;
 }
 //if ($permalink && ($response = $http->get($permalink, true)) && $response['status_code'] < 300) {
 // Allowing error codes - some sites return correct content with error status
 // e.g. prospectmagazine.co.uk returns 403
 $cached_page = is_cached($permalink);
 if ($cached_page) {
     // Cache hit: use the stored HTML and treat extraction as already done.
     debug('Loading processed page form cache...');
     $html = get_cached($permalink);
     $extraction_successful = true;
 } else {
     // Fetch the page. Status codes below 300 and above 400 are accepted;
     // 3xx responses and exactly 400 are not.
     if (($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) {
         $effective_url = $response['effective_url'];
         // Skip the item when the URL that was finally reached is not allowed.
         if (!url_allowed($effective_url)) {
             continue;
         }
         // check if action defined for returned Content-Type
         $mime_info = get_mime_action_info($response['headers']);
         if (isset($mime_info['action'])) {
             if ($mime_info['action'] == 'exclude') {
                 continue;
                 // skip this feed item entry
             } elseif ($mime_info['action'] == 'link') {
                 if ($mime_info['type'] == 'image') {
Example #4
0
/**
 * Collect the entries linked from a friend's OpenDiary page.
 *
 * Every entry link found on the page is resolved against
 * http://www.opendiary.com/. Entries known to get_cached() are reused; the
 * others are downloaded with get(), parsed, stored through put_cache() and
 * printed for debugging. Entries that cannot be downloaded are skipped;
 * entries that cannot be parsed are reported through mail_entry().
 *
 * @param string $uri Address of the friend's OpenDiary page.
 *
 * @return array Result of collapse_times() applied to the entries grouped as
 *               date => time => entry; an empty array when the page cannot
 *               be fetched or contains no entry links.
 */
function get_friend_opendiary($uri)
{
    $page = get($uri);
    if (!$page) {
        debug("Couldn't get {$uri}\n");
        return array();
    }
    if (!preg_match_all('!"(entryview\\d+\\.asp\\?authorcode=[A-Z0-9]+\\&entry=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        debug("None found\n");
        return array();
    }
    $entryuris = array();
    foreach ($matches[1] as $relative) {
        $entryuris[] = 'http://www.opendiary.com/' . $relative;
    }

    // The pattern does not depend on the entry, so build it once.
    $tagsre = '(?:\\s|<[^>]+?>)+';
    $entryre = "#<TABLE WIDTH=100%>+{$tagsre}(.*?)</TD>{$tagsre}(\\d+/\\d+/\\d+){$tagsre}" . "(?:Time: (\\d+.\\d+)(am|pm))?(.*?)</TD>#s";

    $days = array();
    foreach ($entryuris as $entryuri) {
        $entry = get_cached($entryuri);
        if ($entry) {
            print "got {$entryuri} from cache\n";
            // explode() replaces split(), which was removed in PHP 7.
            $parts = explode(' ', $entry['date'], 2);
            $date = $parts[0];
            $time = isset($parts[1]) ? $parts[1] : '';
            $days[$date][$time] = $entry;
            continue;
        }

        $page = get($entryuri);
        if (!$page) {
            print "Couldn't get journal entry {$entryuri}\n";
            continue;
        }
        if (!preg_match($entryre, $page, $matches)) {
            print "Couldn't get entry from {$entryuri}\n";
            mail_entry("*****@*****.**", "OpenDiary Entry Breaks Parser... more at 10.", $entryuri, "{$page}");
            continue;
        }

        $subject = $matches[1];
        list($month, $day, $year) = explode('/', $matches[2]);
        $date = sprintf('%s-%s-%s', $year, $month, $day);
        $data = $matches[5];

        // The "Time:" part is optional. Without a usable hour/minute pair the
        // entry is filed under midnight. (The original assigned ":" instead of
        // comparing against it, so every entry ended up at 00:00:00.)
        $time = "00:00:00";
        if (preg_match('/^(\\d+)\\D(\\d+)$/', $matches[3], $clock)) {
            $hourText = $clock[1];
            $hour = (int) $hourText;
            // 12-hour to 24-hour: 12pm stays 12, 12am becomes 0, other pm hours gain 12.
            if ($matches[4] == 'pm') {
                if ($hour < 12) {
                    $hourText = $hour + 12;
                }
            } elseif ($hour == 12) {
                $hourText = 0;
            }
            $time = $hourText . ":" . $clock[2];
        }

        // Fix up relative URIs:
        //$data = preg_replace("%(href|src)=('|\")(!?[a-z]://)(.*?)(\\2)%xi", '\1=\2http://www.livejournal.com/\3\2', $data);
        $entry = array('date' => "{$date} {$time}", 'subject' => $subject, 'data' => $data, 'uri' => $entryuri, 'friend_uri' => $uri);
        print "Got entry {$entryuri}: \n";
        print_r($entry);
        print "\n";
        $days[$date][$time] = $entry;
        put_cache(array($entry));
    }

    return collapse_times($days);
}
Example #5
0
File: index.php Project: 4nd3r/ilm
        // NOTE(review): the beginning of this function is outside the excerpt;
        // $url and $cache are set before this point.
        // Download $url. On success the body is written to the file $cache;
        // on failure $data is dropped so the fallback below is used instead.
        if (false !== ($data = file_get_contents($url))) {
            file_put_contents($cache, $data);
        } else {
            unset($data);
        }
    }
    // Prefer the data obtained above; otherwise return the contents of the cache file.
    if (isset($data)) {
        return $data;
    } else {
        return file_get_contents($cache);
    }
}
// Gather the current air temperatures of all stations whose name contains
// "tartu" into $out (keyed "<station name>-temp"), add the reading scraped
// from the second page, and store the average as 'tartu-keskmine-temp'.
$out = array();
$trt = 0;      // running sum of every temperature that was read
$readings = 0; // number of temperatures added to $trt
$obs = get_cached('http://www.ilmateenistus.ee/ilma_andmed/xml/observations.php');
$met = get_cached('http://meteo.physic.ut.ee/et/freshwin.php');

// simplexml_load_string() returns false for unparsable input; calling
// children() on that would be a fatal error, so the stations are skipped then.
$stations = simplexml_load_string((string) $obs);
if (false !== $stations) {
    foreach ($stations->children() as $s) {
        $name = (string) $s->name;
        if (false === stripos($name, 'tartu')) {
            continue;
        }
        $tmp = (float) $s->airtemperature;
        $trt += $tmp;
        $readings++;
        $out[strtolower($name) . '-temp'] = sprintf('%s °C', round($tmp, 1));
    }
}

if (preg_match('/<B>([0-9\\.\\-]+)\\s&deg;C<\\/B>/i', (string) $met, $m)) {
    $tmp = (float) $m[1];
    $trt += $tmp;
    $readings++;
    $out['tartu-maarjavälja-temp'] = sprintf('%s °C', round($tmp, 1));
}

// Count the readings themselves rather than the keys of $out: two stations
// with the same name share a key. With no reading at all there is no average
// (and dividing by zero throws on PHP 8), so the key is left out.
if ($readings > 0) {
    $out['tartu-keskmine-temp'] = sprintf('%s °C', round($trt / $readings, 1) + 0);
}
Example #6
0
/**
 * Collect the entries linked from a friend's LiveJournal page.
 *
 * The page is scanned for talkpost.bml links. Entries known to get_cached()
 * are reused; the others are downloaded with get() and parsed into date,
 * time, subject and body. Parsed entries whose body is longer than 7
 * characters are stored through put_cache(); shorter ones and pages that
 * cannot be parsed are reported through mail_entry().
 *
 * @param string $uri Address of the friend's LiveJournal page.
 *
 * @return array Result of collapse_times() applied to the entries grouped as
 *               date => time => entry; an empty array when the page cannot
 *               be fetched or contains no entry links.
 */
function get_friend_livejournal($uri)
{
    $page = get($uri);
    if (!$page) {
        return array();
    }

    // Start from an empty list so a page without links is handled cleanly.
    // [^"]* keeps each match inside one quoted attribute; a greedy .* would
    // merge several links that share a line into a single bogus address.
    $entryuris = array();
    if (preg_match_all('!"(http://www\\.livejournal\\.com/talkpost\\.bml\\?journal=[^"]*itemid=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        $entryuris = $matches[1];
    }
    if (count($entryuris) == 0) {
        return array();
    }

    $tagsre = '(?:\\s|<[^>]+?>)+';
    $days = array();
    foreach ($entryuris as $entryuri) {
        $entryuri = str_replace("&amp;", "&", $entryuri);

        $entry = get_cached($entryuri);
        if ($entry) {
            // explode() replaces split(), which was removed in PHP 7.
            $parts = explode(' ', $entry['date'], 2);
            $date = $parts[0];
            $time = isset($parts[1]) ? $parts[1] : '';
            $days[$date][$time] = $entry;
            continue;
        }

        $page = get($entryuri);
        if (!$page) {
            continue;
        }
        if (!preg_match('#<!-- body area -->(.*)<!-- /body area -->#ms', $page, $matches)) {
            continue;
        }
        $page = $matches[1];

        if (!preg_match("!(?:said|wrote),{$tagsre}@{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}(\\d+:\\d+:\\d+).*?</CENTER>.*?<ul>(.*)</ul>!ms", $page, $matches)) {
            print "Couldn't extract {$entryuri}\n";
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$page}");
            continue;
        }

        $date = $matches[1] . "-" . $matches[2] . "-" . $matches[3];
        $time = $matches[4];
        $data = trim($matches[5]);

        // A leading table is moved behind the rest of the text.
        if (preg_match('!^<table.*?>(.*?)</table>(.*)$!ms', $data, $matches)) {
            $data = trim($matches[2]) . "<table>" . $matches[1] . "</table>";
        }
        $data = preg_replace('!^<p>!ms', '', $data);

        // Reset per entry: an entry without a "<br />" separator has no
        // subject and must not inherit the one of the previous entry.
        $subject = '';
        if (preg_match('!^(.*?)<br />(.*)$!ms', $data, $matches)) {
            $data = trim($matches[2]);
            $subject = trim($matches[1]);
        }

        // Fix up relative URIs:
        //$data = preg_replace("%(href|src)=('|\")(!?[a-z]://)(.*?)(\\2)%xi", '\1=\2http://www.livejournal.com/\3\2', $data);
        $entry = array('date' => "{$date} {$time}", 'subject' => $subject, 'data' => $data, 'uri' => $entryuri, 'friend_uri' => $uri);
        $days[$date][$time] = $entry;

        // Only entries with a non-trivial body are cached; anything shorter is
        // reported as a probable parser failure.
        if (strlen(trim($data)) > 7) {
            put_cache(array($entry));
        } else {
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$page}");
        }
    }

    return collapse_times($days);
}