/**
 * Fetch the current DiaryLand entry for a friend's diary page.
 *
 * If get_cached() yields an entry for $uri it is returned as-is. Otherwise the
 * page is downloaded with get(), the entry's date, time and body are scraped
 * out of the HTML, and the resulting entry is handed to put_cache().
 *
 * @param string $uri Address of the friend's DiaryLand page.
 *
 * @return array Map of "<date> <time>" => entry, where an entry has the keys
 *               date, data, uri and friend_uri. Empty when the page is empty
 *               or the entry cannot be extracted.
 */
function get_friend_diaryland($uri)
{
    if ($entry = get_cached($uri)) {
        return array($entry['date'] => $entry);
    }

    $page = get($uri);
    // Cast first: a failed fetch may not hand back a string at all.
    if (strlen((string) $page) == 0) {
        print "No data at {$uri}\n";
        return array();
    }

    // Groups: 1 = date, 2 = hh:mm, 3 = a.m./p.m. marker, 4 = entry body.
    $found = preg_match(
        '!(\\d+-\\d+-\\d+)\\s*-\\s*(\\d+:\\d+)\\s*(p\\.m\\.|a\\.m\\.).*?(<P>.*?)<P><A[^>]*>previous!ms',
        $page,
        $matches
    );
    if (!$found) {
        print "Couldn't extract entry from {$uri}\n";
        // Report the page that failed to parse (this used to pass an undefined variable).
        mail_entry("*****@*****.**", "DiaryLand Entry Breaks Parser... more at 10.", $uri, "{$page}");
        return array();
    }

    // Convert the 12-hour clock to 24-hour. Noon and midnight are the special
    // cases: 12 p.m. stays 12, 12 a.m. becomes 00.
    $time = explode(':', $matches[2]);
    $hour = (int) $time[0];
    if ($matches[3] == 'p.m.' && $hour < 12) {
        $time[0] = $hour + 12;
    } elseif ($matches[3] == 'a.m.' && $hour == 12) {
        $time[0] = '00';
    }
    $time = join(":", $time);

    $date = $matches[1] . " " . $time;
    $data = $matches[4];
    $friend_uri = $uri;
    // The entry's own URI is the diary URI with the timestamp appended.
    $uri = $uri . "/{$date}";

    $entry = compact('date', 'data', 'uri', 'friend_uri');
    put_cache(array($entry));

    return array($date => $entry);
}
/**
 * Prepare the routing tables.
 *
 * When $this->cache is truthy, the routes list and the named routes are
 * loaded from get_cached() under $this->cache_id; [null, null] is passed as
 * the second argument (presumably the value returned on a cache miss - confirm
 * against get_cached()). If no routes list is available afterwards,
 * init_routes() is called.
 */
public function init()
{
    if ($this->cache) {
        $cached = get_cached($this->cache_id, [null, null]);
        list($this->_routes_list, $this->_named_routes) = $cached;
    }

    if (null !== $this->_routes_list) {
        return;
    }

    $this->init_routes();
}
if (isset($permalink) && is_string($permalink)) { $newitem->setLink($permalink); } else { $permalink = $item->get_permalink(); $newitem->setLink($permalink); } if (isset($permalink) && is_string($permalink) && strlen($permalink) < 9) { continue; } //if ($permalink && ($response = $http->get($permalink, true)) && $response['status_code'] < 300) { // Allowing error codes - some sites return correct content with error status // e.g. prospectmagazine.co.uk returns 403 $cached_page = is_cached($permalink); if ($cached_page) { debug('Loading processed page form cache...'); $html = get_cached($permalink); $extraction_successful = true; } else { if (($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) { $effective_url = $response['effective_url']; if (!url_allowed($effective_url)) { continue; } // check if action defined for returned Content-Type $mime_info = get_mime_action_info($response['headers']); if (isset($mime_info['action'])) { if ($mime_info['action'] == 'exclude') { continue; // skip this feed item entry } elseif ($mime_info['action'] == 'link') { if ($mime_info['type'] == 'image') {
/**
 * Collect a friend's OpenDiary entries, grouped by day and time.
 *
 * The diary index at $uri is scanned for entry links. Each entry is taken from
 * get_cached() when available; otherwise the entry page is downloaded with
 * get(), its subject, date, optional time and body are scraped, and the result
 * is stored with put_cache(). Pages that cannot be parsed are reported through
 * mail_entry().
 *
 * @param string $uri Address of the friend's OpenDiary index page.
 *
 * @return array Whatever collapse_times() produces for the collected
 *               [date][time] => entry map; an empty array when the index
 *               cannot be fetched or holds no entry links.
 */
function get_friend_opendiary($uri)
{
    $page = get($uri);
    if (!$page) {
        debug("Couldn't get {$uri}\n");
        return array();
    }

    if (!preg_match_all('!"(entryview\\d+\\.asp\\?authorcode=[A-Z0-9]+\\&entry=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        debug("None found\n");
        return array();
    }

    // The index links are relative; make them absolute.
    $entryuris = array();
    foreach ($matches[1] as $path) {
        $entryuris[] = 'http://www.opendiary.com/' . $path;
    }

    $days = array();
    foreach ($entryuris as $entryuri) {
        if ($entry = get_cached($entryuri)) {
            print "got {$entryuri} from cache\n";
            // Cached dates are stored as "<date> <time>".
            $parts = explode(' ', $entry['date'], 2);
            $date = $parts[0];
            $time = isset($parts[1]) ? $parts[1] : '';
            $days[$date][$time] = $entry;
            continue;
        }

        $entrypage = get($entryuri);
        if (!$entrypage) {
            print "Couldn't get journal entry {$entryuri}\n";
            continue;
        }

        // Any run of whitespace and/or HTML tags.
        $tagsre = '(?:\\s|<[^>]+?>)+';
        // Groups: 1 = subject, 2 = m/d/y date, 3 = h.mm (optional),
        // 4 = am/pm (optional), 5 = entry body.
        $found = preg_match(
            "#<TABLE WIDTH=100%>+{$tagsre}(.*?)</TD>{$tagsre}(\\d+/\\d+/\\d+){$tagsre}" . "(?:Time: (\\d+.\\d+)(am|pm))?(.*?)</TD>#s",
            $entrypage,
            $matches
        );
        if (!$found) {
            print "Couldn't get entry from {$entryuri}\n";
            mail_entry("*****@*****.**", "OpenDiary Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
            continue;
        }

        $subject = $matches[1];
        list($month, $day, $year) = explode('/', $matches[2]);
        $date = sprintf('%s-%s-%s', $year, $month, $day);
        $data = $matches[5];

        // Entries without a "Time:" stamp are filed under midnight.
        $time = "00:00:00";
        if (isset($matches[3]) && $matches[3] !== '') {
            $parts = explode('.', $matches[3], 2);
            $hour = $parts[0];
            $minute = isset($parts[1]) ? $parts[1] : '00';
            // 12-hour to 24-hour: 12pm stays 12, 12am becomes 00.
            if ($matches[4] == 'pm' && (int) $hour < 12) {
                $hour = (int) $hour + 12;
            } elseif ($matches[4] == 'am' && (int) $hour == 12) {
                $hour = '00';
            }
            $time = $hour . ":" . $minute;
        }

        $entry = array(
            'date' => "{$date} {$time}",
            'subject' => $subject,
            'data' => $data,
            'uri' => $entryuri,
            'friend_uri' => $uri,
        );
        print "Got entry {$entryuri}: \n";
        print_r($entry);
        print "\n";

        $days[$date][$time] = $entry;
        put_cache(array($entry));
    }

    return collapse_times($days);
}
// NOTE(review): the statements up to the closing brace below are the tail of a
// cached-fetch function whose header lies outside this view; they are kept
// unchanged. It writes a freshly downloaded body to $cache and returns it, and
// otherwise returns the contents of $cache.
        if (false !== ($data = file_get_contents($url))) {
            file_put_contents($cache, $data);
        } else {
            unset($data);
        }
    }
    if (isset($data)) {
        return $data;
    } else {
        return file_get_contents($cache);
    }
}

// Collect the current air temperatures reported for Tartu from two sources and
// add their average under 'tartu-keskmine-temp'.
$out = array();
$trt = 0;      // running sum of all readings
$readings = 0; // number of readings in $trt (kept separately from $out so that
               // duplicate station names cannot skew the average)
$obs = get_cached('http://www.ilmateenistus.ee/ilma_andmed/xml/observations.php');
$met = get_cached('http://meteo.physic.ut.ee/et/freshwin.php');

// simplexml_load_string() returns false on unparsable input; skip the station
// list in that case instead of calling children() on false.
$stations = (is_string($obs) && $obs !== '') ? simplexml_load_string($obs) : false;
if (false !== $stations) {
    foreach ($stations->children() as $s) {
        $name = (string) $s->name;
        if (false === stripos($name, 'tartu')) {
            continue;
        }
        $raw = trim((string) $s->airtemperature);
        if ($raw === '') {
            // Station reported no temperature; do not count it as 0 °C.
            continue;
        }
        $tmp = (float) $raw;
        $trt += $tmp;
        $readings++;
        $out[strtolower($name) . '-temp'] = sprintf('%s °C', round($tmp, 1));
    }
}

preg_match('/<B>([0-9\\.\\-]+)\\s°C<\\/B>/i', (string) $met, $m);
if (isset($m[1])) {
    $tmp = (float) $m[1];
    $trt += $tmp;
    $readings++;
    $out['tartu-maarjavälja-temp'] = sprintf('%s °C', round($tmp, 1));
}

// Without any reading there is no average to report (and nothing to divide by).
if ($readings > 0) {
    $out['tartu-keskmine-temp'] = sprintf('%s °C', round($trt / $readings, 1) + 0);
}
/**
 * Collect a friend's LiveJournal entries, grouped by day and time.
 *
 * The page at $uri is scanned for talkpost links. Each linked entry is taken
 * from get_cached() when available; otherwise the entry page is downloaded
 * with get(), its date, time, subject and body are scraped, and the result is
 * stored with put_cache(). Entries whose body is suspiciously short, and pages
 * that cannot be parsed, are reported through mail_entry().
 *
 * @param string $uri Address of the friend's LiveJournal page.
 *
 * @return array Whatever collapse_times() produces for the collected
 *               [date][time] => entry map; an empty array when the page cannot
 *               be fetched or holds no entry links.
 */
function get_friend_livejournal($uri)
{
    $page = get($uri);
    if (!$page) {
        return array();
    }

    // Start from an empty list so a page without links is handled cleanly.
    $entryuris = array();
    if (preg_match_all('!"(http://www\\.livejournal\\.com/talkpost\\.bml\\?journal=.*itemid=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        $entryuris = $matches[1];
    }
    if (count($entryuris) == 0) {
        return array();
    }

    $days = array();
    foreach ($entryuris as $entryuri) {
        // Links are lifted out of HTML attributes, so entity-encoded
        // ampersands have to be decoded before the URI is usable.
        $entryuri = html_entity_decode($entryuri);

        if ($entry = get_cached($entryuri)) {
            // Cached dates are stored as "<date> <time>".
            $parts = explode(' ', $entry['date'], 2);
            $date = $parts[0];
            $time = isset($parts[1]) ? $parts[1] : '';
            $days[$date][$time] = $entry;
            continue;
        }

        $entrypage = get($entryuri);
        if (!$entrypage) {
            continue;
        }
        if (!preg_match('#<!-- body area -->(.*)<!-- /body area -->#ms', $entrypage, $matches)) {
            continue;
        }
        $entrypage = $matches[1];

        // Any run of whitespace and/or HTML tags.
        $tagsre = '(?:\\s|<[^>]+?>)+';
        // Groups: 1-3 = date parts, 4 = hh:mm:ss, 5 = entry markup.
        $found = preg_match(
            "!(?:said|wrote),{$tagsre}@{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}(\\d+:\\d+:\\d+).*?</CENTER>.*?<ul>(.*)</ul>!ms",
            $entrypage,
            $matches
        );
        if (!$found) {
            print "Couldn't extract {$entryuri}\n";
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
            continue;
        }

        $date = $matches[1] . "-" . $matches[2] . "-" . $matches[3];
        $time = $matches[4];
        $data = trim($matches[5]);

        // A leading table is moved behind the rest of the entry.
        if (preg_match('!^<table.*?>(.*?)</table>(.*)$!ms', $data, $matches)) {
            $data = trim($matches[2]) . "<table>" . $matches[1] . "</table>";
        }
        $data = preg_replace('!^<p>!ms', '', $data);

        // Everything before the first <br /> is the subject. Reset it for
        // every entry so a subject never leaks over from the previous one.
        $subject = '';
        if (preg_match('!^(.*?)<br />(.*)$!ms', $data, $matches)) {
            $data = trim($matches[2]);
            $subject = trim($matches[1]);
        }

        $entry = array(
            'date' => "{$date} {$time}",
            'subject' => $subject,
            'data' => $data,
            'uri' => $entryuri,
            'friend_uri' => $uri,
        );
        $days[$date][$time] = $entry;

        // A near-empty body most likely means the scrape went wrong: report it
        // instead of caching it.
        if (strlen(trim($data)) > 7) {
            put_cache(array($entry));
        } else {
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
        }
    }

    return collapse_times($days);
}