/**
 * Fetch the entry currently shown on a DiaryLand journal page.
 *
 * If get_cached() yields an entry for $uri it is returned unchanged.
 * Otherwise the page is downloaded with get(), the entry's date, time and
 * body are scraped out of the HTML, and the result is handed to put_cache().
 * When the page cannot be parsed, the raw page is reported via mail_entry().
 *
 * @param string $uri Address of the journal page.
 * @return array Map of "date time" => entry, where an entry has the keys
 *               date, data, uri and friend_uri. Empty when the page is
 *               empty or cannot be parsed.
 */
function get_friend_diaryland($uri)
{
    $entry = get_cached($uri);
    if ($entry) {
        return array($entry['date'] => $entry);
    }

    $page = get($uri);
    if ((string) $page === '') {
        print "No data at {$uri}\n";
        return array();
    }

    // Captures: 1 = date (n-n-n), 2 = h:mm, 3 = a.m./p.m. marker,
    // 4 = body, running from the first <P> up to the "previous" link.
    $pattern = '!(\\d+-\\d+-\\d+)\\s*-\\s*(\\d+:\\d+)\\s*(p\\.m\\.|a\\.m\\.).*?(<P>.*?)<P><A[^>]*>previous!ms';
    if (!preg_match($pattern, $page, $matches)) {
        print "Couldn't extract entry from {$uri}\n";
        // Report the page we actually fetched; the original passed an
        // undefined variable as the URI here.
        mail_entry("*****@*****.**", "DiaryLand Entry Breaks Parser... more at 10.", $uri, "{$page}");
        return array();
    }

    // Convert the 12-hour clock to 24-hour. 12 p.m. is noon and must stay
    // 12; 12 a.m. is midnight. Other a.m. hours are kept exactly as written.
    list($hour, $minutes) = explode(':', $matches[2]);
    if ($matches[3] == 'p.m.') {
        if ((int) $hour < 12) {
            $hour = (int) $hour + 12;
        }
    } elseif ((int) $hour == 12) {
        $hour = '00';
    }

    $date = $matches[1] . ' ' . $hour . ':' . $minutes;
    $data = $matches[4];
    $friend_uri = $uri;
    // The entry's own URI is the journal URI suffixed with its timestamp.
    // NOTE(review): the cache lookup above uses the bare journal URI while
    // the stored entry carries this suffixed one; confirm against get_cached().
    $uri = $uri . "/{$date}";

    $entry = compact('date', 'data', 'uri', 'friend_uri');
    put_cache(array($entry));

    return array($date => $entry);
}
/**
 * Collect the entries linked from an OpenDiary index page.
 *
 * The index at $uri is downloaded with get() and scanned for
 * "entryviewN.asp?authorcode=...&entry=N" links. Each linked entry is taken
 * from get_cached() when available; otherwise its page is fetched, the
 * subject, date, optional time and body are scraped out, and the entry is
 * stored with put_cache(). Pages that cannot be parsed are reported through
 * mail_entry() and skipped.
 *
 * @param string $uri Address of the diary's index page.
 * @return mixed Whatever collapse_times() returns for the collected
 *               date => time => entry map; an empty array when the index
 *               cannot be fetched or contains no entry links.
 */
function get_friend_opendiary($uri)
{
    $index = get($uri);
    if (!$index) {
        debug("Couldn't get {$uri}\n");
        return array();
    }

    $linkre = '!"(entryview\\d+\\.asp\\?authorcode=[A-Z0-9]+\\&entry=(\\d+))"!';
    if (!preg_match_all($linkre, $index, $links, PREG_PATTERN_ORDER)) {
        debug("None found\n");
        return array();
    }

    // Captures: 1 = subject, 2 = m/d/y date, 3 = h<sep>mm (optional),
    // 4 = am/pm (optional), 5 = body.
    $tagsre = '(?:\\s|<[^>]+?>)+';
    $entryre = "#<TABLE WIDTH=100%>+{$tagsre}(.*?)</TD>{$tagsre}(\\d+/\\d+/\\d+){$tagsre}"
        . "(?:Time: (\\d+.\\d+)(am|pm))?(.*?)</TD>#s";

    $days = array();
    foreach ($links[1] as $relative) {
        $entryuri = 'http://www.opendiary.com/' . $relative;

        $entry = get_cached($entryuri);
        if ($entry) {
            print "got {$entryuri} from cache\n";
            // explode() replaces split(), which was removed in PHP 7.
            list($date, $time) = array_pad(explode(' ', $entry['date']), 2, '');
            $days[$date][$time] = $entry;
            continue;
        }

        $page = get($entryuri);
        if (!$page) {
            print "Couldn't get journal entry {$entryuri}\n";
            continue;
        }

        if (!preg_match($entryre, $page, $matches)) {
            print "Couldn't get entry from {$entryuri}\n";
            mail_entry("*****@*****.**", "OpenDiary Entry Breaks Parser... more at 10.", $entryuri, "{$page}");
            continue;
        }

        $subject = $matches[1];
        list($month, $day, $year) = explode('/', $matches[2]);
        $date = sprintf('%s-%s-%s', $year, $month, $day);
        $data = $matches[5];

        // Entries without a "Time:" stamp default to midnight. The original
        // test here was an assignment, which forced this default onto every
        // entry and threw the parsed time away.
        $time = '00:00:00';
        if (isset($matches[3]) && $matches[3] !== '') {
            // The pattern accepts any single separator character, so split
            // on the first non-digit rather than on a literal '.'.
            list($hour, $minutes) = array_pad(preg_split('/\\D/', $matches[3], 2), 2, '00');
            // 12-hour to 24-hour: 12pm is noon and stays 12, 12am is midnight.
            if ($matches[4] == 'pm') {
                if ((int) $hour < 12) {
                    $hour = (int) $hour + 12;
                }
            } elseif ((int) $hour == 12) {
                $hour = '00';
            }
            $time = $hour . ':' . $minutes;
        }

        // NOTE: relative href/src URIs inside the body are left untouched.
        $entry = array(
            'date' => "{$date} {$time}",
            'subject' => $subject,
            'data' => $data,
            'uri' => $entryuri,
            'friend_uri' => $uri,
        );

        print "Got entry {$entryuri}: \n";
        print_r($entry);
        print "\n";

        $days[$date][$time] = $entry;
        put_cache(array($entry));
    }

    return collapse_times($days);
}
/**
 * Collect the entries linked from a LiveJournal page.
 *
 * The page at $uri is downloaded with get() and scanned for talkpost.bml
 * links. Each linked entry is taken from get_cached() when available;
 * otherwise its page is fetched and the date, time, subject and body are
 * scraped out of the "body area" section. Parsed entries whose body is
 * longer than 7 characters are stored with put_cache(); shorter ones, and
 * pages whose body area cannot be parsed, are reported through mail_entry().
 *
 * @param string $uri Address of the journal page.
 * @return mixed Whatever collapse_times() returns for the collected
 *               date => time => entry map; an empty array when the page
 *               cannot be fetched or contains no entry links.
 */
function get_friend_livejournal($uri)
{
    $index = get($uri);
    if (!$index) {
        return array();
    }

    // [^"]* keeps each match inside one quoted attribute; a greedy ".*"
    // could run from the first link on a line to the last itemid on it.
    $linkre = '!"(http://www\\.livejournal\\.com/talkpost\\.bml\\?journal=[^"]*itemid=(\\d+))"!';
    if (!preg_match_all($linkre, $index, $links, PREG_PATTERN_ORDER)) {
        // No links found (or the match failed): nothing to collect.
        return array();
    }

    // Captures: 1-3 = the three date numbers, 4 = h:m:s, 5 = entry markup.
    $tagsre = '(?:\\s|<[^>]+?>)+';
    $entryre = "!(?:said|wrote),{$tagsre}@{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}(\\d+:\\d+:\\d+).*?</CENTER>.*?<ul>(.*)</ul>!ms";

    $days = array();
    foreach ($links[1] as $entryuri) {
        // Links scraped from HTML carry entity-encoded ampersands.
        // NOTE(review): the original replaced "&" with "&", a no-op that
        // looks like a mangled "&amp;" decode; confirm cache keys still line up.
        $entryuri = str_replace('&amp;', '&', $entryuri);

        $entry = get_cached($entryuri);
        if ($entry) {
            // explode() replaces split(), which was removed in PHP 7.
            list($date, $time) = array_pad(explode(' ', $entry['date']), 2, '');
            $days[$date][$time] = $entry;
            continue;
        }

        $page = get($entryuri);
        if (!$page) {
            continue;
        }

        // Pages without a body area section are skipped silently.
        if (!preg_match('#<!-- body area -->(.*)<!-- /body area -->#ms', $page, $matches)) {
            continue;
        }
        $body = $matches[1];

        if (!preg_match($entryre, $body, $matches)) {
            print "Couldn't extract {$entryuri}\n";
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$body}");
            continue;
        }

        $date = $matches[1] . "-" . $matches[2] . "-" . $matches[3];
        $time = $matches[4];
        $data = trim($matches[5]);

        // A leading table is moved behind the rest of the entry text.
        if (preg_match('!^<table.*?>(.*?)</table>(.*)$!ms', $data, $matches)) {
            $data = trim($matches[2]) . "<table>" . $matches[1] . "</table>";
        }
        $data = preg_replace('!^<p>!ms', '', $data);

        // Everything before the first <br /> is the subject. Reset it for
        // every entry so a body without one does not inherit the subject
        // of the previously parsed entry.
        $subject = '';
        if (preg_match('!^(.*?)<br />(.*)$!ms', $data, $matches)) {
            $data = trim($matches[2]);
            $subject = trim($matches[1]);
        }

        // NOTE: relative href/src URIs inside the body are left untouched.
        $entry = array(
            'date' => "{$date} {$time}",
            'subject' => $subject,
            'data' => $data,
            'uri' => $entryuri,
            'friend_uri' => $uri,
        );
        $days[$date][$time] = $entry;

        if (strlen(trim($data)) > 7) {
            put_cache(array($entry));
        } else {
            // A near-empty body suggests a parse problem: report it instead
            // of caching it. The message now matches the other report in
            // this function; the original had a stray line break inside it.
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$body}");
        }
    }

    return collapse_times($days);
}