/**
 * Fetch the current entry of a DiaryLand journal.
 *
 * A cached entry (get_cached()) is returned as-is. Otherwise the page is
 * downloaded, the entry's date, time and body are extracted with a regular
 * expression, and the result is stored through put_cache().
 *
 * @param string $uri Address of the DiaryLand journal page.
 *
 * @return array Map of "<date> <time>" => entry, where the entry has the keys
 *               'date', 'data', 'uri' and 'friend_uri'. Empty when the page
 *               could not be fetched or parsed.
 */
function get_friend_diaryland($uri)
{
    if ($entry = get_cached($uri)) {
        return array($entry['date'] => $entry);
    }

    $page = get($uri);
    if (strlen($page) == 0) {
        print "No data at {$uri}\n";
        return array();
    }

    if (!preg_match('!(\\d+-\\d+-\\d+)\\s*-\\s*(\\d+:\\d+)\\s*(p\\.m\\.|a\\.m\\.).*?(<P>.*?)<P><A[^>]*>previous!ms', $page, $matches)) {
        print "Couldn't extract entry from {$uri}\n";
        // Report the address that was actually fetched; this call used to
        // pass an undefined $entryuri.
        mail_entry("*****@*****.**", "DiaryLand Entry Breaks Parser... more at 10.", $uri, "{$page}");
        return array();
    }

    // Convert the 12-hour clock to 24-hour. 12 p.m. is noon and must stay 12
    // (adding 12 would yield hour 24); 12 a.m. is midnight, i.e. hour 0.
    $time = explode(':', $matches[2]);
    $hour = (int) $time[0];
    if ($matches[3] == 'p.m.') {
        if ($hour != 12) {
            $time[0] = $hour + 12;
        }
    } elseif ($hour == 12) {
        $time[0] = 0;
    }
    $time = join(":", $time);

    $date = $matches[1] . " " . $time;
    $entry = array(
        'date' => $date,
        'data' => $matches[4],
        // The entry address is the journal address plus the entry date.
        'uri' => $uri . "/{$date}",
        'friend_uri' => $uri,
    );

    put_cache(array($entry));

    return array($date => $entry);
}
/**
 * Fetch the latest entries of a journal through its XML "Auto" interface.
 *
 * Requests "<uri>/Auto?limit=<JOURNAL_LIMITPERFRIEND>&action=latest" and feeds
 * the response to an XML parser wired to the startElement(), endElement() and
 * cdata() handlers. The entries are then read from the global $entries, which
 * is presumably filled by those handlers (they are not defined in this block).
 *
 * @param string $uri Base address of the journal, with or without a trailing slash.
 *
 * @return array The parsed entries, each extended with 'friend_uri' and 'uri'.
 */
function get_friend_ljk($uri)
{
    global $entries, $cdatas;
    $entries = array();
    $cdatas = array();

    $x = xml_parser_create();

    // Look at the real last character. The former $uri[strlen($uri)] read one
    // position past the end of the string, so the test never saw a '/' and a
    // second slash was appended to URIs that already ended with one.
    if (substr($uri, -1) != '/') {
        $nuri = $uri . '/';
    } else {
        $nuri = $uri;
    }

    $f = get($nuri . "Auto?limit=" . JOURNAL_LIMITPERFRIEND . "&action=latest");

    xml_parser_set_option($x, XML_OPTION_CASE_FOLDING, FALSE);
    xml_set_element_handler($x, "startElement", "endElement");
    xml_set_character_data_handler($x, "cdata");
    xml_parse($x, $f, TRUE);

    $e = $entries;

    // Note: unset() on a variable imported with `global` only drops the local
    // alias; the global arrays keep their contents until the next call resets
    // them at the top of this function.
    unset($entries);
    unset($cdatas);

    foreach (array_keys($e) as $k) {
        $e[$k]['friend_uri'] = $uri;
        $e[$k]['uri'] = $uri . "/" . $e[$k]['date'];
    }

    put_cache($e);

    return $e;
}
/**
 * Collect the entries linked from an OpenDiary journal page.
 *
 * Every "entryview….asp" link found on the page is resolved against
 * http://www.opendiary.com/. Each entry is taken from the cache when
 * get_cached() has it; otherwise the entry page is downloaded, parsed and
 * cached. Entry pages that cannot be parsed are reported through mail_entry().
 *
 * @param string $uri Address of the OpenDiary journal page.
 *
 * @return array Empty when the page is unavailable or lists no entries;
 *               otherwise the result of collapse_times() applied to the
 *               entries grouped by date and then by time.
 */
function get_friend_opendiary($uri)
{
    $page = get($uri);
    if (!$page) {
        debug("Couldn't get {$uri}\n");
        return array();
    }

    if (!preg_match_all('!"(entryview\\d+\\.asp\\?authorcode=[A-Z0-9]+\\&entry=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        debug("None found\n");
        return array();
    }

    // The links on the page are relative; make them absolute.
    $entryuris = array();
    foreach ($matches[1] as $relative) {
        $entryuris[] = 'http://www.opendiary.com/' . $relative;
    }

    // One or more whitespace characters and/or HTML tags.
    $tagsre = '(?:\\s|<[^>]+?>)+';

    $days = array();
    foreach ($entryuris as $entryuri) {
        if ($entry = get_cached($entryuri)) {
            print "got {$entryuri} from cache\n";
            // explode() replaces split(), which was removed in PHP 7.
            list($date, $time) = explode(' ', $entry['date']);
            if (!isset($days[$date])) {
                $days[$date] = array();
            }
            $days[$date][$time] = $entry;
            continue;
        }

        $entrypage = get($entryuri);
        if (!$entrypage) {
            print "Couldn't get journal entry {$entryuri}\n";
            continue;
        }

        if (!preg_match("#<TABLE WIDTH=100%>+{$tagsre}(.*?)</TD>{$tagsre}(\\d+/\\d+/\\d+){$tagsre}" . "(?:Time: (\\d+.\\d+)(am|pm))?(.*?)</TD>#s", $entrypage, $matches)) {
            print "Couldn't get entry from {$entryuri}\n";
            mail_entry("*****@*****.**", "OpenDiary Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
            continue;
        }

        $subject = $matches[1];
        list($month, $day, $year) = explode('/', $matches[2]);
        $date = sprintf('%s-%s-%s', $year, $month, $day);
        $data = $matches[5];

        // The "Time:" part is optional; both groups are empty when it is absent.
        $ampm = $matches[4];
        $clock = explode('.', $matches[3]);
        $hour = $clock[0];
        $minute = isset($clock[1]) ? $clock[1] : '';

        // 12-hour to 24-hour: 12pm is noon and stays 12, 12am is midnight.
        if ($ampm == 'pm') {
            if ((int) $hour != 12) {
                $hour += 12;
            }
        } elseif ($ampm == 'am' && (int) $hour == 12) {
            $hour = 0;
        }

        $time = $hour . ":" . $minute;
        // Comparison, not assignment: only entries without a time fall back to
        // midnight. The former `$time = ":"` reset the time of every entry.
        if ($time == ":") {
            $time = "00:00:00";
        }

        $entry = array(
            'date' => "{$date} {$time}",
            'subject' => $subject,
            'data' => $data,
            'uri' => $entryuri,
            'friend_uri' => $uri,
        );

        print "Got entry {$entryuri}: \n";
        print_r($entry);
        print "\n";

        if (!isset($days[$date])) {
            $days[$date] = array();
        }
        $days[$date][$time] = $entry;

        put_cache(array($entry));
    }

    return collapse_times($days);
}
/**
 * Fetch every published file of a manual and store it in the cache.
 *
 * The content of each published file comes from the local content path, from
 * GitHub, or is a fixed placeholder, depending on MANUAL_LOCAL_FILES_REQUEST
 * and MANUAL_DEBUG_NO_HTTP_REQUEST. Markdown image references in the content
 * are cached too and rewritten to point below "cache/<manual_id>/". Files
 * whose 'render' settings ask for md -> html are passed through Markdown()
 * and cached under the rendered file name.
 *
 * @param array $file      Map of section key => array holding a 'published'
 *                         list; each item has a 'raw' path and, optionally,
 *                         'render' settings ('source', 'target', 'filename').
 * @param mixed $manual_id Identifier handed to put_cache() and used in the
 *                         rewritten image paths.
 * @param array $cache     Array keyed by 'content/<key>/<image>'; presumably
 *                         the files known to exist in the repository.
 */
function downlad_files($file, $manual_id, $cache)
{
    foreach ($file as $key => $section) {
        foreach ($section['published'] as $published) {
            // Pick the content source.
            if (MANUAL_LOCAL_FILES_REQUEST) {
                $content = file_get_contents(MANUAL_LOCAL_CONTENT_PATH . $published['raw']);
            } elseif (!MANUAL_DEBUG_NO_HTTP_REQUEST) {
                $content = get_content_from_github(GITHUB_RAW_CONTENT_URL . $published['raw']);
            } else {
                $content = "# Introduction";
            }

            // Markdown images: ![alt](path)
            $images = array();
            $found = preg_match_all('/!\\[(.*?)\\]\\((.*?)\\)/', $content, $images);
            if ($found) {
                foreach ($images[2] as $index => $item) {
                    $relative = $key . '/' . $item;

                    if (!array_key_exists('content/' . $relative, $cache)) {
                        Manual_log::$warning[] = "The " . $relative . " is referenced but can't be found in the repository";
                        continue;
                    }

                    if (MANUAL_LOCAL_FILES_REQUEST) {
                        $image = file_get_contents(MANUAL_LOCAL_CONTENT_PATH . $relative);
                    } else {
                        $image = get_content_from_github(GITHUB_RAW_CONTENT_URL . $relative);
                    }
                    put_cache($relative, $image, $manual_id);

                    // TODO: find a good way to correctly set the pictures and their paths
                    $alt = $images[1][$index];
                    $content = str_replace(
                        sprintf('![%s](%s)', $alt, $item),
                        sprintf('![%s](cache/%s/%s)', $alt, $manual_id, $relative),
                        $content
                    );
                }
            }

            // Render markdown to html when requested; the cache entry then
            // takes the rendered file name instead of the raw one.
            $cache_filename = $published['raw'];
            $wants_html = array_key_exists('render', $published)
                && $published['render']['source'] == 'md'
                && $published['render']['target'] == 'html';
            if ($wants_html) {
                $content = Markdown($content);
                $cache_filename = $published['render']['filename'];
            }

            put_cache($cache_filename, $content, $manual_id);
        }
    }
}
/**
 * Collect the entries linked from a LiveJournal friends page.
 *
 * Every talkpost.bml link found on the page identifies one entry. Each entry
 * is taken from the cache when get_cached() has it; otherwise the entry page
 * is downloaded and its date, time, subject and body are extracted. Entries
 * with more than 7 characters of body are cached; shorter ones, and pages
 * whose header cannot be parsed, are reported through mail_entry().
 *
 * @param string $uri Address of the LiveJournal page listing the entries.
 *
 * @return array Empty when the page is unavailable or lists no entries;
 *               otherwise the result of collapse_times() applied to the
 *               entries grouped by date and then by time.
 */
function get_friend_livejournal($uri)
{
    $page = get($uri);
    if (!$page) {
        return array();
    }

    // [^"]* keeps each match inside a single quoted attribute. The previous
    // greedy .* could run across several links placed on the same line.
    // Returning here also avoids counting an undefined variable when nothing
    // matches.
    if (!preg_match_all('!"(http://www\\.livejournal\\.com/talkpost\\.bml\\?journal=[^"]*itemid=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        return array();
    }
    $entryuris = $matches[1];

    // One or more whitespace characters and/or HTML tags.
    $tagsre = '(?:\\s|<[^>]+?>)+';

    $days = array();
    foreach ($entryuris as $entryuri) {
        // The links are scraped from HTML attributes, so entity-encoded
        // ampersands have to be decoded before the address is requested. The
        // former str_replace() call replaced "&" with itself and did nothing.
        $entryuri = html_entity_decode($entryuri);

        if ($entry = get_cached($entryuri)) {
            // explode() replaces split(), which was removed in PHP 7.
            list($date, $time) = explode(' ', $entry['date']);
            if (!isset($days[$date])) {
                $days[$date] = array();
            }
            $days[$date][$time] = $entry;
            continue;
        }

        $entrypage = get($entryuri);
        if (!$entrypage) {
            continue;
        }

        if (!preg_match('#<!-- body area -->(.*)<!-- /body area -->#ms', $entrypage, $area)) {
            continue;
        }
        // From here on only the body area of the page is parsed and reported.
        $body = $area[1];

        if (!preg_match("!(?:said|wrote),{$tagsre}@{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}(\\d+:\\d+:\\d+).*?</CENTER>.*?<ul>(.*)</ul>!ms", $body, $matches)) {
            print "Couldn't extract {$entryuri}\n";
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$body}");
            continue;
        }

        $date = $matches[1] . "-" . $matches[2] . "-" . $matches[3];
        $time = $matches[4];
        $data = trim($matches[5]);

        // A leading table is moved behind the rest of the entry.
        if (preg_match('!^<table.*?>(.*?)</table>(.*)$!ms', $data, $table)) {
            $data = trim($table[2]) . "<table>" . $table[1] . "</table>";
        }
        $data = preg_replace('!^<p>!ms', '', $data);

        // Reset per entry so a subject never carries over from the previous
        // entry; the text before the first <br /> is the subject.
        $subject = '';
        if (preg_match('!^(.*?)<br />(.*)$!ms', $data, $split)) {
            $data = trim($split[2]);
            $subject = trim($split[1]);
        }

        $entry = array(
            'date' => "{$date} {$time}",
            'subject' => $subject,
            'data' => $data,
            'uri' => $entryuri,
            'friend_uri' => $uri,
        );

        if (!isset($days[$date])) {
            $days[$date] = array();
        }
        $days[$date][$time] = $entry;

        // A near-empty body suggests the parser missed the content: report it
        // instead of caching it.
        if (strlen(trim($data)) > 7) {
            put_cache(array($entry));
        } else {
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$body}");
        }
    }

    return collapse_times($days);
}