Example #1
0
/**
 * Fetch the entry currently shown on a DiaryLand journal page.
 *
 * A cached entry (whatever get_cached() returns for $uri) is used when
 * available. Otherwise the page is downloaded with get(), scraped for its
 * date, its 12-hour time stamp and its body, and the resulting entry is handed
 * to put_cache().
 *
 * @param string $uri Address of the journal page.
 *
 * @return array Single-element array mapping the entry's "date time" string
 *               to the entry (keys: date, data, uri, friend_uri), or an empty
 *               array when the page is empty or cannot be parsed.
 */
function get_friend_diaryland($uri)
{
    if ($entry = get_cached($uri)) {
        return array($entry['date'] => $entry);
    }

    $page = get($uri);
    if ((string) $page === '') {
        print "No data at {$uri}\n";
        return array();
    }

    if (!preg_match('!(\\d+-\\d+-\\d+)\\s*-\\s*(\\d+:\\d+)\\s*(p\\.m\\.|a\\.m\\.).*?(<P>.*?)<P><A[^>]*>previous!ms', $page, $matches)) {
        print "Couldn't extract entry from {$uri}\n";
        // Report the page that was actually fetched.
        mail_entry("*****@*****.**", "DiaryLand Entry Breaks Parser... more at 10.", $uri, "{$page}");
        return array();
    }

    // Convert the 12-hour clock to 24-hour. Only 1-11 p.m. move forward by
    // twelve hours; 12 p.m. is already noon and 12 a.m. is hour zero. Every
    // other hour is left exactly as it was written on the page.
    $time = explode(':', $matches[2]);
    $hour = (int) $time[0];
    if ($matches[3] == 'p.m.' && $hour < 12) {
        $time[0] = $hour + 12;
    } elseif ($matches[3] == 'a.m.' && $hour == 12) {
        $time[0] = 0;
    }

    $date = $matches[1] . " " . join(":", $time);
    $entry = array(
        'date' => $date,
        'data' => $matches[4],
        // The page only exposes one entry, so the entry address is derived
        // from the journal address plus the entry's time stamp.
        'uri' => $uri . "/{$date}",
        'friend_uri' => $uri,
    );
    put_cache(array($entry));

    return array($date => $entry);
}
/**
 * Fetch the latest entries of a journal through its XML "Auto" endpoint.
 *
 * The response is parsed with the expat callbacks startElement(),
 * endElement() and cdata(), which are defined outside this function and
 * presumably fill the global $entries array (with $cdatas as scratch space) -
 * confirm against those handlers. Each parsed entry is then tagged with the
 * journal address and a derived entry address, and the whole set is handed to
 * put_cache().
 *
 * @param string $uri Address of the journal, with or without a trailing "/".
 *
 * @return array The parsed entries, each extended with 'friend_uri' and 'uri'.
 */
function get_friend_ljk($uri)
{
    global $entries, $cdatas;
    $entries = array();
    $cdatas = array();

    // Append a slash only when the address does not already end with one.
    // (Indexing the string at strlen($uri) reads past its last character, so
    // the check has to look at the final character instead.)
    $nuri = (substr($uri, -1) != '/') ? $uri . '/' : $uri;
    $f = get($nuri . "Auto?limit=" . JOURNAL_LIMITPERFRIEND . "&action=latest");

    $x = xml_parser_create();
    xml_parser_set_option($x, XML_OPTION_CASE_FOLDING, FALSE);
    xml_set_element_handler($x, "startElement", "endElement");
    xml_set_character_data_handler($x, "cdata");
    xml_parse($x, $f, TRUE);
    // Before PHP 8 the parser is a resource that must be released explicitly;
    // from PHP 8 on it is an object and is collected automatically.
    if (is_resource($x)) {
        xml_parser_free($x);
    }

    $e = $entries;
    unset($entries);
    unset($cdatas);

    foreach ($e as $k => $entry) {
        $e[$k]['friend_uri'] = $uri;
        // Built from the address exactly as given, so existing entry
        // addresses (and anything keyed on them) stay unchanged.
        $e[$k]['uri'] = $uri . "/" . $e[$k]['date'];
    }
    put_cache($e);

    return $e;
}
Example #3
0
/**
 * Collect the entries linked from an OpenDiary index page.
 *
 * Every "entryviewN.asp" link on the index page is resolved against
 * http://www.opendiary.com/. Entries already known to get_cached() are reused;
 * the others are downloaded with get(), scraped for subject, date, optional
 * time of day and body, and handed to put_cache(). Pages that cannot be
 * scraped are reported through mail_entry() and skipped.
 *
 * @param string $uri Address of the diary's index page.
 *
 * @return mixed Whatever collapse_times() makes of the collected
 *               date => time => entry map; an empty array when the index page
 *               cannot be fetched or contains no entry links.
 */
function get_friend_opendiary($uri)
{
    $page = get($uri);
    if (!$page) {
        debug("Couldn't get {$uri}\n");
        return array();
    }
    if (!preg_match_all('!"(entryview\\d+\\.asp\\?authorcode=[A-Z0-9]+\\&entry=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        debug("None found\n");
        return array();
    }

    $tagsre = '(?:\\s|<[^>]+?>)+';
    $days = array();
    foreach ($matches[1] as $relative) {
        $entryuri = 'http://www.opendiary.com/' . $relative;

        if ($entry = get_cached($entryuri)) {
            print "got {$entryuri} from cache\n";
            // explode() replaces split(), which no longer exists in PHP 7+;
            // the padding keeps a date without a time part from breaking list().
            list($date, $time) = array_pad(explode(' ', $entry['date']), 2, '');
            if (!isset($days[$date])) {
                $days[$date] = array();
            }
            $days[$date][$time] = $entry;
            continue;
        }

        $entrypage = get($entryuri);
        if (!$entrypage) {
            print "Couldn't get journal entry {$entryuri}\n";
            continue;
        }
        if (!preg_match("#<TABLE WIDTH=100%>+{$tagsre}(.*?)</TD>{$tagsre}(\\d+/\\d+/\\d+){$tagsre}" . "(?:Time: (\\d+.\\d+)(am|pm))?(.*?)</TD>#s", $entrypage, $found)) {
            print "Couldn't get entry from {$entryuri}\n";
            mail_entry("*****@*****.**", "OpenDiary Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
            continue;
        }

        $subject = $found[1];
        list($month, $day, $year) = explode('/', $found[2]);
        $date = sprintf('%s-%s-%s', $year, $month, $day);
        $data = $found[5];

        // The "Time:" part is optional; entries without a usable
        // "hours.minutes" value fall back to midnight.
        $time = "00:00:00";
        $clock = explode('.', $found[3]);
        if (count($clock) == 2) {
            // 12-hour to 24-hour: only 1-11 pm move forward by twelve hours,
            // 12 pm is already noon and 12 am is hour zero.
            $hour = (int) $clock[0];
            if ($found[4] == 'pm' && $hour < 12) {
                $clock[0] = $hour + 12;
            } elseif ($found[4] == 'am' && $hour == 12) {
                $clock[0] = 0;
            }
            $time = $clock[0] . ":" . $clock[1];
        }

        $entry = array('date' => "{$date} {$time}", 'subject' => $subject, 'data' => $data, 'uri' => $entryuri, 'friend_uri' => $uri);
        print "Got entry {$entryuri}: \n";
        print_r($entry);
        print "\n";
        if (!isset($days[$date])) {
            $days[$date] = array();
        }
        $days[$date][$time] = $entry;
        put_cache(array($entry));
    }

    return collapse_times($days);
}
/**
 * Download the published manual files (and the images they reference) and
 * store them in the cache.
 *
 * For every published file the raw content is read - from the local content
 * directory, from GitHub, or replaced by a fixed stub when HTTP requests are
 * disabled for debugging. Markdown image references that are listed in $cache
 * are fetched, cached, and rewritten to point at "cache/<manual_id>/...".
 * Files flagged for md -> html rendering are passed through Markdown() before
 * being cached under their rendered file name.
 *
 * Problems (unreadable files, images missing from the repository listing) are
 * appended to Manual_log::$warning; the affected file or image is skipped.
 *
 * @param array  $file      Map of directory key => description; each
 *                          description's 'published' list holds the files to
 *                          process ('raw' path, optional 'render' settings).
 * @param mixed  $manual_id Identifier passed to put_cache() and used in the
 *                          rewritten image paths.
 * @param array  $cache     Array keyed by "content/<key>/<image>"; only images
 *                          with a key here are fetched (presumably the
 *                          repository listing - confirm against the caller).
 *
 * @return void
 */
function downlad_files($file, $manual_id, $cache)
{
    // Image path => whether it was fetched and cached successfully, so an
    // image referenced several times is only retrieved once.
    $fetched_images = array();

    foreach ($file as $key => $value) {
        foreach ($value['published'] as $vvalue) {
            if (MANUAL_LOCAL_FILES_REQUEST) {
                $content = file_get_contents(MANUAL_LOCAL_CONTENT_PATH . $vvalue['raw']);
            } elseif (!MANUAL_DEBUG_NO_HTTP_REQUEST) {
                $content = get_content_from_github(GITHUB_RAW_CONTENT_URL . $vvalue['raw']);
            } else {
                $content = "# Introduction";
            }
            if ($content === false || $content === null) {
                // Never cache a failed read as if it were the file's content.
                Manual_log::$warning[] = "The " . $vvalue['raw'] . " could not be read";
                continue;
            }

            $matches = array();
            if (preg_match_all('/!\\[(.*?)\\]\\((.*?)\\)/', $content, $matches, PREG_SET_ORDER)) {
                foreach ($matches as $match) {
                    list($markup, $alt, $item) = $match;
                    $image_path = $key . '/' . $item;

                    if (!array_key_exists('content/' . $image_path, $cache)) {
                        Manual_log::$warning[] = "The " . $key . '/' . $item . " is referenced but can't be found in the repository";
                        continue;
                    }

                    if (!array_key_exists($image_path, $fetched_images)) {
                        if (MANUAL_LOCAL_FILES_REQUEST) {
                            $image = file_get_contents(MANUAL_LOCAL_CONTENT_PATH . $image_path);
                        } else {
                            $image = get_content_from_github(GITHUB_RAW_CONTENT_URL . $image_path);
                        }
                        if ($image === false || $image === null) {
                            Manual_log::$warning[] = "The " . $image_path . " could not be read";
                            $fetched_images[$image_path] = false;
                        } else {
                            put_cache($image_path, $image, $manual_id);
                            $fetched_images[$image_path] = true;
                        }
                    }

                    if ($fetched_images[$image_path]) {
                        // Point the reference at the cached copy.
                        // TODO: find a good way to correctly set the pictures and their paths
                        $content = str_replace($markup, '![' . $alt . '](cache/' . $manual_id . '/' . $image_path . ')', $content);
                    }
                }
            }

            $cache_filename = $vvalue['raw'];
            if (array_key_exists('render', $vvalue) && $vvalue['render']['source'] == 'md' && $vvalue['render']['target'] == 'html') {
                $content = Markdown($content);
                $cache_filename = $vvalue['render']['filename'];
            }
            put_cache($cache_filename, $content, $manual_id);
        }
    }
}
Example #5
0
/**
 * Collect the entries linked from a LiveJournal page.
 *
 * Every talkpost.bml link found on the page is treated as one entry. Entries
 * already known to get_cached() are reused; the others are downloaded with
 * get() and scraped for date, time, subject and body. Entries whose body is
 * longer than seven characters are handed to put_cache(); shorter ones, and
 * pages that cannot be scraped, are reported through mail_entry().
 *
 * @param string $uri Address of the journal page.
 *
 * @return mixed Whatever collapse_times() makes of the collected
 *               date => time => entry map; an empty array when the page cannot
 *               be fetched or contains no entry links.
 */
function get_friend_livejournal($uri)
{
    $page = get($uri);
    if (!$page) {
        return array();
    }
    // [^"]* keeps each match inside its own quoted attribute, so two links on
    // the same line are not merged into a single match.
    if (!preg_match_all('!"(http://www\\.livejournal\\.com/talkpost\\.bml\\?journal=[^"]*itemid=(\\d+))"!', $page, $matches, PREG_PATTERN_ORDER)) {
        return array();
    }

    $tagsre = '(?:\\s|<[^>]+?>)+';
    $days = array();
    foreach ($matches[1] as $entryuri) {
        $entryuri = str_replace("&amp;", "&", $entryuri);

        if ($entry = get_cached($entryuri)) {
            // explode() replaces split(), which no longer exists in PHP 7+;
            // the padding keeps a date without a time part from breaking list().
            list($date, $time) = array_pad(explode(' ', $entry['date']), 2, '');
            if (!isset($days[$date])) {
                $days[$date] = array();
            }
            $days[$date][$time] = $entry;
            continue;
        }

        $entrypage = get($entryuri);
        if (!$entrypage) {
            continue;
        }
        if (!preg_match('#<!-- body area -->(.*)<!-- /body area -->#ms', $entrypage, $found)) {
            continue;
        }
        $entrypage = $found[1];

        if (!preg_match("!(?:said|wrote),{$tagsre}@{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}-{$tagsre}(\\d+){$tagsre}(\\d+:\\d+:\\d+).*?</CENTER>.*?<ul>(.*)</ul>!ms", $entrypage, $found)) {
            print "Couldn't extract {$entryuri}\n";
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
            continue;
        }

        $date = $found[1] . "-" . $found[2] . "-" . $found[3];
        $time = $found[4];
        $data = trim($found[5]);

        // A leading table is moved behind the rest of the body.
        if (preg_match('!^<table.*?>(.*?)</table>(.*)$!ms', $data, $found)) {
            $data = trim($found[2]) . "<table>" . $found[1] . "</table>";
        }
        $data = preg_replace('!^<p>!ms', '', $data);

        // Text before the first <br /> is the subject. It is reset for every
        // entry so that an entry without one does not inherit the subject of
        // the previous entry.
        $subject = '';
        if (preg_match('!^(.*?)<br />(.*)$!ms', $data, $found)) {
            $data = trim($found[2]);
            $subject = trim($found[1]);
        }

        $entry = array('date' => "{$date} {$time}", 'subject' => $subject, 'data' => $data, 'uri' => $entryuri, 'friend_uri' => $uri);
        if (!isset($days[$date])) {
            $days[$date] = array();
        }
        $days[$date][$time] = $entry;

        if (strlen(trim($data)) > 7) {
            put_cache(array($entry));
        } else {
            // A near-empty body usually means the scrape went wrong; report it
            // instead of caching it.
            mail_entry("*****@*****.**", "LiveJournal Entry Breaks Parser... more at 10.", $entryuri, "{$entrypage}");
        }
    }

    return collapse_times($days);
}