Ejemplo n.º 1
0
/**
 * Scrapes story information from a "viewstory.php?sid=N" style archive page.
 *
 * The story ID is taken from the `sid` query parameter of $url. A canonical
 * story URL is rebuilt from the scheme and host of $url, fetched through
 * cURL(), and the title, author, description and chapter links are read
 * from the returned markup.
 *
 * @param string $url address of the story; must carry a numeric `sid`
 *      query parameter
 * @return Story story with id, url, title, author, description, chapters
 *      (number of chapter links) and metadata (text of each chapter link)
 * @throws FicSaveException if the story ID is missing or not numeric, or if
 *      the title, author, description or chapter list cannot be found
 */
function getHPFanficArchiveInfo($url)
{
    $urlParts = parse_url($url);
    // parse_url() returns false for seriously malformed URLs and omits the
    // 'query' key when there is no query string. Both cases mean "no story
    // ID", so avoid reading an undefined index and report it as such.
    $query = array();
    if (is_array($urlParts) && isset($urlParts['query'])) {
        parse_str($urlParts['query'], $query);
    }
    if (!isset($query['sid'])) {
        throw new FicSaveException("URL is missing story ID.");
    }
    $storyId = $query['sid'];
    if (!is_numeric($storyId)) {
        throw new FicSaveException("URL has an invalid story ID: {$storyId}.");
    }

    // Normalise to the canonical story URL before fetching.
    $url = "{$urlParts['scheme']}://{$urlParts['host']}/stories/viewstory.php?sid={$storyId}";
    $response = cURL($url);
    $html = new HTML5();
    $html = $html->loadHTML($response);

    $story = new Story();
    $story->id = $storyId;
    $story->url = $url;

    $title = qp($html, '#pagetitle')->find('a[href^="viewstory"]')->first()->text();
    if (empty($title)) {
        throw new FicSaveException("Could not retrieve title for story at {$url}.");
    }
    $story->title = $title;

    $author = qp($html, '#pagetitle')->find('a[href^="viewuser"]')->first()->text();
    if (empty($author)) {
        throw new FicSaveException("Could not retrieve author for story at {$url}.");
    }
    $story->author = $author;

    // The description is read from the '.content' element of the '.block'
    // at index 1 inside #mainpage.
    $description = qp($html, '#mainpage')->find('.block')->get(1);
    if ($description == null) {
        throw new FicSaveException("Could not retrieve description for story at {$url}.");
    }
    // Unwrap <a> elements (keeping their inner text) before stripping attributes.
    $story->description = stripAttributes(preg_replace('/<a(.*?)>(.*?)<\\/a>/', '\\2', trim(qp($description)->find('.content')->first()->innerHTML())));

    // Chapter links are read from the '.block' at index 3 inside #mainpage.
    $chaptersBlock = qp($html, '#mainpage')->find('.block')->get(3);
    if ($chaptersBlock == null) {
        throw new FicSaveException("Could not get number of chapters for story at {$url}.");
    }
    $chapterLinks = qp($chaptersBlock)->find('a[href^="viewstory"]');
    $numChapters = $chapterLinks->count();
    if ($numChapters < 1) {
        throw new FicSaveException("Could not get number of chapters for story at {$url}.");
    }
    $story->chapters = $numChapters;
    $story->metadata = array();
    foreach ($chapterLinks as $chapterLink) {
        $story->metadata[] = $chapterLink->text();
    }

    return $story;
}
Ejemplo n.º 2
0
/**
 * Scrapes story information from a story page whose path has the story ID
 * as its third slash-separated segment (e.g. "/s/12345/...").
 *
 * The page at $url is fetched as given through cURL(); the stored story URL
 * is rebuilt as "<scheme>://<host>/s/<id>".
 *
 * @param string $url address of the story page
 * @return Story story with id, url, title, author, description, chapters
 *      and, when an image is present in #profile_top, coverImageUrl
 * @throws FicSaveException if the story ID is missing or not numeric, or if
 *      the title, author or description cannot be found
 */
function getFanfictionNetInfo($url)
{
    $urlParts = parse_url($url);
    // parse_url() returns false for seriously malformed URLs and omits the
    // 'path' key when the URL has no path; treat both as an empty path so
    // the "missing story ID" error below is raised instead of reading an
    // undefined index.
    $path = (is_array($urlParts) && isset($urlParts['path'])) ? $urlParts['path'] : '';
    $pathParts = explode('/', $path);
    if (!isset($pathParts[2])) {
        throw new FicSaveException("URL is missing story ID.");
    }
    $storyId = $pathParts[2];
    if (!is_numeric($storyId)) {
        throw new FicSaveException("URL has an invalid story ID: {$storyId}.");
    }

    $response = cURL($url);
    $html = new HTML5();
    $html = $html->loadHTML($response);

    $story = new Story();
    $story->id = $storyId;
    // $url is unchanged since it was parsed above, so the parts are reused.
    $story->url = "{$urlParts['scheme']}://{$urlParts['host']}/s/{$storyId}";

    $title = qp($html, '#profile_top')->find('b')->first()->text();
    if (empty($title)) {
        throw new FicSaveException("Could not retrieve title for story at {$url}.");
    }
    $story->title = $title;

    $author = qp($html, '#profile_top')->find('a')->first()->text();
    if (empty($author)) {
        throw new FicSaveException("Could not retrieve author for story at {$url}.");
    }
    $story->author = $author;

    // The description is assembled from the <div> at index 2 inside
    // #profile_top plus the element that follows it.
    $description = qp($html, '#profile_top')->find('div')->get(2);
    if ($description == null) {
        throw new FicSaveException("Could not retrieve description for story at {$url}.");
    }
    // Unwrap <a> elements (keeping their inner text) before stripping attributes.
    $story->description = stripAttributes(preg_replace('/<a(.*?)>(.*?)<\\/a>/', '\\2', trim(qp($description)->html() . qp($description)->next()->html())));

    // The option count comes out doubled, hence the division by two.
    // NOTE(review): presumably the chapter selector is rendered twice on the
    // page - confirm against live markup.
    $numChapters = qp($html, '#chap_select')->find('option')->count() / 2;
    // No chapter selector at all is treated as a single-chapter story.
    $story->chapters = $numChapters == 0 ? 1 : $numChapters;

    $coverImageUrl = qp($html, '#profile_top')->find('img')->first()->attr('src');
    if ($coverImageUrl != null) {
        $coverImageUrlParts = parse_url($coverImageUrl);
        // Protocol-relative image URLs ("//host/...") inherit the page's scheme.
        if (!isset($coverImageUrlParts['scheme']) && substr($coverImageUrl, 0, 2) == '//') {
            $coverImageUrl = $urlParts['scheme'] . ":" . $coverImageUrl;
        }
        // Swap the "/75/" path segment for "/180/".
        // NOTE(review): looks like a thumbnail-size selector - confirm.
        $coverImageUrl = str_replace('/75/', '/180/', $coverImageUrl);
        $story->coverImageUrl = $coverImageUrl;
    }

    return $story;
}
Ejemplo n.º 3
0
/**
 * Downloads one chapter of a story and returns it as a Chapter.
 *
 * The chapter page is requested by appending "&chapter=<number>" to $url,
 * so $url is expected to already contain a query string.
 *
 * @param string $url story address to which the chapter parameter is appended
 * @param int|string $chapterNumber chapter to fetch; also stripped (as
 *      "<number>.") from the selected navigation option to form the title
 * @return Chapter chapter with number, title and content set
 */
function getAdultFanfictionOrgChapter($url, $chapterNumber)
{
    $chapterUrl = $url . "&chapter=" . $chapterNumber;
    $response = cURL($chapterUrl);
    $parser = new HTML5();
    $document = $parser->loadHTML($response);

    $chapter = new Chapter();
    $chapter->number = $chapterNumber;

    // The title comes from the selected entry of the chapter navigation
    // drop-down, with every "<number>." occurrence removed.
    $selectedOption = qp($document, 'select[name=chapnav]')->find('option[selected]');
    $numberPrefix = $chapterNumber . '.';
    $chapter->title = trim(str_replace($numberPrefix, '', $selectedOption->text()));

    // The chapter text is read from the <td> content of the table row at
    // index 3 inside the form named "form".
    $contentRow = qp($document, 'form[name=form]')->find('tr')->get(3);
    $rawContent = qp($contentRow)->find('td')->innerHTML();
    $chapter->content = stripAttributes(trim($rawContent));

    return $chapter;
}
Ejemplo n.º 4
0
/**
 * Scrapes story information from a story page whose path has the story ID
 * as its fourth slash-separated segment (e.g. "/story/view/12345/...").
 *
 * A canonical "<scheme>://<host>/story/view/<id>" URL is rebuilt from $url,
 * fetched through cURL(), and the title, author, description and chapter
 * count are read from the returned markup.
 *
 * @param string $url address of the story page
 * @return Story story with id, url, title, author, description and chapters
 * @throws FicSaveException if the story ID is missing or not numeric, or if
 *      the title, author or description cannot be found
 */
function getAsianFanficsInfo($url)
{
    $urlParts = parse_url($url);
    // parse_url() returns false for seriously malformed URLs and omits the
    // 'path' key when the URL has no path; treat both as an empty path so
    // the "missing story ID" error below is raised instead of reading an
    // undefined index.
    $path = (is_array($urlParts) && isset($urlParts['path'])) ? $urlParts['path'] : '';
    $pathParts = explode('/', $path);
    if (!isset($pathParts[3])) {
        throw new FicSaveException("URL is missing story ID.");
    }
    $storyId = $pathParts[3];
    if (!is_numeric($storyId)) {
        throw new FicSaveException("URL has an invalid story ID: {$storyId}.");
    }

    // Normalise to the canonical story URL before fetching.
    $url = "{$urlParts['scheme']}://{$urlParts['host']}/story/view/{$storyId}";
    $response = cURL($url);
    $html = new HTML5();
    $html = $html->loadHTML($response);

    $story = new Story();
    $story->id = $storyId;
    $story->url = $url;

    $title = trim(qp($html, 'h1.title')->first()->text());
    if (empty($title)) {
        throw new FicSaveException("Could not retrieve title for story at {$url}.");
    }
    $story->title = $title;

    // The author is the text of the element following the first 'span.text--info'.
    $author = qp(qp($html, 'span.text--info')->get(0))->next()->text();
    if (empty($author)) {
        throw new FicSaveException("Could not retrieve author for story at {$url}.");
    }
    $story->author = $author;

    // The description is the element following the first <h2> in #bodyText.
    $description = qp($html, '#bodyText')->find('h2')->first()->next();
    // A selection object never compares equal to null, so on its own the
    // null test could not detect a missing description; an empty selection
    // has to be checked for as well.
    if ($description == null || $description->count() == 0) {
        throw new FicSaveException("Could not retrieve description for story at {$url}.");
    }
    $story->description = stripAttributes(trim($description->innerHTML()));

    // One option of the chapter navigation is not counted as a chapter.
    // NOTE(review): presumably a placeholder/foreword entry; a page without
    // the selector yields -1 here - confirm the intended value.
    $story->chapters = qp($html, 'select[name="chapterNav"]')->find('option')->count() - 1;

    return $story;
}
Ejemplo n.º 5
0
/**
 * Callback used by a preg_replace_callback in nextPage to turn wiki table
 * markup into an HTML table.
 *
 * $matches[1] is emitted unchanged in front of the table. $matches[2] holds
 * the table body: it is split on "|" after "!" header markers have been
 * rewritten to "|#" and "||" cell separators to a newline plus "|". The
 * first piece supplies the <table> attributes. Of the remaining pieces, an
 * empty one or one starting with "-" starts a new row, "+" introduces a
 * caption, "#" a header cell and anything else a data cell. A piece for
 * which stripAttributes() returns non-empty text is held back and used as
 * the attribute string of the cell built from the following piece.
 *
 * @param array $matches regex match groups as described above
 * @return string $matches[1] followed by the generated table markup
 */
function makeTableCallback($matches)
{
    // NOTE(review): "rowspace" is not a standard HTML attribute name;
    // possibly "rowspan" was intended - confirm before changing.
    $cell_attributes = array("align", "colspan", "style", "scope", "rowspace", "valign");

    // Array-form str_replace applies the pairs in order, one after another.
    $markup = str_replace(
        array("\n!", "!!", "||"),
        array("\n|#", "||#", "\n|"),
        $matches[2]
    );
    $pieces = explode("|", $markup);
    $html = $matches[1];

    // The first piece always exists and carries the <table> attributes.
    crawlTimeoutLog("..Making Wiki Tables..");
    $table_attributes = trim(str_replace("\n", " ", array_shift($pieces)));
    $table_attributes = str_replace("&quot;", "\"", $table_attributes);
    $table_attributes = stripAttributes($table_attributes, array('id', 'class', 'style'));
    $html .= "<table {$table_attributes} >\n<tr>";

    $pending_attributes = "";
    $awaiting_content = false;
    $previous_tag = "td";
    foreach ($pieces as $piece) {
        crawlTimeoutLog("..Making Wiki Tables..");
        // Sampled once per piece, before any trimming of $html below, and
        // deliberately not refreshed afterwards.
        $row_just_opened = (substr($html, -4) == "<tr>");

        // Row separator: close any open cell, then start a new row.
        if ($piece == "" || $piece[0] == "-") {
            if (!$row_just_opened) {
                $html .= "</{$previous_tag}>";
            }
            $html .= "</tr>\n<tr>";
            continue;
        }

        $marker = $piece[0];
        if ($marker == "+") {
            $tag = "caption";
            $piece = substr($piece, 1);
            // A caption is emitted outside a row: drop the freshly opened <tr>.
            if ($row_just_opened) {
                $html = substr($html, 0, -4);
            }
        } elseif ($marker == "#") {
            $tag = "th";
            $piece = substr($piece, 1);
        } else {
            $tag = "td";
        }

        $cell_text = trim($piece);
        $attribute_text = str_replace(array("\n", "&quot;"), array(" ", "\""), $cell_text);

        // Unless the previous piece was already taken as attributes, see
        // whether this piece yields an attribute string; if so, keep it for
        // the next piece instead of emitting a cell now.
        if (!$awaiting_content) {
            $pending_attributes = trim(stripAttributes($attribute_text, $cell_attributes));
            if ($pending_attributes) {
                $previous_tag = $tag;
                $awaiting_content = true;
                continue;
            }
        }
        $awaiting_content = false;

        if (!$row_just_opened) {
            $html .= "</{$previous_tag}>";
            // Rows resume after a caption has been closed.
            if ($previous_tag == "caption") {
                $html .= "<tr>";
            }
        }
        $html .= "<{$tag} {$pending_attributes}>\n{$cell_text}";
        $pending_attributes = "";
        $previous_tag = $tag;
    }

    return $html . "</{$previous_tag}></tr></table>";
}
Ejemplo n.º 6
0
/**
 * Converts an HTML fragment into a plain-text rendering.
 *
 * Entities are decoded, a fixed set of tags is mapped to text markers
 * (paragraph/list/line breaks to "[[[BR]]]", <hr> to "[[[HR]]]", headings
 * to "= Title =" style lines, list items to "- "), the result is passed
 * through formatTextLink() and stripAttributes(), whitespace is normalised,
 * and finally the markers are expanded.
 *
 * @param string $content HTML to convert
 * @return string text version of $content
 */
function formatText($content)
{
    $content = html_entity_decode($content, ENT_COMPAT, 'UTF-8');
    $content = str_replace("\r", "\n", $content);

    // Applied strictly in this order; later entries act on the output of
    // earlier ones (e.g. "<li>\n" becomes "<li>", which then becomes "- ").
    $markupToText = array(
        '&nbsp;' => ' ',
        "</p><p>" => "[[[BR]]]\n",
        "<p>" => "\n",
        '</p>' => "\n",
        "\n\n\n\n<ol>" => "[[[BR]]]",
        "</ol>\n\n\n\n" => "[[[BR]]]",
        "\n\n\n\n<ul>" => "[[[BR]]]",
        "</ul>\n\n\n\n" => "[[[BR]]]",
        "\n\n<ol>" => "[[[BR]]]",
        "</ol>\n\n" => "[[[BR]]]",
        "\n\n<ul>" => "[[[BR]]]",
        "</ul>\n\n" => "[[[BR]]]",
        "<li>\n" => "<li>",
        '<li>' => '- ',
        '</li>' => "\n",
        "<hr>" => "[[[HR]]]",
        "<br />\n" => "[[[BR]]]",
        '<br />' => "[[[BR]]]",
        '<br>' => '[[[BR]]]',
        '<h1>' => "\n= ",
        '</h1>' => " =\n",
        '<h2>' => "\n== ",
        '</h2>' => " ==\n",
        '<h3>' => "\n=== ",
        '</h3>' => " ===\n",
        '<h4>' => "\n==== ",
        '</h4>' => " ====\n",
        '<h5>' => "\n===== ",
        '</h5>' => " =====\n",
    );
    $content = str_replace(array_keys($markupToText), array_values($markupToText), $content);

    // Get proper links in the text version, then drop tag attributes.
    $content = formatTextLink($content);
    $content = stripAttributes($content);

    // Carriage returns become newlines, then tabs become spaces.
    $content = str_replace(array("\r", "\t"), array("\n", " "), $content);

    // Remove every space that directly follows a newline; repeated because
    // one pass only removes a single space per newline.
    while (strpos((string) $content, "\n ") !== false) {
        $content = str_replace("\n ", "\n", $content);
    }
    // Collapse runs of newlines down to a single newline.
    while (strpos((string) $content, "\n\n") !== false) {
        $content = str_replace("\n\n", "\n", $content);
    }

    // Markers are expanded only now so the breaks they stand for survive
    // the newline collapsing above; headings get a blank line before them.
    $markersToText = array(
        "\n= " => "\n\n= ",
        "\n== " => "\n\n== ",
        "\n=== " => "\n\n=== ",
        "\n==== " => "\n\n==== ",
        "\n===== " => "\n\n===== ",
        "\n[[[HR]]]" => "\n--------------------------------------------------------------------------------",
        "[[[BR]]]" => "\n",
    );
    return str_replace(array_keys($markersToText), array_values($markersToText), $content);
}