/**
 * Used to extract the title, description and links from
 * a string consisting of ebook publication data.
 *
 * Every entry of the archive is inspected by name. Entries ending in
 * ".opf" are parsed with xmlToObject() and searched, two levels below
 * the root, for the Dublin Core elements dc:language, dc:title,
 * dc:creator, dc:identifier, dc:publisher, dc:date and dc:subject.
 * Entries ending in ".html" or ".xhtml" are run through HtmlProcessor
 * and their 't' field is appended to the description text. If an
 * element occurs more than once, the last occurrence wins.
 *
 * @param string $page epub contents
 * @param string $url the url where the page contents came from,
 *     used to canonicalize relative links
 *
 * @return array a summary of the contents of the page with the keys
 *     self::TITLE, self::DESCRIPTION, self::LANG, self::LINKS and
 *     self::PAGE
 */
function process($page, $url)
{
    $summary = NULL;
    // The dot is escaped so that only real file extensions match; an
    // unescaped "." would also accept names such as "notes_html".
    $opf_pattern = '/\.opf$/i';
    $html_pattern = '/\.x?html$/i';
    // NOTE(review): LINKS is returned as an array holding one empty
    // entry; kept as is because callers may rely on this shape.
    $epub_url[0] = '';
    // Dublin Core element name => text found for it in the OPF file
    $metadata = array(
        'dc:language' => '',
        'dc:title' => '',
        'dc:creator' => '',
        'dc:identifier' => '',
        'dc:publisher' => '',
        'dc:date' => '',
        'dc:subject' => '',
    );
    $htmlcontent = '';
    // Open a zip archive
    $zip = new PartialZipArchive($page);
    $num_files = $zip->numFiles();
    for ($i = 0; $i < $num_files; $i++) {
        // name of the current entry of the .epub document
        $name = $zip->getNameIndex($i);
        if (preg_match($opf_pattern, $name)) {
            // Get the package file data from the zipped folder
            $opf_data = $zip->getFromName($name);
            $opf_summary = $this->xmlToObject($opf_data);
            for ($m = 0; $m <= MAX_DOM_LEVEL; $m++) {
                if (!isset($opf_summary->children[$m])) {
                    continue;
                }
                $section = $opf_summary->children[$m];
                for ($n = 0; $n <= MAX_DOM_LEVEL; $n++) {
                    if (!isset($section->children[$n])) {
                        continue;
                    }
                    $child = $section->children[$n];
                    // Nodes lacking a name or content are skipped rather
                    // than dereferenced.
                    if (isset($child->name) && is_string($child->name)
                        && isset($metadata[$child->name])
                        && isset($child->content)) {
                        $metadata[$child->name] = $child->content;
                    }
                }
            }
        } else if (preg_match($html_pattern, $name)) {
            $html = new HtmlProcessor();
            $html_data = $zip->getFromName($name);
            $html_summary = $html->process($html_data, $url);
            // the sub-processor may return nothing usable for a file
            if (isset($html_summary['t'])) {
                $htmlcontent .= $html_summary['t'];
            }
        }
    }
    $epub_language = $metadata['dc:language'];
    $epub_title = $metadata['dc:title'];
    $epub_author = $metadata['dc:creator'];
    $epub_unique_identifier = $metadata['dc:identifier'];
    $epub_publisher = $metadata['dc:publisher'];
    $epub_date = $metadata['dc:date'];
    $epub_subject = $metadata['dc:subject'];
    // The description starts with the metadata, followed by the text of
    // the content documents.
    $desc = '';
    if ($epub_title != '') {
        $desc = " {$epub_title} .";
    }
    if ($epub_author != '') {
        $desc .= " {$epub_author} ";
    }
    if ($epub_language != '') {
        $desc .= " {$epub_language} ";
    }
    if ($epub_unique_identifier != '') {
        $desc .= " URN-" . $epub_unique_identifier . ".";
    }
    if ($epub_publisher != '') {
        $desc .= " {$epub_publisher} ";
    }
    if ($epub_date != '') {
        $desc .= " {$epub_date} ";
    }
    if ($epub_subject != '') {
        $desc .= " {$epub_subject} ";
    }
    $desc .= $htmlcontent;
    // restrict the length of the description to maximum description length
    if (strlen($desc) > self::$max_description_len) {
        if (function_exists('mb_strcut')) {
            // Keeps the byte limit but never ends in the middle of a
            // multibyte character (assumes the text is UTF-8).
            $desc = mb_strcut($desc, 0, self::$max_description_len,
                'UTF-8');
        } else {
            $desc = substr($desc, 0, self::$max_description_len);
        }
    }
    $summary[self::TITLE] = $epub_title;
    $summary[self::DESCRIPTION] = $desc;
    $summary[self::LANG] = $epub_language;
    $summary[self::LINKS] = $epub_url;
    $summary[self::PAGE] = $page;
    return $summary;
}