/**
 * Factory method.
 *
 * Creates an instance of this class and loads an HTML fragment into it.
 * Before loading, the fragment is wrapped in a minimal HTML document
 * (doctype, a META tag declaring utf-8, and a single <div> around the
 * fragment) so that the parser treats the input as utf-8 and so that the
 * fragment can later be retrieved as the contents of that first div.
 *
 * @param Utf8String $oHtml         HTML string to load, usually an item
 *                                  from an RSS feed. Being a Utf8String it
 *                                  is expected to already be in utf-8.
 * @param string     $sBaseUri      Optional base uri. Used only when it
 *                                  starts with 'http' and is longer than
 *                                  12 characters; it is then stored with
 *                                  exactly one trailing forward slash.
 * @param bool       $bAddNoFollow  Value passed to setNofollow().
 * @param bool       $parseCodeTags When true, parseCodeTags() is called
 *                                  on the loaded document.
 *
 * @return object of this class
 *
 * @throws DevException if unable to load the string
 */
public static function loadFeedItem(Utf8String $oHtml, $sBaseUri = '', $bAddNoFollow = true, $parseCodeTags = true)
{
    $oDom = new self('1.0', 'utf-8');
    $oDom->encoding = 'UTF-8';
    $oDom->preserveWhiteSpace = true;
    $oDom->recover = true;
    $oDom->setNofollow($bAddNoFollow);

    $sHtml = $oHtml->valueOf();

    /**
     * @todo
     * Maybe we should add a class to this div and then in
     * getFeedItem() not remove the div at all, so it will always be
     * part of the feed item's html and just wrap the entire item.
     * Then when we add an item to a page we know it will always be
     * wrapped in this additional div.
     */

    /**
     * Extremely important to add the
     * <META CONTENT="text/html; charset=utf-8">
     * This is the ONLY way to tell the DOM (more specifically libxml)
     * that the input is in utf-8 encoding.
     * Without this the DOM will assume that the input is in the default
     * ISO-8859-1 format and will try to recode it to utf-8, messing up
     * the string because it is already in utf-8 and does not need
     * converting.
     *
     * IMPORTANT: we are also wrapping the whole string in <div>
     * so that it will be easy to get back just the contents of
     * the first div.
     *
     * NOTE(review): the wrapper closes </html> without ever opening it.
     * Left untouched here; presumably tolerated by the parser - confirm.
     */
    $sHtml = '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd"> <head> <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"> </head> <body><div>' . $sHtml . '</div></body></html>';

    /**
     * Silence parser warnings only for the duration of the load and
     * restore the previous error_reporting level BEFORE deciding whether
     * to throw. Otherwise a failed load would leave error reporting
     * switched off for the rest of the request.
     */
    $ER = error_reporting(0);
    $bLoaded = @$oDom->loadHTML($sHtml);
    error_reporting($ER);

    if (false === $bLoaded) {
        throw new DevException("Error. \nUnable to load html string: " . $sHtml);
    }

    /**
     * If $sBaseUri begins with http (and is longer than 12 chars)
     * then set the baseUri value of the new object to it
     * and make sure it always ends with exactly one forward slash
     */
    if (!empty($sBaseUri) && 'http' === substr($sBaseUri, 0, 4) && strlen($sBaseUri) > 12) {
        $oDom->baseUri = rtrim($sBaseUri, '/') . '/';
    }

    $oDom->setRelNofollow();

    if ($parseCodeTags) {
        $oDom->parseCodeTags();
    }

    $oDom->fixImgBaseUri(); //->getImages();

    return $oDom;
}