/**
 * Converts a given string to our xml friendly text.
 *
 * The string is passed through studip_utf8encode() and HTML Purifier,
 * optionally converted from HTML to Markdown (with Markdown links flattened
 * to plain text), stripped of control characters, trimmed and finally
 * escaped with htmlspecialchars().
 *
 * @param String $string Input string to reformat
 * @param bool $convert_to_markdown Whether the purified HTML should be
 *                                  converted to Markdown (default: true)
 * @return String Reformatted string (optional HTML -> Markdown, UTF-8)
 */
public function xml_ready($string, $convert_to_markdown = true)
{
    // Purifier and converter are created on the first call and reused afterwards.
    static $purifier = null;
    static $markdown = null;

    if ($purifier === null) {
        $purifier_config = HTMLPurifier_Config::createDefault();
        $purifier_config->set('Cache.SerializerPath', realpath($GLOBALS['TMP_PATH']));
        $purifier = new HTMLPurifier($purifier_config);

        $markdown = new HTML_To_Markdown();
        $markdown->set_option('strip_tags', true);
    }

    $string = studip_utf8encode($string);
    $string = $purifier->purify($string);

    if ($convert_to_markdown) {
        $string = $markdown->convert($string);

        // Drop links that have no link text: [](url)
        $string = preg_replace('/\\[\\]\\((\\w+:\\/\\/.*?)\\)/', '', $string);
        // [url](url "title") becomes "title: url"
        $string = preg_replace('/\\[(\\w+:\\/\\/.*?)\\/?\\]\\(\\1\\/?\\s+"(.*?)"\\)/isxm', '$2: $1', $string);
        // [url](url) becomes "url"
        $string = preg_replace('/\\[(\\w+:\\/\\/.*?)\\/?\\]\\(\\1\\/?\\)/isxm', '$1', $string);
        // [text](url) becomes "text: url"
        $string = preg_replace('/\\[(.*?)\\]\\((\\w+:\\/\\/.*?)\\)/', '$1: $2', $string);
    }

    // Remove control characters, keeping tab (\x09), line feed (\x0a) and carriage return (\x0d).
    $string = preg_replace('/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f]/', '', $string);
    $string = trim($string);
    $string = htmlspecialchars($string, ENT_QUOTES, 'UTF-8');

    return $string;
}
/**
 * In addition to markdown conversion, strips images and treats h3-6 as paragraphs.
 *
 * Image tags are removed and h3-h6 tags (opening or closing, with or without
 * attributes, in any letter case) are rewritten to plain <p> tags before the
 * HTML is handed to the converter.
 *
 * @param string $html
 * @return string empty if conversion fails
 */
public static function convert($html)
{
    // HTML tag names are case-insensitive, so match <IMG ...> as well as <img ...>.
    $html = preg_replace('/<img[^>]*>/i', '', $html);
    // Also match headings that carry attributes, e.g. <h3 class="title">.
    $html = preg_replace('/<(\\/)?h[3-6](?:\\s[^>]*)?>/i', '<$1p>', $html);

    $converter = new HTML_To_Markdown();
    $converter->set_option('header_style', 'postmatic');
    $converter->set_option('strip_tags', true);

    $markdown = $converter->convert($html);

    // Only a non-string result counts as failure; a loose truthiness check
    // would also turn the valid result "0" into an empty string.
    return is_string($markdown) ? $markdown : '';
}
/**
 * Converts HTML to Markdown and passes the Markdown on to deparseBlock().
 *
 * @param string $html   HTML to convert.
 * @param mixed  $detail When truthy, return both the raw Markdown and the
 *                       deparsed form instead of only the deparsed form.
 * @return mixed The result of deparseBlock(), or an array with the keys
 *               'markdown' and 'human' when $detail is truthy.
 */
public static function fromHtml($html, $detail = false)
{
    # The converter library is loaded on demand from the bundled directory.
    require_once dirname(__FILE__) . '/html-to-markdown/HTML_To_Markdown.php';

    $converter = new HTML_To_Markdown($html);

    if ($detail) {
        return array(
            'markdown' => $converter->output(),
            'human'    => self::deparseBlock($converter->output()),
        );
    }

    return self::deparseBlock($converter->output());
}
/**
 * Create the Calendar Event
 *
 * An HTML description, when given, has its <img> and <hr> tags removed and is
 * converted to Markdown. Start and end are stored as UTC strings in the
 * format Ymd\THis\Z; the end is the start plus $duration minutes.
 *
 * @param string      $title       Event title; HTML tags are stripped.
 * @param int|string  $startDate   Numeric values are used as a Unix timestamp,
 *                                 anything else is parsed by DateTime.
 * @param string|null $description Optional HTML description.
 * @param int         $duration    Length of the event in minutes.
 * @param string|null $location    Optional location, stored as given.
 */
public function __construct($title, $startDate, $description = null, $duration = 60, $location = null)
{
    if ($description) {
        $markdown = new HTML_To_Markdown();
        $markdown->set_option('strip_tags', true);

        // The <hr> pattern must also match a bare "<hr>" with no attributes or slash.
        $noImageText = preg_replace(array("/<img[^>]+\\>/i", "/<hr\\b[^>]*\\>/i"), "", $description);

        // Keep the converted Markdown string, not the converter object itself.
        $this->description = $markdown->convert($noImageText);
    }

    if (is_numeric($startDate)) {
        $startTimestamp = new DateTime();
        $startTimestamp->setTimestamp($startDate);
    } else {
        $startTimestamp = new DateTime($startDate);
    }
    $startTimestamp->setTimezone(new DateTimeZone('UTC'));

    $this->startDate = $startTimestamp->format('Ymd\\THis\\Z');
    $this->duration = $duration;
    // modify() changes $startTimestamp in place; the start was already formatted above.
    $this->endDate = $startTimestamp->modify('+' . $duration . ' minutes')->format('Ymd\\THis\\Z');
    $this->location = $location;
    $this->title = strip_tags($title);
}
/**
 * With the 'strip_tags' option enabled, converting a <span> element should
 * leave only its text content in the converter's string representation.
 */
public function test_set_option()
{
    $converter = new HTML_To_Markdown();
    $converter->set_option('strip_tags', true);
    $converter->convert('<span>Strip</span>');

    $expected = 'Strip';
    $actual = $converter->__toString();

    $this->assertEquals($expected, $actual);
}
// Export every published post with non-empty content from the `<prefix>_posts`
// table into a Markdown file under posts/.
// NOTE(review): $config['prefix'] is concatenated directly into the SQL text;
// presumably it comes from trusted local configuration - confirm.
$sql = 'SELECT post_title, post_name, post_date_gmt, post_status, post_content FROM `' . $config['prefix'] . '_posts` WHERE post_status = "publish" AND post_content != ""';
$rs = $conn->query($sql);
if ($rs === false) {
    trigger_error('Wrong SQL: ' . $sql . ' Error: ' . $conn->error, E_USER_ERROR);
} else {
    // The row count is stored here but is not read anywhere in this section.
    $rows_returned = $rs->num_rows;
    // Debugging aid, left disabled:
    // dumper($rs->fetch_assoc());
    while ($row = $rs->fetch_assoc()) {
        // Build the target file name from the post slug: <post_name>.md
        $date = date('Y-m-d', strtotime($row['post_date_gmt']));
        $filename = $row['post_name'] . '.md';
        // Published posts get a Y-m-d date prefix. The query above already
        // filters on post_status = "publish", so this should hold for every row.
        if ($row['post_status'] == 'publish') {
            $filename = $date . '-' . $filename;
        }
        // Convert the post's HTML content to Markdown.
        $temp = new HTML_To_Markdown($row['post_content']);
        $md = $temp->output();
        // Rewrite links that point at each old site URL.
        foreach ($config['old_wp_url'] as $arr) {
            // Upload URLs are redirected to the local "images" directory ...
            $md = str_replace($arr . '/wp-content/uploads', 'images', $md);
            // ... and any remaining occurrence of the old URL is removed.
            $md = str_replace($arr, '', $md);
        }
        // Prepend the post title as a "#" heading followed by a blank line.
        $md = "#" . $row['post_title'] . "\n\n" . $md;
        // Write the post into the posts directory (path is relative to the working directory).
        file_put_contents('posts/' . $filename, $md);
    }
}
echo 'Done! Go <a href="gen.php">generate the index now »</a>';
//// =========================
/**
 * Converts an HTML string to Markdown using the options stored on this object.
 *
 * @param string $html HTML to convert.
 * @return string The converter's output.
 */
protected function htmlToMarkdown($html)
{
    $converter = new \HTML_To_Markdown($html, $this->options);

    return $converter->output();
}
/**
 * Reduces a value to trimmed Markdown or plain text.
 *
 * Objects are returned untouched. Values matching the pattern returned by
 * $this->tag('p') are converted from HTML to Markdown (tags stripped, images
 * removed); everything else has its tags stripped and its line breaks
 * rewritten.
 *
 * @param mixed $v Value to sanitize.
 * @return mixed The object passed in, or the sanitized string ('' for blank input).
 */
function sanitize($v)
{
    if (is_object($v)) {
        return $v;
    }

    $v = trim($v);
    // Compare against '' explicitly: a loose falsy check would also discard
    // the legitimate value "0".
    if ($v === '') {
        return '';
    }

    // presumably tag('p') returns a regex that detects a <p> element - verify against its definition
    if (preg_match($this->tag('p'), $v)) {
        $markdown = new HTML_To_Markdown($v, array('strip_tags' => true, 'remove_nodes' => 'img'));
        $v = $markdown->output();
        // Links left with empty (or whitespace-only) text get a placeholder label.
        $v = preg_replace('/\\[\\s*\\]/', '[link]', $v);
    } else {
        $v = strip_tags($v);
        $v = preg_replace("/\n/", " \n", $v); // Convert line breaks to markdown style
    }

    // NOTE(review): the character list below looks like a mis-encoded
    // non-breaking space. trim() works on single bytes, so this may strip
    // bytes that belong to other multi-byte characters - confirm the intent.
    $v = trim($v, " \t\n\r\v Â");

    return $v;
}
/**
 * Converts HTML to Markdown.
 *
 * @param string|null $html      HTML to convert.
 * @param array|null  $overrides Converter options that override the defaults.
 * @return mixed Whatever the converter's convert() returns for $html.
 */
public static function text($html = null, $overrides = null)
{
    // The converter's constructor takes (html, options). The overrides must go
    // in the second slot; passed first they would be treated as HTML input and
    // never applied as options.
    $md = new \HTML_To_Markdown(null, $overrides);

    return $md->convert($html);
}
/**
 * Converts HTML to Markdown using 'atx' headers, '__' for bold and '_' for italic.
 *
 * @param string $html
 * @return string
 */
protected function htmlToMarkdown($html)
{
    $options = array(
        'header_style' => 'atx',
        'bold_style'   => '__',
        'italic_style' => '_',
    );
    $converter = new \HTML_To_Markdown($html, $options);

    return $converter->output();
}
/**
 * Convert HTML to markdown
 *
 * Tag stripping is switched off and the header style is taken from this
 * object's headingStyle property.
 *
 * @since 1.0.0
 *
 * @param string $html
 * @return string
 */
protected function convertToMarkdown($html)
{
    $htmlToMarkdown = new HTML_To_Markdown();

    $htmlToMarkdown->set_option('strip_tags', false);
    $htmlToMarkdown->set_option('header_style', $this->headingStyle);

    $result = $htmlToMarkdown->convert($html);

    return $result;
}