/**
 * Transforms an HTML string into normalized markdown.
 *
 * The input is cleaned up (non-breaking spaces, leading whitespace), purified,
 * massaged with a series of regular expressions so the converter produces tidy
 * output, converted to markdown, and finally normalized (bullets, trailing
 * whitespace, excessive newlines).
 *
 * @inheritdoc
 *
 * @param mixed $value Null, a scalar (cast to string) or a string containing HTML.
 *
 * @return string|null Null when $value is null, the trimmed markdown otherwise.
 *
 * @throws TransformationFailedException When $value is neither null nor scalar/string,
 *                                       or when one of the regular expressions fails.
 */
public function transform($value)
{
    if (is_null($value)) {
        return $value;
    }

    if (is_scalar($value)) {
        $value = (string) $value;
    }

    if (!is_string($value)) {
        throw new TransformationFailedException(sprintf('Expected a string to transform, got %s instead', json_encode($value)));
    }

    // preg_replace() returns null when PCRE fails (e.g. invalid UTF-8 combined with
    // the "u" modifier, or a backtrack limit). Without this check the null would be
    // carried along and the method would silently return an empty string.
    $pregReplace = static function ($pattern, $replacement, $subject) {
        $result = preg_replace($pattern, $replacement, $subject);

        if (null === $result) {
            throw new TransformationFailedException(sprintf('Regular expression replacement failed (PCRE error code %d)', preg_last_error()));
        }

        return $result;
    };

    // Replace non-breaking spaces, somehow these result in a question mark when markdownifying.
    // Both the HTML entity and the UTF-8 encoded character (U+00A0) are written out explicitly,
    // so the search list does not depend on invisible characters in the source file.
    $value = str_replace(['&nbsp;', "\xC2\xA0"], ' ', $value);

    // remove leading spaces/tabs
    $value = $pregReplace('/^[ \\t]+/m', '', $value);

    // purify the html first
    $value = $this->purifier->purify($value);

    // Clean-up passes, applied in order; each entry is [pattern(s), replacement(s)].
    $replacements = [
        // remove whitespace between tags and before closing tags
        [['/>\\s+</', '/\\s+<\\//'], ['><', '</']],
        // remove whitespace around <br> tags
        [['/\\s+<br\\/?>/', '/<br\\/?>\\s+/'], '<br>'],
        // turn a newline into <br> when it is not directly between two tags
        ['/([^>])\\n([^<])/', '\\1<br>\\2'],
        // drop a <br> at the very start of a paragraph or list item
        ['/(<(p|li)>)<br\\s?\\/?>/i', '\\1'],
        // drop a <br> at the very end of a paragraph or list item
        ['/<br\\s?\\/?>(<\\/(p|li)>)/i', '\\1'],
    ];

    foreach ($replacements as list($search, $replacement)) {
        $value = $pregReplace($search, $replacement, $value);
    }

    // Strip tags in headings. The callback does not depend on the heading level,
    // so it is created once instead of on every iteration.
    $stripHeadingTags = static function ($matches) {
        if (count($matches) !== 4) {
            return $matches[0];
        }

        return $matches[1] . trim(strip_tags(str_replace('<br>', ' ', $matches[2]))) . $matches[3];
    };

    foreach (range(1, 6) as $headingSize) {
        $stripped = preg_replace_callback(
            '/(<h' . $headingSize . '>)(.*)(<\\/h' . $headingSize . '>)/iU',
            $stripHeadingTags,
            $value
        );

        if (null === $stripped) {
            throw new TransformationFailedException(sprintf('Regular expression replacement failed (PCRE error code %d)', preg_last_error()));
        }

        $value = $stripped;
    }

    // Remove any double bullets: a literal "*" or "-" at the start of a list item.
    // (A "|" inside a character class is a literal pipe, not an alternation, so it is
    // intentionally not part of the class.)
    $value = $pregReplace('/(<li>\\s*)[*\\-]/i', '\\1', $value);

    // Convert to markdown. Error suppression is kept from the original implementation;
    // presumably the converter emits notices on some input -- TODO confirm and remove.
    $value = @$this->converter->parseString($value);

    // Fix different types of bullets. What this does is check each line if it starts with any of "-ו○",
    // not followed by another bullet, and normalizes it to "* text".
    $value = $pregReplace('/^[\\-ו○]\\s*([^\\-ו○])/mu', '* $1', $value);

    // Now make sure there's a newline before 2 consecutive lines that start with a bullet.
    // This could lead to superfluous newlines, but they will be corrected later on.
    $value = $pregReplace('/(\\n\\* [^\\n]+){2,}/', "\n\$0", "\n" . $value);

    // remove trailing spaces/tabs
    $value = $pregReplace('/[ \\t]+$/m', '', $value);

    // remove excessive newlines
    $value = $pregReplace('/\\n{3,}/m', "\n\n", $value);

    return trim($value);
}
/**
 * Transforms an HTML string into markdown.
 *
 * The input is cleaned up (non-breaking spaces, leading whitespace), purified,
 * stripped of whitespace that would end up as trailing whitespace in the markdown,
 * converted, and finally normalized (trailing whitespace, excessive newlines).
 * Unlike the surrounding whitespace handling, the result is not trimmed.
 *
 * @inheritdoc
 *
 * @param mixed $value Null, a scalar (cast to string) or a string containing HTML.
 *
 * @return string|null Null when $value is null, the markdown otherwise.
 *
 * @throws TransformationFailedException When $value is neither null nor scalar/string,
 *                                       or when one of the regular expressions fails.
 */
public function transform($value)
{
    if (is_null($value)) {
        return $value;
    }

    if (is_scalar($value)) {
        $value = (string) $value;
    }

    if (!is_string($value)) {
        throw new TransformationFailedException(sprintf('Expected a string to transform, got %s instead', json_encode($value)));
    }

    // preg_replace() returns null when PCRE fails; fail loudly instead of
    // continuing with a null value and silently losing the content.
    $pregReplace = static function ($pattern, $replacement, $subject) {
        $result = preg_replace($pattern, $replacement, $subject);

        if (null === $result) {
            throw new TransformationFailedException(sprintf('Regular expression replacement failed (PCRE error code %d)', preg_last_error()));
        }

        return $result;
    };

    // Replace non-breaking spaces, somehow these result in a question mark when markdownifying.
    // Both the HTML entity and the UTF-8 encoded character (U+00A0) are written out explicitly,
    // so the search list does not depend on invisible characters in the source file.
    $value = str_replace(['&nbsp;', "\xC2\xA0"], ' ', $value);

    // remove leading spaces/tabs
    $value = $pregReplace('/^[ \\t]+/m', '', $value);

    // purify to remove really obscure html
    $value = $this->purifier->purify($value);

    // remove whitespace/newlines between tags: this can cause trailing
    // whitespace after markdownifying
    $value = $pregReplace(['/>\\s+</', '/\\s+<\\//'], ['><', '</'], $value);

    // also remove whitespace/newlines around <br> tags
    $value = $pregReplace(['/\\s+<br\\/?>/', '/<br\\/?>\\s+/'], '<br>', $value);

    // Replace newlines with <br> if the newline is not between 2 tags
    $value = $pregReplace('/([^>])\\n([^<])/', '\\1<br>\\2', $value);

    // Remove <br>'s at the beginning of a paragraph
    $value = $pregReplace('/(<(p|li)>)<br\\s?\\/?>/i', '\\1', $value);

    // Remove <br>'s at the end of a paragraph
    $value = $pregReplace('/<br\\s?\\/?>(<\\/(p|li)>)/i', '\\1', $value);

    // Replace •-bullets. A plain string replacement is sufficient for a literal character.
    $value = str_replace('•', '*', $value);

    // Convert to markdown. Error suppression is kept from the original implementation;
    // presumably the converter emits notices on some input -- TODO confirm and remove.
    $value = @$this->converter->parseString($value);

    // remove trailing spaces/tabs
    $value = $pregReplace('/[ \\t]+$/m', '', $value);

    // remove excessive newlines
    $value = $pregReplace('/\\n{3,}/m', "\n\n", $value);

    return $value;
}
/**
 * Checks that a single Converter instance does not carry state from one
 * parseString() call over to the next.
 */
public function testResetState()
{
    // Broken (unclosed) tags cause properties (such as indents) to run onto
    // subsequent strings, so the same instance is reused for both inputs.
    $parser = new Converter();

    // The unclosed blockquote is expected to show up as a ">" in the output.
    $quoted = $parser->parseString('Test blockquote <blockquote>Here it is');
    $this->assertContains('>', $quoted);

    // The next string has no blockquote, so no ">" may leak into its output.
    $plain = $parser->parseString('Test<br /><br />Linebreaks');
    $this->assertNotContains('>', $plain);
}