/**
 * Recursively render a DOM node and its descendants as plain text.
 *
 * - Text nodes: runs of whitespace are collapsed to a single space.
 * - Doctype nodes and style/head/title/meta/script elements: empty string.
 * - hr: a fixed "------" rule followed by a newline.
 * - h1-h6, p, div: a leading newline, plus a trailing newline depending on
 *   the tag and on the name of the next sibling element.
 * - a: rendered as "text (href)" unless the href equals the link text.
 * - Any other element: the concatenated text of its children.
 *
 * Relies on next_child_name(), which is defined outside this block, to
 * obtain the lower-cased name of the following sibling element.
 *
 * @param DOMNode $node Node to convert.
 * @return string Plain-text rendering of the node.
 */
function iterate_over_node($node) {
    if ($node instanceof DOMText) {
        // Explicit ASCII whitespace class rather than \s: without the /u
        // modifier \s is byte-oriented and, depending on the locale, can
        // match byte 0xA0, which is also the trailing byte of UTF-8
        // characters such as "à" and would corrupt them.
        return preg_replace("/[\\t\\n\\f\\r ]+/", " ", $node->wholeText);
    }
    if ($node instanceof DOMDocumentType) {
        // The doctype contributes no text.
        return "";
    }

    $nextName = next_child_name($node);
    $name = strtolower($node->nodeName);

    // Leading whitespace, and tags that short-circuit entirely.
    switch ($name) {
        case "hr":
            return "------\n";

        case "style":
        case "head":
        case "title":
        case "meta":
        case "script":
            // Non-content tags: drop them together with their children.
            return "";

        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
            // First of two newlines; the second is appended after the children.
            $output = "\n";
            break;

        case "p":
        case "div":
            $output = "\n";
            break;

        default:
            // Unknown tags contribute only the text of their children.
            $output = "";
            break;
    }

    // childNodes may be null for nodes that cannot have children; skip the
    // loop in that case rather than dereferencing null.
    $children = $node->childNodes;
    if ($children !== null) {
        foreach ($children as $child) {
            $output .= iterate_over_node($child);
        }
    }

    // Trailing whitespace and link decoration.
    switch ($name) {
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
            $output .= "\n";
            break;

        case "p":
        case "br":
            // A following div supplies its own leading newline.
            if ($nextName !== "div") {
                $output .= "\n";
            }
            break;

        case "div":
            // Add a line only if there is a next element and it is not a div.
            // Loose null check kept on purpose: next_child_name() is defined
            // elsewhere and its exact "no sibling" value is not visible here.
            if ($nextName !== "div" && $nextName != null) {
                $output .= "\n";
            }
            break;

        case "a":
            // Links are rendered as "text (href)". getAttribute() returns ""
            // when the attribute is absent, in which case the text is kept
            // unchanged. Strict comparison avoids PHP treating numeric-looking
            // strings such as "1.0" and "1" as equal.
            $href = $node->getAttribute("href");
            if ($href !== "" && $href !== $output) {
                $output = "{$output} ({$href})";
            }

            // A heading directly after the link needs an extra separating line.
            switch ($nextName) {
                case "h1":
                case "h2":
                case "h3":
                case "h4":
                case "h5":
                case "h6":
                    $output .= "\n";
                    break;
            }
            break;

        default:
            // No trailing decoration for other tags.
            break;
    }

    return $output;
}
/**
 * Recursively render a DOM node and its descendants as Markdown-flavoured
 * plain text.
 *
 * - Text nodes: runs of tab/newline/form-feed/CR/space collapse to one space.
 * - Doctype nodes and style/head/title/meta/script elements: empty string.
 * - hr: a fixed "------" rule followed by a newline.
 * - h1-h6, ol, ul, tr, p, div: a leading newline; headings, p, br and div
 *   may also get a trailing newline depending on the next sibling element.
 * - td/th: prefixed with a tab. li: "- " prefix and a trailing newline.
 * - b/strong, i/em, del, code: wrapped in "**", "_", "~~" and "`".
 * - a: "[text](href)", or just the text when the href merely repeats it
 *   (optionally behind mailto:, http:// or https://), or just the href when
 *   there is no text at all.
 * - img: "![Image - alt](src)", or "![Image](src)" when alt is empty.
 * - Any other element: the concatenated text of its children.
 *
 * Relies on next_child_name(), which is defined outside this block, to
 * obtain the lower-cased name of the following sibling element.
 *
 * @param DOMNode $node Node to convert.
 * @return string Text rendering of the node.
 */
function iterate_over_node($node) {
    if ($node instanceof DOMText) {
        return preg_replace("/[\\t\\n\\f\\r ]+/", " ", $node->wholeText);
    }
    if ($node instanceof DOMDocumentType) {
        // The doctype contributes no text.
        return "";
    }

    $nextName = next_child_name($node);
    $name = strtolower($node->nodeName);

    // Leading whitespace / opening markers, and tags that short-circuit.
    switch ($name) {
        case "hr":
            return "------\n";

        case "style":
        case "head":
        case "title":
        case "meta":
        case "script":
            // Non-content tags: drop them together with their children.
            return "";

        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
        case "ol":
        case "ul":
            // For headings a second newline is appended after the children.
            $output = "\n";
            break;

        case "td":
        case "th":
            // Tab-separate table cells.
            $output = "\t";
            break;

        case "tr":
        case "p":
        case "div":
            $output = "\n";
            break;

        case "li":
            $output = "- ";
            break;

        case "b":
        case "strong":
            $output = "**";
            break;

        case "i":
        case "em":
            $output = "_";
            break;

        case "del":
            $output = "~~";
            break;

        case "code":
            $output = "`";
            break;

        default:
            // Unknown tags contribute only the text of their children.
            $output = "";
            break;
    }

    // Nodes that cannot have children may not expose childNodes at all.
    if (isset($node->childNodes)) {
        foreach ($node->childNodes as $child) {
            $output .= iterate_over_node($child);
        }
    }

    // Trailing whitespace, closing markers, link and image formatting.
    switch ($name) {
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
            $output .= "\n";
            break;

        case "p":
        case "br":
            // A following div supplies its own leading newline.
            if ($nextName !== "div") {
                $output .= "\n";
            }
            break;

        case "div":
            // Add a line only if there is a next element and it is not a div.
            // Loose null check kept on purpose: next_child_name() is defined
            // elsewhere and its exact "no sibling" value is not visible here.
            if ($nextName !== "div" && $nextName != null) {
                $output .= "\n";
            }
            break;

        case "a":
            // getAttribute() returns "" for a missing attribute, so every
            // emptiness test below compares against "" explicitly; relying on
            // truthiness would wrongly treat the text "0" as empty.
            $href = $node->getAttribute("href");
            $title = $node->getAttribute("title");
            $output = trim($output);

            // Link text that is itself bracketed would yield "[[...]]":
            // strip one level of brackets.
            if (substr($output, 0, 1) === "[" && substr($output, -1) === "]") {
                $output = substr($output, 1, strlen($output) - 2);
                // In that case the title of the <a> takes precedence.
                if ($title !== "") {
                    $output = $title;
                }
            }

            // No link text at all: fall back to the title attribute.
            if ($output === "" && $title !== "") {
                $output = $title;
            }

            if ($href === "") {
                // Not a hyperlink; a named anchor is shown in brackets.
                if ($node->getAttribute("name") !== "") {
                    $output = "[{$output}]";
                }
            } else {
                // Strict comparisons: loose == would treat numeric-looking
                // strings such as "1e3" and "1000" as the same address.
                $repeatsAddress = $href === $output
                    || $href === "mailto:{$output}"
                    || $href === "http://{$output}"
                    || $href === "https://{$output}";

                if (!$repeatsAddress) {
                    if ($output !== "") {
                        $output = "[{$output}]({$href})";
                    } else {
                        // Nothing to label the link with: show the bare address.
                        $output = $href;
                    }
                }
                // Otherwise the text already is the address; leave it as is.
            }

            // A heading directly after the link needs an extra separating line.
            switch ($nextName) {
                case "h1":
                case "h2":
                case "h3":
                case "h4":
                case "h5":
                case "h6":
                    $output .= "\n";
                    break;
            }
            break;

        case "img":
            // Replaces (rather than appends to) whatever was collected so far.
            $alt = $node->getAttribute("alt");
            $src = $node->getAttribute("src");
            if ($alt !== "") {
                $output = "![Image - {$alt}]";
            } else {
                $output = "![Image]";
            }
            $output .= "({$src})";
            break;

        case "li":
            $output .= "\n";
            break;

        case "b":
        case "strong":
            $output .= "**";
            break;

        case "i":
        case "em":
            $output .= "_";
            break;

        case "del":
            $output .= "~~";
            break;

        case "code":
            $output .= "`";
            break;

        default:
            // No trailing decoration for other tags.
            break;
    }

    return $output;
}