/**
 * Converts an HTML fragment to plain text by dumping it through the `w3m` command.
 *
 * The markup is first rewritten (bold/underline markers, sub/sup, alt/src attributes,
 * inline anchors), written to a temporary file under PATCHWORK_ZCACHE, rendered with
 * `w3m -dump` at $this->cols columns, then post-processed with self::$charMap and
 * self::$textAnchor.
 *
 * @param string $html HTML source to convert
 *
 * @return string the text produced by w3m after character mapping and anchor substitution
 */
function convertData($html)
{
    // Style according to the Netiquette: *bold* and _underlined_, keeping
    // surrounding whitespace outside of the markers
    $html = preg_replace('#<(?:b|strong)\\b[^>]*>(\\s*)#iu', '$1*', $html);
    $html = preg_replace('#(\\s*)</(?:b|strong)\\b[^>]*>#iu', '*$1', $html);
    $html = preg_replace('#<u\\b[^>]*>(\\s*)#iu', '$1_', $html);
    $html = preg_replace('#(\\s*)</u\\b[^>]*>#iu', '_$1', $html);

    // Neutralize <sub> and <sup> by turning them into <span>, attributes preserved
    $html = preg_replace('#<(/?)su[bp]\\b([^>]*)>#iu', '<$1span$2>', $html);

    // Fill empty alt attributes with whitespace, clear src attributes
    $html = preg_replace('#(<[^>]+\\balt=")"#iu', '$1 "', $html);
    $html = preg_replace('#(<[^>]+\\bsrc=")(?:[^"]*)"#iu', '$1"', $html);

    // Inline URLs
    $html = preg_replace_callback(
        '#<a\\b[^>]*\\shref="([^"]*)"[^>]*>(.*?)</a\\b[^>]*>#isu',
        array(__CLASS__, 'buildTextAnchor'),
        $html
    );

    // Convert html-entities to UTF-8 for w3m.
    // The four markup-significant characters are escaped one extra level first
    // (existing entities become &amp;..., raw characters become entities), so that
    // the html_entity_decode() call below restores them unchanged while every other
    // entity is turned into its UTF-8 character.
    // NOTE(review): the escaped forms in these two tables were reconstructed; the
    // previous literals were not valid PHP (an unterminated ''' and duplicated search
    // keys). Confirm against the upstream source.
    $html = str_replace(
        array('&quot;', '&lt;', '&gt;', '&#039;', '"', '<', '>', "'"),
        array('&amp;quot;', '&amp;lt;', '&amp;gt;', '&amp;#039;', '&quot;', '&lt;', '&gt;', '&#039;'),
        FILTER::get($html, 'text')
    );
    $html = html_entity_decode($html, ENT_COMPAT, 'UTF-8');

    // w3m reads its input from a file, so stage the markup in a temporary one
    $file = tempnam(PATCHWORK_ZCACHE, 'converter');
    Patchwork::writeFile($file, $html);

    $arg = escapeshellarg($file);
    $html = `w3m -dump -cols {$this->cols} -T text/html -I UTF-8 -O UTF-8 {$arg}`;

    // The temporary file is no longer needed once w3m has returned
    unlink($file);

    $html = str_replace(self::$charMap[0], self::$charMap[1], $html);

    // Substitute the anchor placeholders (presumably registered by buildTextAnchor),
    // then reset the map for the next conversion
    $html = strtr($html, self::$textAnchor);
    self::$textAnchor = array();

    return $html;
}
/**
 * Initializes the field.
 *
 * When a raw value was submitted under this field's name, it is replaced in place by
 * its 'html'-filtered form before the parent initializer runs. The optional
 * 'toolbarSet' and 'config' parameters are then copied onto the instance.
 *
 * @param array $param field parameters, passed by reference to the parent initializer
 */
protected function init(&$param)
{
    $rawValues =& $this->form->rawValues;

    if (isset($rawValues[$this->name])) {
        // Work on a reference so the filtered result is stored back in the form
        $raw =& $rawValues[$this->name];
        $raw = FILTER::get($raw, 'html');
    }

    parent::init($param);

    foreach (array('toolbarSet', 'config') as $option) {
        if (isset($param[$option])) {
            $this->{$option} = $param[$option];
        }
    }
}
/**
 * Builds the request parameters declared in $this->get, runs control(), then sends
 * the Content-Type header.
 *
 * Each entry of $this->get is either a spec string (default value '') or a
 * `spec => default` pair. A spec has the form `name[:filter[:arg...]]`; inside a spec,
 * `\:` stands for a literal colon and `\\` for a literal backslash. The resulting value
 * is stored both in $this->get->{name} and in $_GET[name].
 *
 * @param array $args raw values keyed by parameter name
 */
public final function __construct($args = array())
{
    $class = get_class($this);

    isset($this->contentType) or $this->contentType = constant($class . '::contentType');

    $specs = (array) $this->get;
    $this->get = (object) array();
    $_GET = array();

    foreach ($specs as $key => $spec) {
        if (is_string($key)) {
            // "spec => default" form
            $default = $spec;
            $spec = $key;
        } else {
            $default = '';
        }

        // NUL is used below as a temporary stand-in for escaped colons, so any NUL
        // already present in the spec is dropped first.
        // NOTE(review): the placeholder was an empty string before, which made "\:"
        // vanish instead of surviving the explode(); NUL is assumed to be the intended
        // character. Confirm against the upstream source.
        false !== strpos($spec, "\0") && ($spec = str_replace("\0", '', $spec));

        if (false !== strpos($spec, '\\')) {
            // Unescape "\\" and shield "\:" from the split on ':'
            $spec = strtr($spec, array('\\\\' => '\\', '\\:' => "\0"));
            $parts = explode(':', $spec);

            // Put the shielded colons back into each segment
            foreach ($parts as $i => $part) {
                false !== strpos($part, "\0") && ($parts[$i] = strtr($part, "\0", ':'));
            }
        } else {
            $parts = explode(':', $spec);
        }

        $key = array_shift($parts);

        $value = isset($args[$key]) ? (string) $args[$key] : $default;
        false !== strpos($value, "\0") && ($value = str_replace("\0", '', $value));

        if ($parts) {
            // First remaining segment is the filter name, the rest are its arguments
            $filter = array_shift($parts);
            $value = FILTER::get($value, $filter, $parts);

            if (false === $value) {
                $value = $default;
            }
        }

        $_GET[$key] = $this->get->{$key} = $value;
    }

    $this->control();

    // Without an explicit content type, derive one from the extension of the class file
    if (!$this->contentType && '' !== ($ext = strtolower(pathinfo(p\Superloader::class2file($class), PATHINFO_EXTENSION)))) {
        $this->contentType = isset(p\StaticResource::$contentType['.' . $ext])
            ? p\StaticResource::$contentType['.' . $ext]
            : 'application/octet-stream';
    }

    $this->contentType && header('Content-Type: ' . $this->contentType);
}
/**
 * Initializes the field from its parameters and from the submitted raw value.
 *
 * Sets the disabled/readonly/multiple/isdata flags, the validation filter
 * ($this->valid), its arguments ($this->validArgs) and message ($this->validmsg),
 * then validates the current value and stores the outcome in $this->status:
 * '' when there is nothing to validate, false when validation failed, true otherwise.
 *
 * @param array $param field parameters; numeric keys 0..n are bound by reference as
 *                     validation arguments
 */
protected function init(&$param)
{
    empty($param['disabled']) || ($this->disabled = true);

    if ($this->disabled || !empty($param['readonly'])) {
        $this->readonly = true;
    }

    if (isset($param['valid'])) {
        $this->valid = $param['valid'];
    } elseif (!isset($this->valid)) {
        $this->valid = 'char';

        // The default regexp only applies when no positional validation argument is given
        if (!isset($param[0]) && '' !== $this->validDefaultRx) {
            $this->validArgs[] = $this->validDefaultRx;
            $this->validmsg = T($this->validDefaultMsg);
        }
    }

    if (!empty($param['multiple'])) {
        $this->isdata = false;
        $this->multiple = true;
    }

    isset($param['isdata']) && ($this->isdata = (bool) $param['isdata']);

    // Positional parameters are the validation arguments, bound by reference
    $i = 0;
    while (isset($param[$i])) {
        $this->validArgs[] =& $param[$i++];
    }

    isset($param['validmsg']) && ($this->validmsg = $param['validmsg']);
    isset($param['validMsg']) && ($this->validmsg = $param['validMsg']);
    $this->validmsg || ($this->validmsg = FILTER::getMsg($this->valid, $this->validArgs));

    if (!$this->readonly && isset($this->form->rawValues[$this->name])) {
        $value = $this->form->rawValues[$this->name];

        // Strip NUL bytes from the submitted string and store the cleaned value back.
        // NOTE(review): the needle was an empty string before, which made this a
        // no-op (and a warning on PHP < 8); NUL is assumed to be the intended
        // character. Confirm against the upstream source.
        if (is_string($value) && false !== strpos($value, "\0")) {
            $value = str_replace("\0", '', $value);
            $this->form->rawValues[$this->name] = $value;
        }
    } elseif (isset($param['default'])) {
        $value = $param['default'];

        if ($this->multiple && !is_array($value)) {
            // A scalar default for a multiple field is a comma-separated, url-encoded list
            $value = explode(',', $value);
            $value = array_map('rawurldecode', $value);
        }

        $this->setValue($value);

        // From here on, $value aliases the stored value
        $value =& $this->value;
    } else {
        $value = '';
    }

    if ($this->multiple) {
        $this->status = '';

        if ($value) {
            if (is_array($value)) {
                $status = true;

                foreach ($value as $i => &$v) {
                    if ('' === $v) {
                        // Empty items are dropped rather than validated
                        unset($value[$i]);
                    } else {
                        $filtered = FILTER::get($v, $this->valid, $this->validArgs);

                        if (false === $filtered) {
                            $status = false;
                        } else {
                            $v = $filtered;
                        }
                    }
                }

                // Break the reference so the last item is not left aliased by $v
                unset($v);

                $value && ($this->status = $status);
            } else {
                $this->status = false;
                $value = array();
            }
        } else {
            $value = array();
        }
    } elseif ('' === (string) $value) {
        $this->status = '';
    } else {
        $this->status = FILTER::get($value, $this->valid, $this->validArgs);

        if ('' !== $this->status && false !== $this->status) {
            // The filter returned the normalized value: keep it and flag success
            $value = $this->status;
            $this->status = true;
        }
    }

    $this->setValue($value);
}
/**
 * Runs the `antiword` command on a file and returns its output filtered as 'text'.
 *
 * @param string $file path of the file handed to antiword
 *
 * @return mixed result of FILTER::get() applied to the command output
 */
function convertFile($file)
{
    $command = 'antiword -t -w 0 -m UTF-8 ' . escapeshellarg($file);
    $output = shell_exec($command);

    return FILTER::get($output, 'text');
}
/**
 * Runs the `pdftotext` command on a file and returns its output filtered as 'text'.
 *
 * @param string $file path of the file handed to pdftotext
 *
 * @return mixed result of FILTER::get() applied to the command output
 */
function convertFile($file)
{
    // The trailing "-" is passed as the output argument so the text is captured here
    $command = 'pdftotext -enc UTF-8 ' . escapeshellarg($file) . ' -';
    $output = shell_exec($command);

    return FILTER::get($output, 'text');
}