/**
 * Fetches the SEOmoz "just discovered" links for the current URL.
 *
 * The Moz response is scanned for the table whose id attribute is "results";
 * every row of that table with a non-empty first cell becomes one entry.
 *
 * @return array List of arrays with the keys 'link', 'text', 'pageAuthority',
 *               'DomainAuthority' and 'DiscoveryTime'. The list is empty when
 *               the response contains no table at all, and holds a single
 *               "No Data" placeholder entry when tables exist but none of them
 *               has the id "results".
 */
public function getMozJustDiscovered()
{
    // The raw response used to be dumped to just-discovered.txt in the current
    // working directory on every call; that debugging leftover was removed.
    $html = $this->moz->getData(MozServices::JD, $this->url);

    $parser = new HtmlParser($html, $this->url);
    $tables = $parser->getTags('table');
    $results = array();
    if (count($tables) === 0) {
        return $results;
    }

    // Locate the results table by its id attribute.
    $table = null;
    foreach ($tables as $tbl) {
        if (isset($tbl->attributes['id']) && $tbl->attributes['id'] === 'results') {
            $table = $tbl;
            break;
        }
    }

    //no moz data
    if (empty($table)) {
        $results[] = array(
            'link' => 'No Data',
            'text' => 'No Data',
            'pageAuthority' => 'No Data',
            'DomainAuthority' => 'No Data',
            'DiscoveryTime' => 'No Data',
        );
        return $results;
    }

    //moz has data
    $p2 = new HtmlParser($table->raw, $this->url);
    foreach ($p2->getTags('tr') as $tr) {
        $p3 = new HtmlParser($tr->raw, $this->url);
        $tds = $p3->getTags('td');
        if (empty($tds[0]->text)) {
            continue;
        }

        // Rows with fewer than five cells yield empty strings for the missing
        // cells instead of reading an undefined offset.
        $cells = array();
        for ($i = 0; $i < 5; $i++) {
            $cells[$i] = isset($tds[$i]->text) ? trim(strip_tags($tds[$i]->text)) : '';
        }

        $results[] = array(
            'link' => $cells[0],
            'text' => $cells[1],
            'pageAuthority' => $cells[2],
            'DomainAuthority' => $cells[3],
            // collapse runs of whitespace inside the timestamp cell
            'DiscoveryTime' => preg_replace('/\\s+/', ' ', $cells[4]),
        );
    }

    return $results;
}
/**
 * Checks that a handful of simple HTML snippets are converted to the
 * expected wiki markup.
 *
 * With USE_GLOBAL_SAX one parser is shared by all cases and the global
 * 'xml_parser_root' is unset after each case; otherwise a new parser is
 * created and explicitly destructed for every case.
 */
function testSimple()
{
    // HTML input => expected wiki output
    $cases = array(
        "<b>bold</b>" => "*bold*",
        "<strong>strong</strong>" => "*strong*",
        "<i>italic</i>" => "_italic_",
        "<em>emphasized</em>" => "_emphasized_",
        "<HR>" => "----",
        "<DT><DD>Indent</DD></DT>" => ";:Indent",
        "<NOWIKI>nowiki</NOWIKI>" => "<verbatim>\nnowiki\n</verbatim>",
        "<DL><DT> Def </DT><DD> List</DD></DL>" => "; Def : List",
    );

    // Original author's remark on the shared-parser mode: "will not work!"
    if (USE_GLOBAL_SAX) {
        $parser = new HtmlParser("PhpWiki2");
    }

    foreach ($cases as $html => $wiki) {
        if (USE_GLOBAL_SAX) {
            $parser->parse($html, false);
        } else {
            // redefine it for every run.
            $parser = new HtmlParser("PhpWiki2");
            $parser->parse($html);
        }

        $this->assertEquals($wiki, trim($parser->output()));

        if (USE_GLOBAL_SAX) {
            unset($GLOBALS['xml_parser_root']);
        } else {
            $parser->__destruct();
        }
    }
}
/**
 * Runs git URL extraction for $this->url with a fresh HtmlParser.
 *
 * Afterwards $this->arGitUrls holds the parser's git URLs and $this->error
 * holds the parser's error property.
 *
 * @return mixed The value returned by HtmlParser::extractGitUrls()
 */
public function parse()
{
    $extractor = new HtmlParser();
    $outcome = $extractor->extractGitUrls($this->url);

    $this->arGitUrls = $extractor->getGitUrls();
    $this->error = $extractor->error;

    return $outcome;
}
/**
 * Sets up the document via the parent constructor, remembers the template
 * and prepares a source parser that recognises commands wrapped in
 * "/*!" ... "*" "/".
 *
 * @param mixed  $mxtDoc      Forwarded unchanged to the parent constructor
 * @param bool   $bolLoadFile Forwarded unchanged to the parent constructor
 * @param string $strTemplate Stored in $this->strTemplate
 */
public function __construct($mxtDoc, $bolLoadFile = false, $strTemplate = '')
{
    parent::__construct($mxtDoc, $bolLoadFile);

    // Configure the parser completely before publishing it on the instance.
    $commandParser = new \Docbook\SrcParser();
    $commandParser->setWrapper('/*!', '*/');
    $commandParser->setNodeTemplate(array('cmd' => false, 'params' => array()));

    $this->strTemplate = $strTemplate;
    $this->srcParser = $commandParser;
}
/**
 * Stores the linkback as remote fork in the paste repository.
 *
 * Git URLs are extracted from the linkback source; the first one (if any)
 * becomes the remote's clone URL and its associated value the remote title.
 * When an existing fork already uses one of the extracted clone URLs, or has
 * $source as its web URL, that fork's remote name is reused; otherwise a new
 * "fork-<uniqid>" name is generated. Homepage, title and URL are written via
 * "config" commands on the repository's version control object.
 *
 * @param string $target     Target URI that should be linked in $source
 * @param string $source     Linkback source URI that should link to target
 * @param string $sourceBody Content of $source URI
 * @param object $res        HTTP response from fetching $source (not used here)
 *
 * @return void
 *
 * @throws SPb\Exception When storing the linkback fatally failed
 */
public function storeLinkback($target, $source, $sourceBody, \HTTP_Request2_Response $res)
{
    //FIXME: deleted
    //FIXME: cleanuptask
    $hp = new HtmlParser();
    $ok = $hp->extractGitUrls($source, $sourceBody);
    if ($ok === false) {
        //failed to extract git URL from linkback source
        //FIXME: send exception
        //$hp->error
        return;
    }

    $ci = $this->repo->getConnectionInfo();
    $forks = $ci->getForks();

    $arRemoteCloneUrls = $this->localizeGitUrls($hp->getGitUrls());
    $remoteCloneUrl = $remoteTitle = null;
    if (count($arRemoteCloneUrls)) {
        // First entry: key is the clone URL, value is the title.
        // key()/current() replace each(), which no longer exists in PHP 8.
        reset($arRemoteCloneUrls);
        $remoteCloneUrl = key($arRemoteCloneUrls);
        $remoteTitle = current($arRemoteCloneUrls);
    }

    $remoteid = 'fork-' . uniqid();
    //check if we already know this remote
    foreach ($forks as $remote) {
        if (isset($arRemoteCloneUrls[$remote->getCloneUrl()])) {
            $remoteTitle = $arRemoteCloneUrls[$remote->getCloneUrl()];
            $remoteid = $remote->getName();
            break;
        } elseif ($source == $remote->getWebURL(true)) {
            $remoteid = $remote->getName();
            break;
        }
    }

    $vc = $this->repo->getVc();
    if (!$this->isLocalWebUrl($source)) {
        //only add remote homepage; we can calculate local ones ourselves
        $vc->getCommand('config')
            ->addArgument('remote.' . $remoteid . '.homepage')
            ->addArgument($source)
            ->execute();
    }
    if ($remoteTitle !== null) {
        $vc->getCommand('config')
            ->addArgument('remote.' . $remoteid . '.title')
            ->addArgument($remoteTitle)
            ->execute();
    }
    if ($remoteCloneUrl !== null) {
        $vc->getCommand('config')
            ->addArgument('remote.' . $remoteid . '.url')
            ->addArgument($remoteCloneUrl)
            ->execute();
    }
}
/**
 * Pre-insert hook: accepts the record only when its event exists and is
 * active, assigns a hash, and mails a "thanks" message when a mail address
 * is set.
 *
 * @param object $db Database handle used to load the event
 * @return bool true when the insert may proceed, false otherwise
 */
function beforeInsert($db)
{
    $event = $db->get(new Event($this->event));
    if (!Variable::istype('Event', $event) || !$event->isActive($db)) {
        return false;
    }

    // NOTE(review): crypt() is called without a salt. PHP documents this as
    // raising a notice before 8.0 and as an error from 8.0 on - confirm the
    // target PHP version before touching it, since the hash format is stored.
    $this->hash = crypt($this->name . time() . mt_rand(0, 9999));

    if (!$this->mail) {
        return true;
    }

    // Render the thanks template for this participant and send it.
    $parser = new HtmlParser('thanks.mtpl');
    $parser->setVariable('participant', $this);

    $mail = new Mail('*****@*****.**', 'rhaco kaigi');
    $mail->to($this->mail, $this->name);
    $mail->subject('rhaco kaigi');
    $mail->message($parser->read());
    $mail->send();

    return true;
}
/**
 * Returns the value of the first Text node that follows a Text node
 * containing "Current Time" (case-insensitive).
 *
 * @param string $string Markup handed to HtmlParser
 * @return mixed The following Text node's value, or null when the marker is
 *               missing or no Text node comes after it
 */
function filter_format($string)
{
    $parser = new HtmlParser($string);

    // Initialised up front so a missing marker returns null instead of
    // reading an undefined variable.
    $value = null;
    $markerSeen = false;

    while ($parser->parse()) {
        if ($parser->iNodeName != 'Text') {
            continue;
        }
        if ($markerSeen) {
            // This is the Text node right after the marker: stop parsing here.
            $value = $parser->iNodeValue;
            break;
        }
        if (stristr($parser->iNodeValue, 'Current Time')) {
            $markerSeen = true;
        }
    }

    return $value;
}
/**
 * Combines the results of the HTTP and HTML parsers.
 *
 * If the HTTP map has no HttpParser entry the HTML map is returned as is; if
 * the HTML map has no HtmlParser entry the HTTP map is returned as is.
 * Otherwise both entries are merged and stored in the HTTP map under this
 * class's own key.
 */
private function merge(MapInterface $http, MapInterface $html) : MapInterface
{
    if (!$http->contains(HttpParser::key())) {
        return $html;
    }

    if (!$html->contains(HtmlParser::key())) {
        return $http;
    }

    $ownKey = self::key();
    $fromHeaders = $http->get(HttpParser::key());
    $fromMarkup = $html->get(HtmlParser::key());

    return $http->put($ownKey, $fromHeaders->merge($fromMarkup));
}
<?php
// Renders index.html with the current event and a HatenaSyntax formatter.
require dirname(__FILE__) . '/__init__.php';
Rhaco::import('tag.HtmlParser');

$db = new DbUtil(Event::connection());
$p = new HtmlParser('index.html');

// Event whose id equals the CURRENT_EVENT constant (1 is passed as the second argument).
$currentEvent = $db->get(
    new Event(),
    new C(
        Q::depend(),
        Q::eq(Event::columnId(), Rhaco::constant('CURRENT_EVENT', 1))
    )
);
$p->setVariable('event', $currentEvent);

$hatenaSyntax = Rhaco::obj('HatenaSyntax', array('headlevel' => 4, 'id' => 'event_description'));
$p->setVariable('hatena', $hatenaSyntax);

$p->write();
/**
 * Extracts what it considers the body text of an HTML string.
 *
 * Steps, as visible in the code below:
 *  1. The string is tokenized by HtmlParser::parser(); a falsy result makes
 *     the method return the string 'Cannot read your data !'.
 *  2. Each entry of 'tokenized_data' is classified: 1 when isTag() is true,
 *     0 when isTagClose() is true, -1 otherwise. Entries of the last kind are
 *     also collected, in order, into $token.
 *  3. The classification is collapsed into $endcode: consecutive values of
 *     the same sign are summed into one entry and every 0 gets an entry of
 *     its own.
 *  4. All pairs (i, j) of negative $endcode entries with i <= j are scored
 *     with i_TagBefore() + i_fTagAfter() + i_fTextBetween(); the best pair is
 *     kept in $i_max / $j_max.
 *  5. $i_max and $j_max are converted to offsets $start and $end, and
 *     $token[0 .. $end - 1] is joined with single spaces.
 *
 * NOTE(review): $start is computed but the active loop starts at 0 (the
 * variant starting at $start - 1 is commented out) - confirm this is intended.
 * NOTE(review): $end counts entries of every kind while $token only holds the
 * -1 entries, so $token[$i] may be read past its end - verify.
 * NOTE(review): $binary_token is never initialised, $endcode[$j] is read
 * before it is first written, and $return_text is unused.
 *
 * @param string $string HTML input
 * @return string Joined body text; '' when $string is empty; the literal
 *                'Cannot read your data !' when tokenizing fails
 */
public function extractFromString($string) { $body_text = array(); if ($string != '') { $o_HtmlParser = new HtmlParser(); $a_DataParser = $o_HtmlParser->parser($string); if (!$a_DataParser) { return 'Cannot read your data !'; } /* do smart binary array */ $i = 0; $j = 0; $k = 0; $endcode = array(); $idx = 0; $token = array(); foreach ($a_DataParser['tokenized_data'] as $index => $sz_tokenizedData) { /* If is tag */ if ($this->isTag($a_DataParser, $index)) { $binary_token[$i] = 1; } elseif ($this->isTagClose($a_DataParser, $index)) { $binary_token[$i] = 0; } else { $binary_token[$i] = -1; $token[$idx] = $sz_tokenizedData; $idx++; } $i++; } for ($k = 0; $k < $i; $k++) { $x = $binary_token[$k]; /* Add an index for close tag. We will use it when calculate body text */ if ($x == 0) { $j++; $endcode[$j] = 0; $j++; continue; } if (abs($x + $endcode[$j]) < abs($endcode[$j])) { $j++; } $endcode[$j] += $x; } /* Extract body text */ $i_max = 0; $j_max = 0; $max = 0; for ($i = 0; $i < count($endcode) - 1; $i++) { if ($endcode[$i] >= 0) { continue; } for ($j = $i; $j < count($endcode); $j++) { if ($endcode[$j] >= 0) { continue; } /* Calculate max in range [i .. j] */ $S = $this->i_TagBefore($endcode, $i) + $this->i_fTagAfter($endcode, $j) + $this->i_fTextBetween($endcode, $i, $j); if ($S > $max) { $max = $S; $i_max = $i; $j_max = $j; } } } /* Calculate start and end point */ $start = 0; $end = 0; for ($i = 0; $i < $i_max; $i++) { if ($endcode[$i] == 0) { $start++; } else { $start += abs($endcode[$i]); } } for ($i = 0; $i < $j_max; $i++) { if ($endcode[$i] == 0) { $end++; } else { $end += abs($endcode[$i]); } } $return_text = array(); /* Calculate body text */ //for($i = $start - 1; $i <= $end -1; $i++) for ($i = 0; $i <= $end - 1; $i++) { $body_text[] = $token[$i]; } } $body_text = implode(' ', $body_text); return $body_text; }
/**
 * @desc Generates the HTML for a particular question.
 *
 * If no derivation is cached on the instance, the problem is rendered through
 * WebworkClient with the given seed and the base64-decoded output is cached in
 * $this->_derivation. The derivation HTML is then re-emitted node by node:
 * INPUT, SELECT and TEXTAREA names are prefixed with
 * 'resp' . question id . '_', submitted answers are filled back in (checked
 * checkboxes, text values, selected options, textarea content) and, when
 * grading with partial answers enabled, a feedback class is appended.
 *
 * NOTE(review): $name is only reassigned when the current element has a
 * 'name' attribute, so an element without one sees the previous element's
 * name (or an undefined variable on the first element) - confirm.
 * NOTE(review): the CHECKBOX branch reads $answers[$name] and the OPTION
 * branch reads $answers[$currentselect] without an isset() check.
 *
 * @param integer $seed    The seed of the question.
 * @param array   $answers Answers to render; each is re-keyed by its ->field.
 *                         Passed by reference: on return it holds only the
 *                         answers that were matched to a form field, in the
 *                         order those fields were encountered.
 * @param mixed   $event   Compared against QUESTION_EVENTGRADE to decide
 *                         whether feedback classes are added.
 * @return string The HTML question representation.
 */
public function render($seed, &$answers, $event) { //JIT Derivation creation //Usually we have this from the check answers call if (!isset($this->_derivation)) { $client = WebworkClient::Get(); $env = WebworkQuestion::DefaultEnvironment(); $env->problemSeed = $seed; $result = $client->renderProblem($env, $this->_data->code); $derivation = new stdClass(); $derivation->html = base64_decode($result->output); $derivation->seed = $result->seed; $this->_derivation = $derivation; } $orderedanswers = array(); $tempanswers = array(); foreach ($answers as $answer) { $tempanswers[$answer->field] = $answer; } $answers = $tempanswers; $showpartialanswers = $this->_data->grading; $questionhtml = ""; $parser = new HtmlParser($this->_derivation->html); $currentselect = ""; $textarea = false; $checkboxes = array(); while ($parser->parse()) { //change some attributes of html tags for moodle compliance if ($parser->iNodeType == NODE_TYPE_ELEMENT) { $nodename = $parser->iNodeName; if (isset($parser->iNodeAttributes['name'])) { $name = $parser->iNodeAttributes['name']; } //handle generic change of node's attribute name if ($nodename == "INPUT" || $nodename == "SELECT" || $nodename == "TEXTAREA") { $parser->iNodeAttributes['name'] = 'resp' . $this->_data->question . '_' . $name; if ($event == QUESTION_EVENTGRADE && isset($answers[$name])) { if ($showpartialanswers) { if (isset($parser->iNodeAttributes['class'])) { $class = $parser->iNodeAttributes['class']; } else { $class = ""; } $parser->iNodeAttributes['class'] = $class . ' ' . 
question_get_feedback_class($answers[$name]->score); } } } //handle specific change if ($nodename == "INPUT") { $nodetype = strtoupper($parser->iNodeAttributes['type']); if ($nodetype == "CHECKBOX") { if (strstr($answers[$name]->answer, $parser->iNodeAttributes['value'])) { //FILLING IN ANSWER (CHECKBOX) array_push($orderedanswers, $answers[$name]); $parser->iNodeAttributes['checked'] = '1'; } $parser->iNodeAttributes['name'] = $parser->iNodeAttributes['name'] . '_' . $parser->iNodeAttributes['value']; } else { if ($nodetype == "TEXT") { if (isset($answers[$name])) { //FILLING IN ANSWER (FIELD) array_push($orderedanswers, $answers[$name]); $parser->iNodeAttributes['value'] = $answers[$name]->answer; } } } } else { if ($nodename == "SELECT") { $currentselect = $name; } else { if ($nodename == "OPTION") { if ($parser->iNodeAttributes['value'] == $answers[$currentselect]->answer) { //FILLING IN ANSWER (DROPDOWN) array_push($orderedanswers, $answers[$currentselect]); $parser->iNodeAttributes['selected'] = '1'; } } else { if ($nodename == "TEXTAREA") { if (isset($answers[$name])) { array_push($orderedanswers, $answers[$name]); $textarea = true; $questionhtml .= $parser->printTag(); $questionhtml .= $answers[$name]->answer; } } } } } } if (!$textarea) { $questionhtml .= $parser->printTag(); } else { $textarea = false; } } $answers = $orderedanswers; return $questionhtml; }
/**
 * @desc Prints the question.
 *
 * Loads the 'question_webwork_derived' record referenced by the state's
 * 'derivationid' response, base64-decodes its html and re-emits it node by
 * node into $problemhtml: INPUT, SELECT and TEXTAREA names are prefixed with
 * 'resp' . question id . '_', submitted values are filled in, the matching
 * OPTION is marked selected and, when grading with partial answers enabled,
 * a feedback class is appended. Finally display.html is included.
 *
 * The local variable names are deliberately left untouched: display.html is
 * included in this scope and presumably reads them - verify against the
 * template before renaming anything.
 *
 * @param $question  object The question object to print.
 * @param $state     object The state of the responses for the question.
 * @param $cmoptions object Options containing course ID.
 * @param $options   object Only ->readonly is read here.
 */
function print_question_formulation_and_controls(&$question, &$state, $cmoptions, $options)
{
    global $CFG, $USER;
    $readonly = empty($options->readonly) ? '' : 'disabled="disabled"';

    //Formulate question image and text
    $questiontext = $this->format_text($question->questiontext, $question->questiontextformat, $cmoptions);
    $image = get_question_image($question, $cmoptions->course);

    $derivationid = $state->responses['derivationid'];
    $derivation = get_record('question_webwork_derived', 'id', $derivationid);
    $unparsedhtml = base64_decode($derivation->html);

    //partial answers
    $showPartiallyCorrectAnswers = $question->grading;

    //new array keyed by field
    $fieldhash = $state->responses['answers'];
    $answerfields = array();

    // Initialised so the first append below does not read an undefined variable.
    $problemhtml = '';

    $parser = new HtmlParser($unparsedhtml);
    $currentselect = "";
    while ($parser->parse()) {
        //change some attributes of html tags for moodle compliance
        if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
            $nodename = $parser->iNodeName;
            // Elements without a name attribute get an empty name instead of
            // triggering an undefined-index read.
            $name = isset($parser->iNodeAttributes['name']) ? $parser->iNodeAttributes['name'] : '';

            //handle generic change of node's attribute name
            if ($nodename == "INPUT" || $nodename == "SELECT" || $nodename == "TEXTAREA") {
                $parser->iNodeAttributes['name'] = 'resp' . $question->id . '_' . $name;
                if ($state->event == QUESTION_EVENTGRADE && isset($fieldhash[$name])) {
                    if ($showPartiallyCorrectAnswers) {
                        // An element may have no class attribute yet.
                        $parser->iNodeAttributes['class'] =
                            (isset($parser->iNodeAttributes['class']) ? $parser->iNodeAttributes['class'] : '')
                            . ' ' . question_get_feedback_class($fieldhash[$name]['score']);
                    }
                }
                if (!strstr($name, 'previous')) {
                    // Fields without a submitted answer are still recorded, with null.
                    $answerfields[$name] = isset($fieldhash[$name]) ? $fieldhash[$name] : null;
                }
            }

            //handle specific change
            if ($nodename == "INPUT") {
                //put submitted value into field
                if (isset($fieldhash[$name])) {
                    $parser->iNodeAttributes['value'] = $fieldhash[$name]['answer'];
                }
            } elseif ($nodename == "SELECT") {
                $currentselect = $name;
            } elseif ($nodename == "OPTION") {
                // NOTE(review): neither side is isset()-guarded; left unchanged
                // because guarding would alter which option gets selected when
                // no answer exists.
                if ($parser->iNodeAttributes['value'] == $fieldhash[$currentselect]['answer']) {
                    $parser->iNodeAttributes['selected'] = '1';
                }
            }
        }
        $problemhtml .= $parser->printTag();
    }

    //for the seed form field
    $qid = $question->id;
    $seed = $state->responses['seed'];

    //if the student has answered
    include "{$CFG->dirroot}/question/type/webwork/display.html";
}
/**
 * Initialises the document through the parent constructor and remembers the
 * template.
 *
 * $strTemplate now has a default value: a required parameter after the
 * optional $bolLoadFile is deprecated since PHP 8.0 and made $bolLoadFile
 * effectively required. Existing three-argument calls behave as before.
 *
 * @param mixed  $mxtDoc      Forwarded unchanged to the parent constructor
 * @param bool   $bolLoadFile Forwarded unchanged to the parent constructor
 * @param string $strTemplate Stored in $this->strTemplate
 */
public function __construct($mxtDoc, $bolLoadFile = false, $strTemplate = '')
{
    parent::__construct($mxtDoc, $bolLoadFile);
    $this->strTemplate = $strTemplate;
}
/**
 * Wrapper around zaufi's HTML sucker code, used only for its HTML-to-wiki
 * conversion.
 *
 * Shows error.tpl through $smarty and terminates when the compiled grammar
 * file cannot be opened.
 *
 * \param &$inHtml string -- HTML in; empty <p></p> pairs are removed from it
 *                           in place before parsing
 * \return string -- the generated wiki markup
 */
function parse_html(&$inHtml)
{
    global $smarty;
    include ('lib/htmlparser/htmlparser.inc');

    // Load the compiled (serialized) grammar
    $grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
    $fp = @fopen($grammarfile, 'r');
    if (!$fp) {
        $smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
        $smarty->display("error.tpl");
        die;
    }
    $serialized = fread($fp, filesize($grammarfile));
    $grammar = unserialize($serialized);
    fclose($fp);

    // ckeditor artifact: empty p tags are invisible, drop them
    $inHtml = str_replace('<p></p>', '', $inHtml);

    // Parse the HTML
    $htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
    $htmlparser->Parse();

    /*
     * Walker state:
     *  'stack'     - open tags; entries carry 'wikitags', the number of
     *                'wikistack' entries produced by the html tag
     *  'wikistack' - wiki markup saved for the linebreak handling, one array
     *                per html tag with 'begin' and 'end' lists of markups
     *  'wiki_lbr'  - true if '%%%' must be used for linebreaks instead of '\n'
     */
    $p = array(
        'stack' => array(),
        'listack' => array(),
        'wikistack' => array(),
        'wiki_lbr' => 0,
        'first_td' => false,
        'first_tr' => false,
    );
    $out_data = '';
    $this->walk_and_parse($htmlparser->content, $out_data, $p, '');

    // Tags may still be open when the HTML was invalid; close them, innermost
    // first, so the wiki output stays valid. Unclosed lists need nothing:
    // wiki has no list start/end syntax.
    while (!empty($p['stack'])) {
        $unclosed = array_pop($p['stack']);
        $out_data .= $unclosed['string'];
    }

    // Post-processing, applied in this order:
    //  1) fix linked images
    //  2) fix remaining images (not in links)
    //  3) remove empty lines
    //  4) remove nbsp's
    $cleanups = array(
        array(',\[(.*)\|\(img src=(.*)\)\],mU', '{img src=$2 link=$1}'),
        array(',\(img src=(.*)\),mU', '{img src=$1}'),
        array(",[\n]+,mU", "\n"),
        array(", ,mU", " "),
    );
    foreach ($cleanups as $cleanup) {
        $out_data = preg_replace($cleanup[0], $cleanup[1], $out_data);
    }

    return $out_data;
} // end parse_html
/**
 * Polls the ticket mailbox over POP3 and turns the messages into tickets,
 * ticket notes or forwarded mails.
 *
 * Flow, as visible in the code below:
 *  - Returns $this->e('CRON_DISABLED') when the 'cron_enabled' setting is
 *    falsy, $this->e('MAIL_NO_CONNECT') when the POP3 connection fails and
 *    $this->e('MAIL_NEW_MAIL') when login() returns a falsy message count.
 *  - Loads the blacklist, the default ticket status, the groups, their names,
 *    their moderators' addresses and the members' alternative e-mail fields.
 *  - First pass, one iteration per message number: parses the header,
 *    applies the blacklist, picks the addressed group, detects replies via a
 *    "[#code]" marker in the subject, looks up the sender and extracts the
 *    body (text/plain, else the Text nodes of text/html via HtmlParser). The
 *    recv_* variables are collected into $recv[message number].
 *  - Second pass: spam/blacklisted messages are deleted; messages from
 *    unknown senders ('other') are forwarded to the group's moderators; all
 *    others either create a ticket or add a note to an existing one, send the
 *    notification mails, and are deleted from the server.
 *  - Returns $this->e() with one "name = count" line per counter.
 *
 * NOTE(review): inside the second pass the inner
 * "foreach ($groups_mods[...] as $i => $mod_email)" loops overwrite $i, which
 * is the POP3 message number later passed to $pop3->delete($i) - verify that
 * the right message is deleted.
 * NOTE(review): $groups_email is read but never assigned in this function.
 * NOTE(review): split() was removed in PHP 7.
 * NOTE(review): "$recv_blacklist = 0" in the recipient loop resets a blacklist
 * decision made earlier for the same message - confirm this is intended.
 *
 * @return mixed Whatever $this->e() returns
 */
protected function _ticket_home() { global $user, $core; if (!$core->v('cron_enabled')) { return $this->e('CRON_DISABLED'); } foreach (w('mail pop3 emailer htmlparser') as $row) { require_once XFS . 'core/' . $row . '.php'; } $pop3 = new pop3(); if (!$pop3->connect($core->v('mail_server'), $core->v('mail_port'))) { return $this->e('MAIL_NO_CONNECT'); } if (!($total_mail = $pop3->login('recent:' . $core->v('mail_ticket_login'), $core->v('mail_ticket_key')))) { return $this->e('MAIL_NEW_MAIL'); } // $mail = new _mail(); $emailer = new emailer(); // if (!($blacklist = $core->cache_load('ticket_blacklist'))) { $sql = 'SELECT * FROM _tickets_blacklist ORDER BY list_id'; $blacklist = $core->cache_store(_rowset($sql, 'list_address', 'list_id')); } if (!($ticket_status = $core->cache_load('ticket_status_default'))) { $sql = 'SELECT status_id FROM _tickets_status WHERE status_default = 1'; $ticket_status = $core->cache_store(_field($sql, 'status_id', 0)); } $sql = 'SELECT group_id, group_email FROM _groups ORDER BY group_email'; $groups = _rowset($sql, 'group_email', 'group_id'); $sql = 'SELECT group_email, group_name FROM _groups ORDER BY group_email'; $groups_name = _rowset($sql, 'group_email', 'group_name'); $sql = 'SELECT gg.group_email, m.user_email FROM _groups gg, _groups_members g, _members m WHERE g.member_mod = ? AND g.member_uid = m.user_id AND gg.group_id = g.member_group ORDER BY m.user_email'; $groups_mods = _rowset(sql_filter($sql, 1), 'group_email', 'user_email', true); foreach ($groups as $a_group_email => $a_group_id) { if (!isset($groups_mods[$a_group_email])) { $groups_mods[$a_group_email] = w(); } } $sql = 'SELECT s.a_assoc, s.a_value FROM _members_fields f, _members_store s WHERE s.a_field = f.field_id AND f.field_alias LIKE ? 
ORDER BY s.a_value'; $email_alt = _rowset(sql_filter($sql, 'email%'), 'a_value', 'a_assoc'); // Pre mail process $recv = w(); $now = time(); $line_orig = array(' '); $line_repl = array(' '); $_v = w('from from_d to ticket subject body date mod ip spam blacklist reply other'); $_c = w('normal reply other blacklist spam', 0); for ($i = 1; $i <= $total_mail; $i++) { foreach ($_v as $row) { ${'recv_' . $row} = 0; } $s_header = $mail->parse_header(split("\r\n", implode('', $pop3->top($i)))); $recv_from = $mail->parse_address($s_header['from']); if (isset($blacklist[$recv_from])) { $recv_blacklist = 1; } if ($recv_from == $core->v('mail_ticket_login')) { $recv_blacklist = 1; } _dvar($s_header['to'], ''); _dvar($s_header['cc'], ''); if (f($s_header['cc'])) { $s_header['to'] .= (f($s_header['to']) ? ', ' : '') . $s_header['cc']; } $to_part = array_map('trim', explode(strpos($s_header['to'], ',') ? ',' : ';', $s_header['to'])); foreach ($to_part as $row) { if (strpos($row, '<') !== false) { $row = preg_replace('#.*?<(.*?)>#is', '\\1', $row); } if (isset($blacklist[$row])) { $recv_blacklist = 1; } else { $recv_blacklist = 0; $row_first = array_key(explode('@', $row), 0); if (isset($groups[$row_first])) { $recv_to = $row_first; } } } if (strstr($s_header['to'], _lang('MAIL_TO_UNKNOWN')) !== false) { $recv_to = array_key(explode('@', $core->v('mail_ticket_login')), 0); } if (!$recv_to) { $recv_blacklist = 1; } if (!$recv_blacklist) { $recv_subject = htmlencode(trim($s_header['subject'])); if (preg_match('#\\[\\#(.*?)\\]#is', $recv_subject, $p_subject)) { $sql = 'SELECT ticket_id FROM _tickets WHERE ticket_code = ?'; if ($recv_subject_d = _fieldrow(sql_filter($sql, $p_subject[1]))) { $recv_ticket = $recv_subject_d['ticket_id']; $recv_reply = $p_subject[1]; $recv_subject = substr(strrchr($recv_subject, ']'), 3); } } if ($recv_to . '@' . 
$core->v('domain') == $recv_from && $recv_from == $core->v('mail_ticket_login') && $recv_reply) { $recv_blacklist = 1; } } if (!$recv_blacklist) { if (isset($email_alt[$recv_from])) { $sql_field = 'id'; $sql_value = $email_alt[$recv_from]; } else { $sql_field = 'username'; $sql_value = array_key(explode('@', $recv_from), 0); } $sql = 'SELECT user_id, user_username, user_firstname, user_lastname FROM _members WHERE user_?? = ?'; if ($recv_from_d = _fieldrow(sql_filter($sql, $sql_field, $sql_value))) { $recv_from_d = serialize(array_row($recv_from_d)); } else { $recv_other = 1; } $d_body = $mail->body($s_header, $pop3->fbody($i), true); $recv_date = $mail->parse_date($s_header['date']); $recv_ip = $mail->parse_ip($s_header['received']); if (isset($groups_email[$recv_to])) { $recv_mod = $groups_email[$recv_to]; } if ($recv_date > $now || $recv_date < $now - 86400) { $recv_date = $now; } if (isset($d_body['text-plain']) && f($d_body['text-plain'])) { $recv_body = trim($d_body['text-plain']); } elseif (isset($d_body['text-html']) && f($d_body['text-html'])) { $htm_text = w(); $tag_open = false; $parser = new HtmlParser($d_body['text-html']); while ($parser->parse()) { $line = trim(str_replace($line_orig, $line_repl, $parser->iNodeValue)); if ($tag_open || strpos($line, '<') !== false) { $tag_open = !$tag_open; continue; } if ($parser->iNodeName == 'Text' && f($line)) { $htm_text[] = preg_replace("/(\r\n){1}/", ' ', $line); } } $recv_body = implode("\n", $htm_text); } if (f($recv_body)) { $recv_body = htmlencode(_utf8($recv_body)); } if (!f($recv_body)) { $recv_blacklist = 1; } } $recv[$i] = w(); foreach ($_v as $row) { $recv[$i][$row] = ${'recv_' . $row}; } } foreach ($recv as $i => $row) { if ($row['spam'] || $row['blacklist']) { $pop3->delete($i); $row_key = $row['spam'] ? 
'spam' : 'blacklist'; $_c[$row_key]++; continue; } // Send mail to group admin if ($row['other']) { $_c['other']++; if (count($groups_mods[$row['to']])) { foreach ($groups_mods[$row['to']] as $i => $mod_email) { $email_func = !$i ? 'email_address' : 'cc'; $emailer->{$email_func}($mod_email); } $emailer->from($row['from']); $emailer->replyto($row['from']); $emailer->set_subject(entity_decode($row['subject'])); $emailer->use_template('ticket_other'); $emailer->set_decode(true); $emailer->assign_vars(array('SUBJECT' => entity_decode($row['subject']), 'MESSAGE' => entity_decode($row['body']))); $emailer->send(); $emailer->reset(); } $pop3->delete($i); continue; } $row['code'] = $row['reply'] ? $row['reply'] : substr(md5(unique_id()), 0, 8); $row['from_d'] = unserialize($row['from_d']); $row['group_id'] = $groups[$row['to']]; $row['msubject'] = entity_decode(sprintf('%s [#%s]: %s', $groups_name[$row['to']], $row['code'], $row['subject'])); $row['mbody'] = explode("\n", $row['body']); // $body_const = w(); foreach ($row['mbody'] as $part_i => $part_row) { if (isset($row['mbody'][$part_i - 1]) && f($row['mbody'][$part_i - 1]) && f($row['mbody'][$part_i])) { $row['mbody'][$part_i] = "\n" . $part_row; } } $row['body'] = implode("\n", $row['mbody']); $v_mail = array('USERNAME' => $row['from_d']['user_username'], 'FULLNAME' => entity_decode(_fullname($row['from_d'])), 'SUBJECT' => entity_decode($row['subject']), 'MESSAGE' => entity_decode($row['body']), 'TICKET_URL' => _link('ticket', array('x1' => 'view', 'code' => $row['code']))); if (!$row['reply']) { $_c['normal']++; $sql_insert = array('parent' => 0, 'cat' => 1, 'group' => $row['group_id'], 'title' => _subject($row['subject']), 'text' => _prepare($row['body']), 'code' => $row['code'], 'contact' => $row['from_d']['user_id'], 'aby' => 0, 'status' => $ticket_status, 'start' => $row['date'], 'lastreply' => $row['date'], 'end' => 0, 'ip' => $row['ip']); $sql = 'INSERT INTO _tickets' . 
_build_array('INSERT', prefix('ticket', $sql_insert)); _sql($sql); // Send mail to user $emailer->email_address($row['from']); $emailer->from($row['to'] . '@' . $core->v('domain')); $emailer->set_subject($row['msubject']); $emailer->use_template('ticket_' . $row['to']); $emailer->set_decode(true); $emailer->assign_vars($v_mail); $emailer->send(); $emailer->reset(); // > Send mail to group admin if (count($groups_mods[$row['to']])) { foreach ($groups_mods[$row['to']] as $i => $mod_email) { $address_func = !$i ? 'email_address' : 'cc'; $emailer->{$address_func}($mod_email); } $emailer->from($row['to'] . '@' . $core->v('domain')); $emailer->set_subject($row['msubject']); $emailer->use_template('ticket_' . ($row['reply'] ? 'reply' : 'tech')); $emailer->set_decode(true); $emailer->assign_vars($v_mail); $emailer->send(); $emailer->reset(); } } else { $_c['reply']++; $sql_insert = array('ticket_id' => $row['ticket'], 'user_id' => $row['from_d']['user_id'], 'note_text' => htmlencode($row['body']), 'note_time' => $row['date'], 'note_cc' => 1); $sql = 'INSERT INTO _tickets_notes' . _build_array('INSERT', $sql_insert); _sql($sql); $sql = 'UPDATE _tickets SET ticket_lastreply = ? WHERE ticket_id = ?'; _sql(sql_filter($sql, $row['date'], $row['ticket'])); // Send mail to group members || user $sql = 'SELECT * FROM _tickets_assign a, _members m WHERE a.assign_ticket = ? AND a.user_id = m.user_id AND m.user_username NOT IN (?)'; $tech = _rowset(sql_filter($sql, $row['ticket'], $row['from_d']['user_username'])); if ($row['mod'] != $row['from_d']['user_username']) { $tech[] = $row['mod']; } if (count($tech)) { foreach ($tech as $tech_i => $tech_row) { $m_method = !$tech_i ? 'email_address' : 'cc'; $emailer->{$m_method}($tech_row . '@' . $core->v('domain')); } $emailer->from($row['to'] . '@' . 
$core->v('domain')); $emailer->use_template('ticket_reply'); $emailer->set_subject($row['msubject']); $emailer->set_decode(true); $emailer->assign_vars($v_mail); $emailer->send(); $emailer->reset(); } } // Delete mail from server $pop3->delete($i); } // Quit server $pop3->quit(); $ret = ''; foreach ($_c as $k => $v) { $ret .= "\n" . $k . ' = ' . $v . '<br />'; } return $this->e($ret); }
/**
 * Truncates HTML to roughly $max characters of text while closing every tag
 * that is still open at the cut.
 *
 * The input is entity-decoded and passed through html2text first. When the
 * tag-stripped result is not longer than $max it is returned
 * htmlentities()-encoded. Otherwise the markup is walked node by node; in the
 * text node where the running length reaches $max the string is cut (at the
 * last $separateur before the limit when the node overshoots and a separator
 * exists, else at the end of that node), $suffix is appended and the open
 * tags are closed in reverse order.
 *
 * NOTE(review): the truncated result is returned without htmlentities()
 * while the untruncated one is encoded - confirm callers expect that.
 *
 * @param string $chaine     HTML to truncate
 * @param int    $max        Maximum text length, measured with strlen()
 * @param string $separateur Preferred cut character(s)
 * @param string $suffix     Appended to a truncated result
 * @return string
 */
function TronqueHtml($chaine, $max, $separateur = ' ', $suffix = ' ...')
{
    $chaine = html_entity_decode($chaine, ENT_QUOTES, 'UTF-8');
    // Plain assignment: "=& new" is a parse error since PHP 7.
    $h2t = new html2text($chaine);
    $chaine = $h2t->get_text();

    if (strlen(strip_tags($chaine)) <= $max) {
        return htmlentities($chaine, ENT_QUOTES, "UTF-8");
    }

    $tabElements = array(); // names of the currently open tags
    $cur_len = 0;           // text length seen so far
    $parser = new HtmlParser($chaine);
    while ($parser->parse()) {
        if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
            array_push($tabElements, $parser->iNodeName);
        } elseif ($parser->iNodeType == NODE_TYPE_ENDELEMENT) {
            // Pop until the matching opening tag is found. Stopping on an
            // empty stack prevents the endless loop the previous version
            // entered for a closing tag without an opening one.
            do {
                $ouverte = array_pop($tabElements);
            } while ($ouverte !== null && $ouverte != $parser->iNodeName);
            if ($ouverte === null) {
                echo 'Erreur : pas de balise ouvrante pour ' . $parser->iNodeName;
            }
        } elseif ($parser->iNodeType == NODE_TYPE_TEXT) {
            $longueur = $parser->iNodeEnd - $parser->iNodeStart;
            if ($cur_len + $longueur < $max) {
                $cur_len += $longueur;
                continue;
            }

            // The limit is reached inside (or exactly at the end of) this node.
            $coupe = $parser->iNodeEnd;
            if ($cur_len + $longueur > $max) {
                $debut = substr($parser->iNodeValue, 0, $max - $cur_len + strlen($separateur));
                $pos = strrpos($debut, $separateur);
                if ($pos !== false) {
                    $coupe = $parser->iNodeStart + $pos;
                }
            }

            $resultat = substr($chaine, 0, $coupe) . $suffix;
            while (($balise = array_pop($tabElements)) !== null) {
                $resultat .= '</' . $balise . '>';
            }
            return $resultat;
        }
    }

    return htmlentities($chaine, ENT_QUOTES, "UTF-8");
}
echo "\nLJ URL: " . $url . "\n"; echo "Archive URL: " . $archiveURL . "\n"; ob_start(); $ch = curl_init(); /// initialize a cURL session curl_setopt($ch, CURLOPT_URL, $archiveURL); curl_exec($ch); curl_close($ch); $curlResponse = ob_get_clean(); if (!is_string($curlResponse) || !strlen($curlResponse)) { echo "Failure Contacting blog: {$archiveURL}\n\n"; break; } else { $contents = str_replace("\n", "", $curlResponse); } $archiveParser = new HtmlParser($contents); while ($archiveParser->parse()) { if (strtolower($archiveParser->iNodeName) == "div") { if (strpos($archiveParser->iNodeAttributes["id"], "post-") !== false) { $entryID = trim(substr($archiveParser->iNodeAttributes["id"], 5)); //echo "Entry ID: " . $entryID . "\n"; break 2; } } } break; } } } while ($ljParser->parse()) { if (strtolower($ljParser->iNodeName) == "table") {
/**
 * Replaces the images referenced in the message body with embedded CIDs.
 *
 * Image references are normalised and extracted with HtmlParser; each one is
 * embedded through Swift_Image unless it is an "http:" address that does not
 * start with the site's base URL and $this->embedExternal is off.
 *
 * NOTE(review): only the "http:" prefix marks an address as external, so
 * "https:" addresses are always embedded - confirm that is intended.
 */
protected function embedImages()
{
    // Site root directory
    $root = Helper::getWebroot();
    $baseUrlFull = Helper::getBaseUrl(true);

    // Find all images
    $this->body = HtmlParser::normalizeImgs($this->body);
    //echo $this->body; exit;
    $imgs = HtmlParser::extrairImgs($this->body);

    // Replace every IMAGE with its CID
    foreach ($imgs as $img) {
        // On linux a real file path may start with "/" as well, hence the is_file() check
        if ($img[0] == '/' && !is_file($img)) {
            // URL-style reference: build a filesystem path
            $path = dirname($root) . $img;
        } else {
            // use what was read as is
            $path = $img;
        }

        // It is http and the address differs from our own base URL
        $foreignHttp = strpos($path, $baseUrlFull) !== 0 && strpos($path, 'http:') === 0;
        if ($foreignHttp && !$this->embedExternal) {
            // External images are only added when that is enabled
            continue;
        }

        $cid = $this->message->message->embed(Swift_Image::fromPath($path));
        $this->body = str_replace($img, $cid, $this->body);
    }
}
/**
 * Wrapper around zaufi's HTML sucker code, used only for its HTML-to-wiki
 * conversion.
 *
 * Shows error.tpl through the global $smarty and terminates when the
 * compiled grammar file cannot be opened.
 *
 * \param &$inHtml string -- HTML in
 * \return string -- the generated wiki markup
 */
function parse_html(&$inHtml)
{
    // The error path below uses $smarty; without this declaration it was an
    // undefined local and the error page could never be shown.
    global $smarty;

    //error_reporting(6143);
    // Read compiled (serialized) grammar
    $grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
    if (!($fp = @fopen($grammarfile, 'r'))) {
        $smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
        $smarty->display("error.tpl");
        die;
    }
    $grammar = unserialize(fread($fp, filesize($grammarfile)));
    fclose($fp);

    // create parser object, insert html code and parse it
    $htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
    $htmlparser->Parse();

    // Should I try to convert HTML to wiki?
    $out_data = '';
    $p = array('stack' => array(), 'listack' => array(), 'first_td' => false, 'first_tr' => false);
    walk_and_parse($htmlparser->content, $out_data, $p, '');

    // Is some tags still opened? (It can be if HTML not valid, but this is not reason
    // to produce invalid wiki :)
    while (count($p['stack'])) {
        $e = end($p['stack']);
        $out_data .= $e['string'];
        array_pop($p['stack']);
    }

    // Unclosed lists are ignored... wiki has no special start/end list syntax.
    // Things that remain to do:
    // 1) fix linked images
    $out_data = preg_replace(',\\[(.*)\\|\\(img src=(.*)\\)\\],mU', '{img src=$2 link=$1}', $out_data);
    // 2) fix remaining images (not in links)
    $out_data = preg_replace(',\\(img src=(.*)\\),mU', '{img src=$1}', $out_data);
    // 3) remove empty lines
    $out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
    // 4) remove nbsp's
    // NOTE(review): as written the pattern matches a plain space - confirm
    // the intended character survived in this file.
    $out_data = preg_replace(", ,mU", " ", $out_data);

    return $out_data;
}
} // generate name with timestamp $mpdf->Output('DANIELFANICA_CV_' . date('YmdHis') . '.pdf', $pdfAction); print_r($mpdf); break; /** * EXPERIMENTAL FEATURE!!! */ /** * EXPERIMENTAL FEATURE!!! */ case '/cv/docx': if (ENVIRONMENT !== 'production') { VsWord::autoLoad(); $doc = new VsWord(); $parser = new HtmlParser($doc); $contents = file_get_contents("http://{$_SERVER['SERVER_NAME']}/cv"); $parser->parse($contents); // echo "<pre>{$doc->getDocument()->getBody()->look()}</pre>"; $timestamp = time(); $doc->saveAs("docx/danielfanica_cv_demo_{$timestamp}.docx"); } break; default: if (ENVIRONMENT !== 'development') { // how far behind to we want to look for data? (in days) $prev_no_days = 90; // https://gist.github.com/daitr-gu/472c4f18522172542cca // http://stackoverflow.com/questions/29742460/how-to-get-steps-count-with-google-fit-api-in-php $client = new Google_Client(); $client->setClientId($var['google']['fitness_api']['client_id']);
} }

/**
 * Append-only collection of elements.
 */
class HtmlElements
{
    /** @var array Elements added so far, in insertion order */
    protected $elements = array();

    /**
     * Appends one element to the collection.
     *
     * @param mixed $element Element to store; no type is enforced here
     *                       (presumably a DOMElement - verify against the caller)
     */
    public function addChild($element)
    {
        $this->elements[] = $element;
    }

    /**
     * Returns the number of elements in the collection.
     *
     * @return int
     */
    public function count()
    {
        return count($this->elements);
    }
}

// Demo: print how many 'div' elements are found for the given URL, or the
// exception message when anything throws.
try {
    $dom = new HtmlParser('http://www.bbc.com/');
    $elements = $dom->getElements('div');
    echo $elements->count();
} catch (Exception $e) {
    echo $e->getMessage();
}
/**
 * Parses an HTML fragment with HtmlParser, walks the parsed content with
 * WalkParsedArray() and passes the collected result to flush().
 *
 * @param string &$data HTML source; modified in place, because comments and
 *                      other "<!...>" declarations are removed before parsing
 * @return void
 */
function insert_html(&$data)
{
    // Strip out HTML comments which don't get parsed right.
    // Real comments first: they may span several lines and may contain '>',
    // neither of which the generic pattern below can handle.
    $data = preg_replace('#<!--.*?-->#s', '', $data);
    // Then any remaining "<!...>" construct on a single line.
    $data = preg_replace('#<!.*?[^>]>#', '', $data);

    // create parser object, insert html code and parse it
    $htmlparser = new HtmlParser($data, $this->html_grammar, '', 0);
    $htmlparser->Parse();

    // now set it together
    $src = '';
    $dummy = array();
    $this->WalkParsedArray($htmlparser->content, $src, $dummy);

    $this->flush($src);
}
} $sdta = @file_get_contents($suck_url); if (isset($php_errormsg) && strlen($php_errormsg)) { $gBitSystem->fatalError(tra("Can't import remote HTML page")); } // Need to parse HTML? if ($parsehtml == 'y') { // Read compiled( serialized ) grammar $grammarfile = UTIL_PKG_PATH . 'htmlparser/htmlgrammar.cmp'; if (!($fp = @fopen($grammarfile, 'r'))) { $gBitSystem->fatalError(tra("Can't parse remote HTML page")); } $grammar = unserialize(fread($fp, filesize($grammarfile))); fclose($fp); // create parser object, insert html code and parse it $htmlparser = new HtmlParser($sdta, $grammar, '', 0); $htmlparser->Parse(); // Should I try to convert HTML to wiki? $parseddata = ''; $p = array('stack' => array(), 'listack' => array(), 'first_td' => false); walk_and_parse($htmlparser->content, $parseddata, $p); // Is some tags still opened?( It can be if HTML not valid, but this is not reason // to produce invalid wiki : ) while (count($p['stack'])) { $e = end($p['stack']); $sdta .= $e['string']; array_pop($p['stack']); } // Unclosed lists r ignored... wiki have no special start/end lists syntax.... // OK. Things remains to do: // 1 ) fix linked images
//get finish $finish = microtime(true); $total_time = round($finish - $start, 4); $resp->url = $_GET['arg0']; $resp->start = $start; $resp->finish = $finish; $resp->totalSeconds = $total_time; //no follow? $obeyNoFollow = true; if (isset($_GET['arg1']) && $_GET['arg1'] == 'false') { $obeyNoFollow = false; } //host $host = Utils::getBaseHost($_GET['arg0']); //parse the page $parser = new HtmlParser($result, $_GET['arg0']); $links = $parser->getTags('a'); $resp->links = array(); foreach ($links as $node) { if (!empty($node->attributes['href']) && !(isset($node->attributes['rel']) && strtolower($node->attributes['rel']) === 'nofollow' && $obeyNoFollow)) { $link = $node->attributes['href']; //validate hosts $lHost = Utils::getBaseHost($link); if (empty($lHost)) { $lHost = $host; } if ($lHost !== $host) { break; } $normal = Utils::normalizePath($link, $_GET['arg0']); if (!in_array($normal, $resp->links)) {
<?php
// Demo of the node-by-node parser: walk a small document and dump the type,
// name and value of every node the parser reports.
include "htmlparser.inc";

$htmlText = "<html><!-- comment --><body>This is the body</body></html>";
$parser = new HtmlParser($htmlText);

// parse() advances to the next node and returns false once the input is exhausted
while ($parser->parse()) {
    echo "-----------------------------------\r\n",
         "Node type: ", $parser->iNodeType, "\r\n",
         "Node name: ", $parser->iNodeName, "\r\n",
         "Node value: ", $parser->iNodeValue, "\r\n";
}
/**
 * Downloads a page and harvests the links found in it.
 *
 * @param string $url       URL of the page to download and harvest links from
 * @param string $referer   Referer URL passed along when downloading the page
 * @param string $open_tag  Opening tag that delimits a link
 * @param string $close_tag Closing tag that delimits a link
 * @return array Resolved link addresses, in the order they were found
 */
protected function harvestLinks($url, $referer, $open_tag, $close_tag)
{
    # Base address that the harvested hrefs are resolved against
    $base = Resolver::getBasePageAddress($url);
    if ($this->config['webbot']['base_domain_relative_links']) {
        $base = Resolver::getBaseDomainAddress($base);
    }

    # Download the page and cut out the delimited tags
    // (the tag delimiters could be moved into a configuration setting)
    $page = $this->downloadPage($url, $referer);
    $tags = HtmlParser::parse2Array($page['FILE'], $open_tag, $close_tag);

    # Resolve the href of every tag, logging each result
    $harvested = array();
    $total = count($tags);
    for ($index = 0; $index < $total; $index++) {
        $resolved = Resolver::resolveAddress(
            HtmlParser::getAttribute($tags[$index], "href"),
            $base
        );
        $harvested[] = $resolved;
        $this->logActivity("Harvested: " . $resolved);
    }

    return $harvested;
}
/**
 * Extracts the cell texts of an HTML table as a list of rows.
 *
 * The markup is read node by node. Every opening <tr> starts a new row, every
 * closing </td> ends a cell, and the text nodes seen since the previous cell
 * boundary form the cell's content. Text that follows the last closed cell of
 * a row is discarded. A segment that holds non-blank text but no closed cell
 * is reported as an empty row.
 *
 * Cell text is kept verbatim: rows are collected directly into arrays, so
 * characters such as '/' or '::' inside a cell are no longer lost.
 *
 * @param string $Table HTML markup of the table
 * @return array list of rows, each a list of cell strings; empty when nothing was found
 */
function ParseTable($Table)
{
    $data = array();    // always defined, even when the markup has no rows
    $cells = array();   // closed cells of the row being read
    $pending = '';      // text collected since the last cell boundary

    $parser = new HtmlParser($Table);
    while ($parser->parse()) {
        $name = strtolower($parser->iNodeName);

        if ($name === 'tr' && $parser->iNodeType != NODE_TYPE_ENDELEMENT) {
            // A new row begins: store what was collected for the previous one.
            if ($cells || trim($pending) !== '') {
                $data[] = $cells;
            }
            $cells = array();
            $pending = '';
        } elseif ($name === 'td' && $parser->iNodeType == NODE_TYPE_ENDELEMENT) {
            // Closing cell: everything collected so far is its content.
            $cells[] = $pending;
            $pending = '';
        } elseif ($parser->iNodeName == 'Text' && isset($parser->iNodeValue)) {
            $pending .= $parser->iNodeValue;
        }
    }

    // Store the last row; trailing text after its final cell is dropped.
    if ($cells || trim($pending) !== '') {
        $data[] = $cells;
    }

    return $data;
}
/**
 * Check param tags in the content.
 *
 * Every <param ...> tag found in $content is parsed. When its "name" attribute
 * is one of movie, src, href, url or source and its "value" attribute is
 * rejected by isWhiteDomain(), every occurrence of that tag in $content is
 * replaced by its HTML-escaped form.
 *
 * @param string &$content HTML content, modified in place
 * @return void
 */
function checkParamTag(&$content)
{
    if (!preg_match_all('/<\\s*param\\s*[^>]+(?:\\/?>?)/is', $content, $m)) {
        return;
    }

    // Parameter names whose value is checked against the white list.
    $checkedNames = array('movie', 'src', 'href', 'url', 'source');

    foreach ($m[0] as $paramTag) {
        $parser = new HtmlParser($paramTag);
        while ($parser->parse()) {
            // empty() also covers nodes that carry no such attribute at all,
            // which previously raised undefined-index notices.
            if (empty($parser->iNodeAttributes['name']) || empty($parser->iNodeAttributes['value'])) {
                continue;
            }
            if (!in_array(strtolower($parser->iNodeAttributes['name']), $checkedNames, true)) {
                continue;
            }
            if (!$this->isWhiteDomain($parser->iNodeAttributes['value'])) {
                // double_encode is off so an already escaped tag is not escaped twice
                $content = str_replace($paramTag, htmlspecialchars($paramTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
            }
        }
    }
}
/**
 * Adjusts the open-element hierarchy for the current tag, then defers to the
 * parent implementation.
 *
 * When $self_close is null it is looked up in $this->tags_selfclose and the
 * result is stored in $this->status['self_close']. For an opening tag that is
 * not self-closing, the last hierarchy entry is popped if the current tag is
 * registered in $this->tags_optional_close for that entry's tag.
 *
 * @param bool|null $self_close whether the tag closes itself; null to look it up
 * @return mixed whatever parent::parse_hierarchy() returns
 */
protected function parse_hierarchy($self_close = null)
{
    $current = strtolower($this->status['tag_name']);

    if ($self_close === null) {
        $self_close = isset($this->tags_selfclose[$current]);
        $this->status['self_close'] = $self_close;
    }

    if (!$self_close && !$this->status['closing_tag']) {
        // Index access rather than end(), so the array's internal pointer is left alone.
        $lastIndex = count($this->hierarchy) - 1;
        $previous = strtolower($this->hierarchy[$lastIndex]->tag);

        if (isset($this->tags_optional_close[$current][$previous])) {
            array_pop($this->hierarchy);
        }
    }

    return parent::parse_hierarchy($self_close);
}
/**
 * Returns the stylesheet classes available for the slot of the given content.
 *
 * The template of the content's page is parsed to collect the id of the slot
 * and of the elements that contain it. The stylesheets linked by the template
 * are then concatenated and searched for class rules attached to one of those
 * ids, and for class rules not attached to any element.
 *
 * @param object $content content object; getW3sSlot() and getW3sPage() are called on it
 * @param int    $mode    0 retrieves only the class names [default],
 *                        1 retrieves the full css rules
 *
 * @return array mode 0: class names keyed by themselves, plus 'w3sNone' => 'None';
 *               mode 1: list of the css rules found
 */
public static function findStylesheetClasses($content, $mode = 0)
{
    // Palliative: the parser class relies on call-time pass-by-reference, so
    // only fatal errors are reported from here on. error_reporting() is used
    // because ini_set() does not resolve the constant name 'E_ERROR' given as
    // a string.
    // NOTE(review): the previous level is not restored, as in the original.
    error_reporting(E_ERROR);

    require_once dirname(__FILE__) . '/../tools/parser/htmlparser.inc';
    require_once dirname(__FILE__) . '/../tools/parser/common.inc';

    $slotName = $content->getW3sSlot()->getSlotName();
    $page = $content->getW3sPage();

    // Opens the template and parses its structure
    $templateAttributes = self::retrieveTemplateAttributesFromPage($page);
    $templateFile = self::getTemplateFile($templateAttributes["projectName"], $templateAttributes["templateName"]);
    $p = new HtmlParser($templateFile, unserialize(Read_File("parser/htmlgrammar.cmp")), $templateFile, 1);
    $p->Parse();
    $src = "";
    GetPageSrc($p->content, $src);

    // Capture the textual dump of the parsed structure; it is searched below.
    ob_start();
    PrintArray($p->content);
    $contents = ob_get_clean();

    // Finds the id of the slot and of the elements that contain it. The loop
    // is bounded so a badly formed template cannot make it run forever.
    $elements = array($slotName);
    for ($i = 1; $i < 100; $i++) {
        // Key path of the element whose content holds the current id
        $pattern = '/(.*)\\[content\\].*\\[pars\\]\\[id\\]\\[value\\]=' . preg_quote($slotName, '/') . '/';
        if (!preg_match($pattern, $contents, $res)) {
            break;
        }

        // The id stored under that key path
        $pattern = '/' . preg_quote($res[1], '/') . '\\[pars\\]\\[id\\]\\[value\\]=(.*)/';
        if (!preg_match($pattern, $contents, $res)) {
            // The containing element has no id: nothing further to follow.
            break;
        }

        $elements[] = $res[1];
        $slotName = $res[1];
    }

    // Finds all the template's stylesheets
    $templateContents = file_get_contents($templateFile);
    if ($templateContents === false) {
        $templateContents = '';
    }
    $templateContents = str_replace("\r\n", "", $templateContents);
    preg_match_all('/.*?rel=["|\']stylesheet["|\'].*?href\\s*=\\s*["|\'](.*?)["|\'].*?/', $templateContents, $stylesheets);

    // Creates a single stylesheet from the stylesheets retrieved
    $contents = '';
    foreach ($stylesheets[1] as $stylesheet) {
        // Drop the first character of the href
        // (presumably a leading slash, making the path relative - confirm)
        $stylesheet = substr($stylesheet, 1);
        if (!is_file($stylesheet)) {
            continue;
        }
        $currentContent = file_get_contents($stylesheet);
        if ($currentContent === false) {
            continue;
        }
        $currentContent = str_replace("\r\n", "", $currentContent);
        $currentContent = preg_replace('/HTML>.*?}+?/', '', $currentContent);
        $contents .= $currentContent;
    }

    // Find classes from xhtml elements
    $result = $mode == 0 ? array('w3sNone' => 'None') : array();
    foreach ($elements as $element) {
        $id = preg_quote(trim($element), '/');
        $expression = $mode == 0
            ? '/#' . $id . '[a-zA-Z0-9-_:\\s]*\\.(.*?)\\{+?/'
            : '/#' . $id . '[a-zA-Z0-9-_:\\s]*(\\..*?\\{.*?\\})+?/';
        preg_match_all($expression, $contents, $classes);
        foreach ($classes[1] as $class) {
            if ($mode == 0) {
                $result[$class] = $class;
            } else {
                $result[] = $class;
            }
        }
    }

    // Find classes not associated to xhtml elements
    $expression = $mode == 0 ? '/(^|})\\.(.*?)\\{+?/' : '/(^|})(\\..*?\\{.*?\\})+?/';
    preg_match_all($expression, $contents, $classes);
    foreach ($classes[2] as $class) {
        if ($mode == 0) {
            $result[$class] = $class;
        } else {
            $result[] = $class;
        }
    }

    return $result;
}