HtmlParser PHP代码示例

示例#1

0

显示文件

文件： MozWrap.php 项目： Nubtehy/openProjects

 /**
  * Collects the SEOmoz "just discovered" links for the current URL.
  *
  * The raw service response is also dumped to 'just-discovered.txt' in the
  * current working directory. Rows are read from the table whose id attribute
  * is "results"; a row is kept only when its first cell has non-empty text.
  *
  * @return array Rows keyed by 'link', 'text', 'pageAuthority',
  *               'DomainAuthority' and 'DiscoveryTime'. Empty when the
  *               response contains no table at all; a single 'No Data' row
  *               when tables exist but none of them is the results table.
  */
 public function getMozJustDiscovered()
 {
     $html = $this->moz->getData(MozServices::JD, $this->url);
     file_put_contents('just-discovered.txt', $html);
     $parser = new HtmlParser($html, $this->url);
     $tables = $parser->getTags('table');
     $results = array();
     if (count($tables) < 1) {
         return $results;
     }
     // Locate the results table among all tables of the response.
     $table = null;
     foreach ($tables as $tbl) {
         if (isset($tbl->attributes['id']) && $tbl->attributes['id'] === 'results') {
             $table = $tbl;
             break;
         }
     }
     if (empty($table)) {
         //no moz data
         array_push($results, array('link' => 'No Data', 'text' => 'No Data', 'pageAuthority' => 'No Data', 'DomainAuthority' => 'No Data', 'DiscoveryTime' => 'No Data'));
         return $results;
     }
     //moz has data
     $columns = array('link', 'text', 'pageAuthority', 'DomainAuthority', 'DiscoveryTime');
     $p2 = new HtmlParser($table->raw, $this->url);
     foreach ($p2->getTags('tr') as $tr) {
         $p3 = new HtmlParser($tr->raw, $this->url);
         $tds = $p3->getTags('td');
         if (empty($tds[0]->text)) {
             continue;
         }
         $row = array();
         foreach ($columns as $index => $column) {
             // A short row (fewer than five cells) yields '' for the missing
             // columns instead of reading an undefined offset.
             $cell = isset($tds[$index]->text) ? $tds[$index]->text : '';
             $row[$column] = trim(strip_tags($cell));
         }
         // Collapse runs of whitespace inside the discovery time.
         $row['DiscoveryTime'] = preg_replace('/\\s+/', ' ', $row['DiscoveryTime']);
         array_push($results, $row);
     }
     return $results;
 }

示例#2

0

显示文件

文件： HtmlParserTest.php 项目： neymanna/fusionforge

 /**
  * Checks that a set of small HTML fragments is converted to the expected
  * wiki markup by HtmlParser in the "PhpWiki2" dialect.
  *
  * When USE_GLOBAL_SAX is truthy a single parser is shared by all fragments
  * and the global 'xml_parser_root' is dropped after each one; otherwise a
  * fresh parser is built and explicitly destructed per fragment.
  */
 function testSimple()
 {
     $html2wiki = array(
         "<b>bold</b>" => "*bold*",
         "<strong>strong</strong>" => "*strong*",
         "<i>italic</i>" => "_italic_",
         "<em>emphasized</em>" => "_emphasized_",
         "<HR>" => "----",
         "<DT><DD>Indent</DD></DT>" => ";:Indent",
         "<NOWIKI>nowiki</NOWIKI>" => "<verbatim>\nnowiki\n</verbatim>",
         "<DL><DT> Def </DT><DD> List</DD></DL>" => "; Def : List",
     );
     $sharedSax = USE_GLOBAL_SAX;
     if ($sharedSax) {
         // NOTE(review): the original carried a "will not work!" remark here,
         // presumably about this shared-parser mode -- confirm.
         $parser = new HtmlParser("PhpWiki2");
     }
     foreach ($html2wiki as $fragment => $expected) {
         if ($sharedSax) {
             $parser->parse($fragment, false);
         } else {
             // redefine it for every run.
             $parser = new HtmlParser("PhpWiki2");
             $parser->parse($fragment);
         }
         $this->assertEquals($expected, trim($parser->output()));
         if ($sharedSax) {
             unset($GLOBALS['xml_parser_root']);
         } else {
             $parser->__destruct();
         }
     }
 }

示例#3

0

显示文件

文件： ForkRemote.php 项目： nickel715/phorkie

 /**
  * Runs HtmlParser::extractGitUrls() on $this->url and copies the parser's
  * git URLs and error onto this object.
  *
  * @return mixed Whatever extractGitUrls() returned.
  */
 public function parse()
 {
     $htmlParser = new HtmlParser();
     $extracted = $htmlParser->extractGitUrls($this->url);

     $this->arGitUrls = $htmlParser->getGitUrls();
     $this->error = $htmlParser->error;

     return $extracted;
 }

示例#4

0

显示文件

文件： docbook.apidoc.php 项目： dapepe/tymio

 /**
  * Builds the document through the parent constructor, remembers the
  * template and prepares a source parser.
  *
  * @param mixed  $mxtDoc      Forwarded unchanged to the parent constructor.
  * @param bool   $bolLoadFile Forwarded unchanged to the parent constructor.
  * @param string $strTemplate Stored in $this->strTemplate.
  */
 public function __construct($mxtDoc, $bolLoadFile = false, $strTemplate = '')
 {
     parent::__construct($mxtDoc, $bolLoadFile);
     $this->strTemplate = $strTemplate;

     // Configure the source parser completely before exposing it on $this.
     $parser = new \Docbook\SrcParser();
     $parser->setWrapper('/*!', '*/');
     $parser->setNodeTemplate(array('cmd' => false, 'params' => array()));
     $this->srcParser = $parser;
 }

示例#5

0

显示文件

文件： LinkbackReceiver.php 项目： nickel715/phorkie

 /**
  * Stores the linkback as remote fork in the paste repository.
  *
  * Git URLs are extracted from the linkback source; the first one (if any)
  * becomes the remote's clone URL and title. An already known remote is
  * reused when one of its clone URLs or its web URL matches, otherwise a new
  * "fork-<uniqid>" remote id is used. The data is written with "git config".
  *
  * @param string $target     Target URI that should be linked in $source
  * @param string $source     Linkback source URI that should link to target
  * @param string $sourceBody Content of $source URI
  * @param object $res        HTTP response from fetching $source
  *
  * @return void
  *
  * @throws SPb\Exception When storing the linkback fatally failed
  */
 public function storeLinkback($target, $source, $sourceBody, \HTTP_Request2_Response $res)
 {
     //FIXME: deleted
     //FIXME: cleanuptask
     $hp = new HtmlParser();
     $ok = $hp->extractGitUrls($source, $sourceBody);
     if ($ok === false) {
         //failed to extract git URL from linkback source
         //FIXME: send exception
         //$hp->error
         return;
     }
     $ci = $this->repo->getConnectionInfo();
     $forks = $ci->getForks();
     $arRemoteCloneUrls = $this->localizeGitUrls($hp->getGitUrls());
     $remoteCloneUrl = $remoteTitle = null;
     if (count($arRemoteCloneUrls)) {
         // First entry: key is the clone URL, value is its title.
         // key()/current() replace each(), which was removed in PHP 8.0.
         reset($arRemoteCloneUrls);
         $remoteCloneUrl = key($arRemoteCloneUrls);
         $remoteTitle = current($arRemoteCloneUrls);
     }
     $remoteid = 'fork-' . uniqid();
     //check if we already know this remote
     foreach ($forks as $remote) {
         if (isset($arRemoteCloneUrls[$remote->getCloneUrl()])) {
             $remoteTitle = $arRemoteCloneUrls[$remote->getCloneUrl()];
             $remoteid = $remote->getName();
             break;
         } elseif ($source == $remote->getWebURL(true)) {
             $remoteid = $remote->getName();
             break;
         }
     }
     $vc = $this->repo->getVc();
     if (!$this->isLocalWebUrl($source)) {
         //only add remote homepage; we can calculate local ones ourselves
         $vc->getCommand('config')->addArgument('remote.' . $remoteid . '.homepage')->addArgument($source)->execute();
     }
     if ($remoteTitle !== null) {
         $vc->getCommand('config')->addArgument('remote.' . $remoteid . '.title')->addArgument($remoteTitle)->execute();
     }
     if ($remoteCloneUrl !== null) {
         $vc->getCommand('config')->addArgument('remote.' . $remoteid . '.url')->addArgument($remoteCloneUrl)->execute();
     }
 }

示例#6

0

显示文件

文件： Participant.php 项目： riaf/jp.rhaco-users.kaigi

 /**
  * Pre-insert hook: accepts the participant only for an active event.
  *
  * On acceptance a hash is generated from the name, the current time and a
  * random number, and, when a mail address is set, the rendered
  * 'thanks.mtpl' template is mailed to the participant.
  *
  * @param object $db Database handle used to load the event.
  * @return bool true when the event exists and is active, false otherwise.
  */
 function beforeInsert($db)
 {
     $event = $db->get(new Event($this->event));
     if (!Variable::istype('Event', $event) || !$event->isActive($db)) {
         return false;
     }

     $this->hash = crypt($this->name . time() . mt_rand(0, 9999));
     if (!$this->mail) {
         return true;
     }

     // Render the thank-you template and mail it to the participant.
     $template = new HtmlParser('thanks.mtpl');
     $template->setVariable('participant', $this);
     $message = new Mail('*****@*****.**', 'rhaco kaigi');
     $message->to($this->mail, $this->name);
     $message->subject('rhaco kaigi');
     $message->message($template->read());
     $message->send();

     return true;
 }

示例#7

0

显示文件

文件： clock.php 项目： jamesrusso/Aastra_Scripts

/**
 * Returns the text that follows the "Current Time" label in an HTML string.
 *
 * The nodes are scanned in order: once a Text node containing
 * 'Current Time' (case-insensitive) has been seen, the value of the next
 * Text node is returned.
 *
 * @param string $string HTML to scan.
 * @return mixed The node value of the Text node after the label, or null
 *               when the label or a following Text node is not found.
 */
function filter_format($string)
{
    $parser = new HtmlParser($string);
    // Initialised so a miss returns null without an undefined-variable error.
    $value = null;
    $labelSeen = false;
    while ($parser->parse()) {
        if ($parser->iNodeName != 'Text') {
            continue;
        }
        if ($labelSeen) {
            $value = $parser->iNodeValue;
            break;
        }
        if (stristr($parser->iNodeValue, 'Current Time')) {
            $labelSeen = true;
        }
    }
    return $value;
}

示例#8

0

显示文件

文件： AlternatesParser.php 项目： innmind/crawler

 /**
  * Combines the attributes produced by the HTTP and the HTML parser.
  *
  * If $http has no HttpParser::key() entry, $html is returned untouched; if
  * $html has no HtmlParser::key() entry, $http is returned untouched.
  * Otherwise both entries are merged and stored in $http under self::key().
  */
 private function merge(MapInterface $http, MapInterface $html) : MapInterface
 {
     $httpHasEntry = $http->contains(HttpParser::key());
     if (!$httpHasEntry) {
         return $html;
     }

     $htmlHasEntry = $html->contains(HtmlParser::key());
     if (!$htmlHasEntry) {
         return $http;
     }

     $fromHttp = $http->get(HttpParser::key());
     $fromHtml = $html->get(HtmlParser::key());

     return $http->put(self::key(), $fromHttp->merge($fromHtml));
 }

示例#9

0

显示文件

文件： index.php 项目： riaf/jp.rhaco-users.kaigi

<?php

// Entry page: renders 'index.html' with the current event and a
// HatenaSyntax formatter made available to the template.
require dirname(__FILE__) . '/__init__.php';
Rhaco::import('tag.HtmlParser');

$db = new DbUtil(Event::connection());
$p = new HtmlParser('index.html');

// Event whose id equals the CURRENT_EVENT constant (default 1).
$p->setVariable(
    'event',
    $db->get(
        new Event(),
        new C(
            Q::depend(),
            Q::eq(Event::columnId(), Rhaco::constant('CURRENT_EVENT', 1))
        )
    )
);

// Formatter object exposed to the template under the name 'hatena'.
$p->setVariable(
    'hatena',
    Rhaco::obj('HatenaSyntax', array('headlevel' => 4, 'id' => 'event_description'))
);

$p->write();

示例#10

0

显示文件

文件： body_text_extractor.php 项目： ngxuanmui/hanhphuc.vn

 /**
  * Extracts what this class considers the body text of an HTML string.
  *
  * The string is tokenized by HtmlParser::parser(); every token is classified
  * as tag (1), closing tag (0) or other (-1, collected as text), the
  * classification is folded into the $endcode array, and the best scoring
  * range of $endcode decides how many text tokens are returned.
  *
  * @param string $string HTML to analyse.
  * @return string The selected text tokens joined by single spaces; '' when
  *                $string compares loosely equal to ''; the literal message
  *                'Cannot read your data !' when the parser result is falsy.
  */
 public function extractFromString($string)
 {
     $body_text = array();
     if ($string != '') {
         $o_HtmlParser = new HtmlParser();
         $a_DataParser = $o_HtmlParser->parser($string);
         if (!$a_DataParser) {
             return 'Cannot read your data !';
         }
         /* Classify every token: 1 = tag, 0 = closing tag, -1 = anything else (kept as text) */
         $i = 0;
         $j = 0;
         $k = 0;
         $endcode = array();
         $idx = 0;
         $token = array();
         // NOTE(review): $binary_token is never initialised; it only exists if
         // 'tokenized_data' has at least one entry.
         foreach ($a_DataParser['tokenized_data'] as $index => $sz_tokenizedData) {
             /* If is tag */
             if ($this->isTag($a_DataParser, $index)) {
                 $binary_token[$i] = 1;
             } elseif ($this->isTagClose($a_DataParser, $index)) {
                 $binary_token[$i] = 0;
             } else {
                 $binary_token[$i] = -1;
                 $token[$idx] = $sz_tokenizedData;
                 $idx++;
             }
             $i++;
         }
         /* Fold the classification into $endcode: consecutive values of the
            same sign are summed into one slot, a closing tag gets a 0 slot of its own */
         for ($k = 0; $k < $i; $k++) {
             $x = $binary_token[$k];
             /* Add an index for close tag. We will use it when calculate body text */
             if ($x == 0) {
                 $j++;
                 $endcode[$j] = 0;
                 $j++;
                 continue;
             }
             // NOTE(review): $endcode[$j] may not exist yet when it is read here
             // and below; it then counts as 0 (with a notice/warning).
             if (abs($x + $endcode[$j]) < abs($endcode[$j])) {
                 // Adding $x would shrink the magnitude, i.e. the sign changes: open a new slot.
                 $j++;
             }
             $endcode[$j] += $x;
         }
         /* Extract body text */
         $i_max = 0;
         $j_max = 0;
         $max = 0;
         // Try every pair of negative (text) slots i <= j and keep the best scoring one.
         for ($i = 0; $i < count($endcode) - 1; $i++) {
             if ($endcode[$i] >= 0) {
                 continue;
             }
             for ($j = $i; $j < count($endcode); $j++) {
                 if ($endcode[$j] >= 0) {
                     continue;
                 }
                 /* Calculate max in range [i .. j] */
                 $S = $this->i_TagBefore($endcode, $i) + $this->i_fTagAfter($endcode, $j) + $this->i_fTextBetween($endcode, $i, $j);
                 if ($S > $max) {
                     $max = $S;
                     $i_max = $i;
                     $j_max = $j;
                 }
             }
         }
         /* Calculate start and end point */
         // Each slot before the boundary contributes 1 when it is 0, otherwise its absolute value.
         $start = 0;
         $end = 0;
         for ($i = 0; $i < $i_max; $i++) {
             if ($endcode[$i] == 0) {
                 $start++;
             } else {
                 $start += abs($endcode[$i]);
             }
         }
         for ($i = 0; $i < $j_max; $i++) {
             if ($endcode[$i] == 0) {
                 $end++;
             } else {
                 $end += abs($endcode[$i]);
             }
         }
         // NOTE(review): $return_text is never used.
         $return_text = array();
         /* Calculate body text */
         // NOTE(review): $start is computed above but only the disabled loop
         // header below used it; the active loop always starts at token 0.
         // $end also counts tag slots while $token holds text tokens only, so
         // $token[$i] may be undefined for large $i -- confirm intent.
         //for($i = $start - 1; $i <= $end -1; $i++)
         for ($i = 0; $i <= $end - 1; $i++) {
             $body_text[] = $token[$i];
         }
     }
     $body_text = implode(' ', $body_text);
     return $body_text;
 }

示例#11

0

显示文件

文件： question.php 项目： xiongchiamiov/wwmqt-svn

 /**
  * @desc Generates the HTML for a particular question. The derivation is rendered through the WebworkClient when it is not cached yet; form fields are then renamed to 'resp<question>_<name>' and pre-filled with the given answers.
  * @param integer $seed The seed of the question.
  * @param array $answers An array of answers that needs to be rendered. Passed by reference: on return it holds the answers that were filled into a field, in document order.
  * @param object $event The event object.
  * @return string The HTML question representation.
  */
 public function render($seed, &$answers, $event)
 {
     //JIT Derivation creation
     //Usually we have this from the check answers call
     if (!isset($this->_derivation)) {
         $client = WebworkClient::Get();
         $env = WebworkQuestion::DefaultEnvironment();
         $env->problemSeed = $seed;
         $result = $client->renderProblem($env, $this->_data->code);
         $derivation = new stdClass();
         $derivation->html = base64_decode($result->output);
         $derivation->seed = $result->seed;
         $this->_derivation = $derivation;
     }
     //re-key the answers by field name
     $orderedanswers = array();
     $tempanswers = array();
     foreach ($answers as $answer) {
         $tempanswers[$answer->field] = $answer;
     }
     $answers = $tempanswers;
     $showpartialanswers = $this->_data->grading;
     $questionhtml = "";
     $parser = new HtmlParser($this->_derivation->html);
     $currentselect = "";
     while ($parser->parse()) {
         //set when the tag of the current node was already written
         $textarea = false;
         //change some attributes of html tags for moodle compliance
         if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
             $nodename = $parser->iNodeName;
             //reset per element so a field without a name attribute does not
             //inherit the name of the previous element
             $name = isset($parser->iNodeAttributes['name']) ? $parser->iNodeAttributes['name'] : '';
             //handle generic change of node's attribute name
             if ($nodename == "INPUT" || $nodename == "SELECT" || $nodename == "TEXTAREA") {
                 $parser->iNodeAttributes['name'] = 'resp' . $this->_data->question . '_' . $name;
                 if ($event == QUESTION_EVENTGRADE && isset($answers[$name]) && $showpartialanswers) {
                     $class = isset($parser->iNodeAttributes['class']) ? $parser->iNodeAttributes['class'] : "";
                     $parser->iNodeAttributes['class'] = $class . ' ' . question_get_feedback_class($answers[$name]->score);
                 }
             }
             //handle specific change
             if ($nodename == "INPUT") {
                 $nodetype = isset($parser->iNodeAttributes['type']) ? strtoupper($parser->iNodeAttributes['type']) : '';
                 if ($nodetype == "CHECKBOX") {
                     $value = isset($parser->iNodeAttributes['value']) ? $parser->iNodeAttributes['value'] : '';
                     //only look at the answer when there is one for this field
                     if ($value !== '' && isset($answers[$name]) && strstr($answers[$name]->answer, $value)) {
                         //FILLING IN ANSWER (CHECKBOX)
                         array_push($orderedanswers, $answers[$name]);
                         $parser->iNodeAttributes['checked'] = '1';
                     }
                     $parser->iNodeAttributes['name'] = $parser->iNodeAttributes['name'] . '_' . $value;
                 } elseif ($nodetype == "TEXT" && isset($answers[$name])) {
                     //FILLING IN ANSWER (FIELD)
                     array_push($orderedanswers, $answers[$name]);
                     $parser->iNodeAttributes['value'] = $answers[$name]->answer;
                 }
             } elseif ($nodename == "SELECT") {
                 $currentselect = $name;
             } elseif ($nodename == "OPTION") {
                 $optionvalue = isset($parser->iNodeAttributes['value']) ? $parser->iNodeAttributes['value'] : null;
                 //without an answer for the select nothing is selected; an
                 //empty option value must not match a missing answer
                 if (isset($answers[$currentselect]) && $optionvalue == $answers[$currentselect]->answer) {
                     //FILLING IN ANSWER (DROPDOWN)
                     array_push($orderedanswers, $answers[$currentselect]);
                     $parser->iNodeAttributes['selected'] = '1';
                 }
             } elseif ($nodename == "TEXTAREA" && isset($answers[$name])) {
                 array_push($orderedanswers, $answers[$name]);
                 $textarea = true;
                 $questionhtml .= $parser->printTag();
                 //NOTE(review): the answer is written unescaped between the
                 //textarea tags -- confirm it is escaped upstream
                 $questionhtml .= $answers[$name]->answer;
             }
         }
         if (!$textarea) {
             $questionhtml .= $parser->printTag();
         }
     }
     $answers = $orderedanswers;
     return $questionhtml;
 }

示例#12

0

显示文件

文件： questiontype.php 项目： bjornbe/wwassignment

 /**
  * @desc Prints the question. Loads the question_webwork_derived record referenced by the state's 'derivationid' response, rewrites the form fields of its HTML and includes the display template.
  * @param $question object The question object to print.
  * @param $state object The state of the responses for the question.
  * @param $cmoptions object Options containing course ID.
  * @param $options object Options; a non-empty 'readonly' disables the controls.
  */
 function print_question_formulation_and_controls(&$question, &$state, $cmoptions, $options)
 {
     global $CFG, $USER;
     //Several locals below are not read again in this function; presumably
     //the included display.html uses them, so they are all kept.
     $readonly = empty($options->readonly) ? '' : 'disabled="disabled"';
     //Formulate question image and text
     $questiontext = $this->format_text($question->questiontext, $question->questiontextformat, $cmoptions);
     $image = get_question_image($question, $cmoptions->course);
     $derivationid = $state->responses['derivationid'];
     $derivation = get_record('question_webwork_derived', 'id', $derivationid);
     $unparsedhtml = base64_decode($derivation->html);
     //partial answers
     $showPartiallyCorrectAnswers = $question->grading;
     //new array keyed by field
     $fieldhash = $state->responses['answers'];
     $answerfields = array();
     $parser = new HtmlParser($unparsedhtml);
     $currentselect = "";
     //must exist before the first append in the loop below
     $problemhtml = '';
     while ($parser->parse()) {
         //change some attributes of html tags for moodle compliance
         if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
             $nodename = $parser->iNodeName;
             //not every element carries a name attribute
             $name = isset($parser->iNodeAttributes['name']) ? $parser->iNodeAttributes['name'] : '';
             //handle generic change of node's attribute name
             if ($nodename == "INPUT" || $nodename == "SELECT" || $nodename == "TEXTAREA") {
                 $parser->iNodeAttributes['name'] = 'resp' . $question->id . '_' . $name;
                 if ($state->event == QUESTION_EVENTGRADE && isset($fieldhash[$name]) && $showPartiallyCorrectAnswers) {
                     $class = isset($parser->iNodeAttributes['class']) ? $parser->iNodeAttributes['class'] : '';
                     $parser->iNodeAttributes['class'] = $class . ' ' . question_get_feedback_class($fieldhash[$name]['score']);
                 }
                 if (!strstr($name, 'previous')) {
                     //fields without a submitted answer are recorded as null
                     $answerfields[$name] = isset($fieldhash[$name]) ? $fieldhash[$name] : null;
                 }
             }
             //handle specific change
             if ($nodename == "INPUT") {
                 //put submitted value into field
                 if (isset($fieldhash[$name])) {
                     $parser->iNodeAttributes['value'] = $fieldhash[$name]['answer'];
                 }
             } elseif ($nodename == "SELECT") {
                 $currentselect = $name;
             } elseif ($nodename == "OPTION") {
                 $optionvalue = isset($parser->iNodeAttributes['value']) ? $parser->iNodeAttributes['value'] : null;
                 $selectedanswer = isset($fieldhash[$currentselect]['answer']) ? $fieldhash[$currentselect]['answer'] : null;
                 //loose comparison kept on purpose: same matches as before
                 if ($optionvalue == $selectedanswer) {
                     $parser->iNodeAttributes['selected'] = '1';
                 }
             }
         }
         $problemhtml .= $parser->printTag();
     }
     //for the seed form field
     $qid = $question->id;
     $seed = $state->responses['seed'];
     //if the student has answered
     include "{$CFG->dirroot}/question/type/webwork/display.html";
 }

示例#13

0

显示文件

文件： docbook.svndoc.php 项目： dapepe/tymio

 /**
  * Builds the document through the parent constructor and remembers the
  * template.
  *
  * $strTemplate has a default so that no required parameter follows the
  * optional $bolLoadFile (deprecated since PHP 8.0); existing calls that
  * pass all three arguments behave as before.
  *
  * @param mixed  $mxtDoc      Forwarded unchanged to the parent constructor.
  * @param bool   $bolLoadFile Forwarded unchanged to the parent constructor.
  * @param string $strTemplate Stored in $this->strTemplate.
  */
 public function __construct($mxtDoc, $bolLoadFile = false, $strTemplate = '')
 {
     parent::__construct($mxtDoc, $bolLoadFile);
     $this->strTemplate = $strTemplate;
 }

示例#14

0

显示文件

文件： editlib.php 项目： railfuture/tiki-website

	/**
	 * Wrapper around zaufi's HTML sucker code, used only for its HTML-to-wiki
	 * conversion.
	 *
	 * Displays error.tpl and terminates the script when the compiled grammar
	 * file cannot be opened.
	 *
	 * \param &$inHtml string -- HTML in; empty <p></p> pairs are removed from it in place
	 * \return string -- the wiki markup produced from the HTML
	 */
	function parse_html(&$inHtml)
	{
		global $smarty;

		// include_once: this method may run more than once per request and the
		// parser library must only be loaded the first time
		include_once ('lib/htmlparser/htmlparser.inc');

		// Read compiled (serialized) grammar
		$grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
		if (!$fp = @fopen($grammarfile, 'r')) {
			$smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
			$smarty->display("error.tpl");
			die;
		}
		$grammar = unserialize(fread($fp, filesize($grammarfile)));
		fclose($fp);

		// process a few ckeditor artifacts
		$inHtml = str_replace('<p></p>', '', $inHtml);	// empty p tags are invisible

		// create parser object, insert html code and parse it
		$htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
		$htmlparser->Parse();
		// Should I try to convert HTML to wiki?
		$out_data = '';
		/*
		 * ['stack'] = array
		 * Special keys introduced to convert to Wiki
		 * - ['wikitags']     = the number of 'wikistack' entries produced by the html tag
		 *
		 * ['wikistack'] = array(), is used to save the wiki markup for the linebreak handling (1 array = 1 html tag)
		 * Each array entry contains the following keys:
		 * - ['begin']        = array() of begin markups (1 style definition = 1 array entry)
		 * - ['end']          = array() of end markups
		 *
		 * wiki_lbr  = true if we must use '%%%' for linebreaks instead of '\n'
		 */
		$p = array('stack' => array(), 'listack' => array(), 'wikistack' => array(),
			'wiki_lbr' => 0, 'first_td' => false, 'first_tr' => false);
		$this->walk_and_parse($htmlparser->content, $out_data, $p, '');
		// Are some tags still open? (Possible if the HTML is not valid, but that
		// is no reason to produce invalid wiki :)
		while (count($p['stack'])) {
			$e = end($p['stack']);
			$out_data .= $e['string'];
			array_pop($p['stack']);
		}
		// Unclosed lists are ignored... wiki has no special start/end list syntax....
		// OK. Things that remain to do:
		// 1) fix linked images
		$out_data = preg_replace(',\[(.*)\|\(img src=(.*)\)\],mU', '{img src=$2 link=$1}', $out_data);
		// 2) fix remaining images (not in links)
		$out_data = preg_replace(',\(img src=(.*)\),mU', '{img src=$1}', $out_data);
		// 3) remove empty lines
		$out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
		// 4) remove nbsp's
		$out_data = preg_replace(",&#160;,mU", " ", $out_data);

		return $out_data;
	}	// end parse_html

示例#15

0

显示文件

文件： _cron.php 项目： nopticon/tts

    /**
     * Cron task: imports the messages of the ticket POP3 mailbox as tickets.
     *
     * Pass 1 reads the headers and body of every message and classifies it
     * (blacklist, sender unknown to _members = "other", reply to an existing
     * ticket code, or new ticket). Pass 2 acts on the classification: it
     * forwards "other" mail to the group moderators, inserts new tickets or
     * ticket notes, sends the notification mails and deletes every handled
     * message from the server.
     *
     * @return mixed Result of $this->e(): an error key when cron is disabled,
     *               the server is unreachable or the login yields no mail,
     *               otherwise one "<counter> = <n><br />" line per category.
     */
    protected function _ticket_home()
    {
        global $user, $core;
        if (!$core->v('cron_enabled')) {
            return $this->e('CRON_DISABLED');
        }
        foreach (w('mail pop3 emailer htmlparser') as $row) {
            require_once XFS . 'core/' . $row . '.php';
        }
        $pop3 = new pop3();
        if (!$pop3->connect($core->v('mail_server'), $core->v('mail_port'))) {
            return $this->e('MAIL_NO_CONNECT');
        }
        if (!($total_mail = $pop3->login('recent:' . $core->v('mail_ticket_login'), $core->v('mail_ticket_key')))) {
            return $this->e('MAIL_NEW_MAIL');
        }
        //
        $mail = new _mail();
        $emailer = new emailer();
        // Lookup tables (two of them cached through $core)
        if (!($blacklist = $core->cache_load('ticket_blacklist'))) {
            $sql = 'SELECT *
				FROM _tickets_blacklist
				ORDER BY list_id';
            $blacklist = $core->cache_store(_rowset($sql, 'list_address', 'list_id'));
        }
        if (!($ticket_status = $core->cache_load('ticket_status_default'))) {
            $sql = 'SELECT status_id
				FROM _tickets_status
				WHERE status_default = 1';
            $ticket_status = $core->cache_store(_field($sql, 'status_id', 0));
        }
        $sql = 'SELECT group_id, group_email
			FROM _groups
			ORDER BY group_email';
        $groups = _rowset($sql, 'group_email', 'group_id');
        $sql = 'SELECT group_email, group_name
			FROM _groups
			ORDER BY group_email';
        $groups_name = _rowset($sql, 'group_email', 'group_name');
        $sql = 'SELECT gg.group_email, m.user_email
			FROM _groups gg, _groups_members g, _members m
			WHERE g.member_mod = ?
				AND g.member_uid = m.user_id
				AND gg.group_id = g.member_group
			ORDER BY m.user_email';
        $groups_mods = _rowset(sql_filter($sql, 1), 'group_email', 'user_email', true);
        // Make sure every group has a (possibly empty) moderator list.
        foreach ($groups as $a_group_email => $a_group_id) {
            if (!isset($groups_mods[$a_group_email])) {
                $groups_mods[$a_group_email] = w();
            }
        }
        $sql = 'SELECT s.a_assoc, s.a_value
			FROM _members_fields f, _members_store s
			WHERE s.a_field = f.field_id
				AND f.field_alias LIKE ?
			ORDER BY s.a_value';
        $email_alt = _rowset(sql_filter($sql, 'email%'), 'a_value', 'a_assoc');
        // Pre mail process
        $recv = w();
        $now = time();
        $line_orig = array('&nbsp;');
        $line_repl = array(' ');
        $_v = w('from from_d to ticket subject body date mod ip spam blacklist reply other');
        $_c = w('normal reply other blacklist spam', 0);
        for ($i = 1; $i <= $total_mail; $i++) {
            // Reset the per-message $recv_* variables.
            foreach ($_v as $row) {
                ${'recv_' . $row} = 0;
            }
            // explode() replaces split(), which was removed in PHP 7; the
            // separator is a literal CRLF so the result is the same.
            $s_header = $mail->parse_header(explode("\r\n", implode('', $pop3->top($i))));
            $recv_from = $mail->parse_address($s_header['from']);
            if (isset($blacklist[$recv_from])) {
                $recv_blacklist = 1;
            }
            if ($recv_from == $core->v('mail_ticket_login')) {
                $recv_blacklist = 1;
            }
            _dvar($s_header['to'], '');
            _dvar($s_header['cc'], '');
            if (f($s_header['cc'])) {
                $s_header['to'] .= (f($s_header['to']) ? ', ' : '') . $s_header['cc'];
            }
            $to_part = array_map('trim', explode(strpos($s_header['to'], ',') ? ',' : ';', $s_header['to']));
            foreach ($to_part as $row) {
                if (strpos($row, '<') !== false) {
                    $row = preg_replace('#.*?<(.*?)>#is', '\\1', $row);
                }
                if (isset($blacklist[$row])) {
                    $recv_blacklist = 1;
                } else {
                    // NOTE(review): this also clears a blacklist flag set for
                    // the sender above -- confirm that this is intended.
                    $recv_blacklist = 0;
                    $row_first = array_key(explode('@', $row), 0);
                    if (isset($groups[$row_first])) {
                        $recv_to = $row_first;
                    }
                }
            }
            if (strstr($s_header['to'], _lang('MAIL_TO_UNKNOWN')) !== false) {
                $recv_to = array_key(explode('@', $core->v('mail_ticket_login')), 0);
            }
            if (!$recv_to) {
                $recv_blacklist = 1;
            }
            if (!$recv_blacklist) {
                $recv_subject = htmlencode(trim($s_header['subject']));
                // A "[#code]" marker in the subject identifies a reply to an existing ticket.
                if (preg_match('#\\[\\#(.*?)\\]#is', $recv_subject, $p_subject)) {
                    $sql = 'SELECT ticket_id
						FROM _tickets
						WHERE ticket_code = ?';
                    if ($recv_subject_d = _fieldrow(sql_filter($sql, $p_subject[1]))) {
                        $recv_ticket = $recv_subject_d['ticket_id'];
                        $recv_reply = $p_subject[1];
                        $recv_subject = substr(strrchr($recv_subject, ']'), 3);
                    }
                }
                if ($recv_to . '@' . $core->v('domain') == $recv_from && $recv_from == $core->v('mail_ticket_login') && $recv_reply) {
                    $recv_blacklist = 1;
                }
            }
            if (!$recv_blacklist) {
                if (isset($email_alt[$recv_from])) {
                    $sql_field = 'id';
                    $sql_value = $email_alt[$recv_from];
                } else {
                    $sql_field = 'username';
                    $sql_value = array_key(explode('@', $recv_from), 0);
                }
                $sql = 'SELECT user_id, user_username, user_firstname, user_lastname
					FROM _members
					WHERE user_?? = ?';
                if ($recv_from_d = _fieldrow(sql_filter($sql, $sql_field, $sql_value))) {
                    $recv_from_d = serialize(array_row($recv_from_d));
                } else {
                    $recv_other = 1;
                }
                $d_body = $mail->body($s_header, $pop3->fbody($i), true);
                $recv_date = $mail->parse_date($s_header['date']);
                $recv_ip = $mail->parse_ip($s_header['received']);
                // NOTE(review): $groups_email is not defined anywhere in this
                // method, so this branch never runs and 'mod' stays 0 -- confirm
                // which lookup table was meant.
                if (isset($groups_email[$recv_to])) {
                    $recv_mod = $groups_email[$recv_to];
                }
                // Dates in the future or older than one day are replaced by "now".
                if ($recv_date > $now || $recv_date < $now - 86400) {
                    $recv_date = $now;
                }
                if (isset($d_body['text-plain']) && f($d_body['text-plain'])) {
                    $recv_body = trim($d_body['text-plain']);
                } elseif (isset($d_body['text-html']) && f($d_body['text-html'])) {
                    // No plain text part: collect the text nodes of the HTML part.
                    $htm_text = w();
                    $tag_open = false;
                    $parser = new HtmlParser($d_body['text-html']);
                    while ($parser->parse()) {
                        $line = trim(str_replace($line_orig, $line_repl, $parser->iNodeValue));
                        if ($tag_open || strpos($line, '<') !== false) {
                            $tag_open = !$tag_open;
                            continue;
                        }
                        if ($parser->iNodeName == 'Text' && f($line)) {
                            $htm_text[] = preg_replace("/(\r\n){1}/", ' ', $line);
                        }
                    }
                    $recv_body = implode("\n", $htm_text);
                }
                if (f($recv_body)) {
                    $recv_body = htmlencode(_utf8($recv_body));
                }
                if (!f($recv_body)) {
                    $recv_blacklist = 1;
                }
            }
            // Snapshot the $recv_* variables of this message.
            $recv[$i] = w();
            foreach ($_v as $row) {
                $recv[$i][$row] = ${'recv_' . $row};
            }
        }
        // $i is the POP3 message number; the inner loops below use their own
        // index variable so the right message is deleted afterwards.
        foreach ($recv as $i => $row) {
            if ($row['spam'] || $row['blacklist']) {
                $pop3->delete($i);
                $row_key = $row['spam'] ? 'spam' : 'blacklist';
                $_c[$row_key]++;
                continue;
            }
            // Send mail to group admin
            if ($row['other']) {
                $_c['other']++;
                if (count($groups_mods[$row['to']])) {
                    foreach ($groups_mods[$row['to']] as $mod_i => $mod_email) {
                        $email_func = !$mod_i ? 'email_address' : 'cc';
                        $emailer->{$email_func}($mod_email);
                    }
                    $emailer->from($row['from']);
                    $emailer->replyto($row['from']);
                    $emailer->set_subject(entity_decode($row['subject']));
                    $emailer->use_template('ticket_other');
                    $emailer->set_decode(true);
                    $emailer->assign_vars(array('SUBJECT' => entity_decode($row['subject']), 'MESSAGE' => entity_decode($row['body'])));
                    $emailer->send();
                    $emailer->reset();
                }
                $pop3->delete($i);
                continue;
            }
            $row['code'] = $row['reply'] ? $row['reply'] : substr(md5(unique_id()), 0, 8);
            $row['from_d'] = unserialize($row['from_d']);
            $row['group_id'] = $groups[$row['to']];
            $row['msubject'] = entity_decode(sprintf('%s [#%s]: %s', $groups_name[$row['to']], $row['code'], $row['subject']));
            $row['mbody'] = explode("\n", $row['body']);
            // Prefix a line with an extra newline when it and the previous line are both non-empty.
            foreach ($row['mbody'] as $part_i => $part_row) {
                if (isset($row['mbody'][$part_i - 1]) && f($row['mbody'][$part_i - 1]) && f($row['mbody'][$part_i])) {
                    $row['mbody'][$part_i] = "\n" . $part_row;
                }
            }
            $row['body'] = implode("\n", $row['mbody']);
            $v_mail = array('USERNAME' => $row['from_d']['user_username'], 'FULLNAME' => entity_decode(_fullname($row['from_d'])), 'SUBJECT' => entity_decode($row['subject']), 'MESSAGE' => entity_decode($row['body']), 'TICKET_URL' => _link('ticket', array('x1' => 'view', 'code' => $row['code'])));
            if (!$row['reply']) {
                $_c['normal']++;
                $sql_insert = array('parent' => 0, 'cat' => 1, 'group' => $row['group_id'], 'title' => _subject($row['subject']), 'text' => _prepare($row['body']), 'code' => $row['code'], 'contact' => $row['from_d']['user_id'], 'aby' => 0, 'status' => $ticket_status, 'start' => $row['date'], 'lastreply' => $row['date'], 'end' => 0, 'ip' => $row['ip']);
                $sql = 'INSERT INTO _tickets' . _build_array('INSERT', prefix('ticket', $sql_insert));
                _sql($sql);
                // Send mail to user
                $emailer->email_address($row['from']);
                $emailer->from($row['to'] . '@' . $core->v('domain'));
                $emailer->set_subject($row['msubject']);
                $emailer->use_template('ticket_' . $row['to']);
                $emailer->set_decode(true);
                $emailer->assign_vars($v_mail);
                $emailer->send();
                $emailer->reset();
                // > Send mail to group admin
                if (count($groups_mods[$row['to']])) {
                    foreach ($groups_mods[$row['to']] as $mod_i => $mod_email) {
                        $address_func = !$mod_i ? 'email_address' : 'cc';
                        $emailer->{$address_func}($mod_email);
                    }
                    $emailer->from($row['to'] . '@' . $core->v('domain'));
                    $emailer->set_subject($row['msubject']);
                    $emailer->use_template('ticket_' . ($row['reply'] ? 'reply' : 'tech'));
                    $emailer->set_decode(true);
                    $emailer->assign_vars($v_mail);
                    $emailer->send();
                    $emailer->reset();
                }
            } else {
                $_c['reply']++;
                $sql_insert = array('ticket_id' => $row['ticket'], 'user_id' => $row['from_d']['user_id'], 'note_text' => htmlencode($row['body']), 'note_time' => $row['date'], 'note_cc' => 1);
                $sql = 'INSERT INTO _tickets_notes' . _build_array('INSERT', $sql_insert);
                _sql($sql);
                $sql = 'UPDATE _tickets SET ticket_lastreply = ?
					WHERE ticket_id = ?';
                _sql(sql_filter($sql, $row['date'], $row['ticket']));
                // Send mail to group members || user
                $sql = 'SELECT *
					FROM _tickets_assign a, _members m
					WHERE a.assign_ticket = ?
						AND a.user_id = m.user_id
						AND m.user_username NOT IN (?)';
                $tech = _rowset(sql_filter($sql, $row['ticket'], $row['from_d']['user_username']));
                if ($row['mod'] != $row['from_d']['user_username']) {
                    $tech[] = $row['mod'];
                }
                if (count($tech)) {
                    foreach ($tech as $tech_i => $tech_row) {
                        $m_method = !$tech_i ? 'email_address' : 'cc';
                        $emailer->{$m_method}($tech_row . '@' . $core->v('domain'));
                    }
                    $emailer->from($row['to'] . '@' . $core->v('domain'));
                    $emailer->use_template('ticket_reply');
                    $emailer->set_subject($row['msubject']);
                    $emailer->set_decode(true);
                    $emailer->assign_vars($v_mail);
                    $emailer->send();
                    $emailer->reset();
                }
            }
            // Delete mail from server
            $pop3->delete($i);
        }
        // Quit server
        $pop3->quit();
        $ret = '';
        foreach ($_c as $k => $v) {
            $ret .= "\n" . $k . ' = ' . $v . '<br />';
        }
        return $this->e($ret);
    }

示例#16

0

显示文件

文件： app_controller.php 项目： voidtek/artist-directory

/**
 * Truncate an HTML string to about $max bytes of text, closing any tags
 * that are still open at the cut point.
 *
 * The input is entity-decoded and passed through html2text before being
 * measured. If the tag-stripped result is not longer than $max, the whole
 * string is returned re-encoded with htmlentities(). Otherwise the string
 * is walked with HtmlParser and cut inside the text node that crosses the
 * limit, preferably at the last $separateur found before the limit.
 *
 * Lengths are measured with strlen()/substr(), i.e. in bytes.
 *
 * @param string $chaine     HTML to truncate
 * @param int    $max        Maximum length of text to keep
 * @param string $separateur Preferred cut boundary (a space by default)
 * @param string $suffix     Text appended after the cut
 * @return string            Truncated markup, or the entity-encoded input
 */
function TronqueHtml($chaine, $max, $separateur = ' ', $suffix = ' ...')
{
    $chaine = html_entity_decode($chaine, ENT_QUOTES, 'UTF-8');
    // Plain assignment: "=& new" is a parse error on PHP 7 and later.
    $h2t = new html2text($chaine);
    $chaine = $h2t->get_text();
    if (strlen(strip_tags($chaine)) > $max) {
        // Names of the elements currently open, innermost last.
        $tabElements = array();
        // Text length accumulated from the text nodes seen so far.
        $cur_len = 0;
        $parser = new HtmlParser($chaine);
        while ($parser->parse()) {
            if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
                array_push($tabElements, $parser->iNodeName);
            } elseif ($parser->iNodeType == NODE_TYPE_ENDELEMENT) {
                // Unwind the stack down to the matching opening tag.
                while (array_pop($tabElements) != $parser->iNodeName) {
                    if (count($tabElements) < 1) {
                        echo 'Erreur : pas de balise ouvrante pour ' . $parser->iNodeName;
                        // Nothing left to pop: without this break array_pop()
                        // keeps returning null and the loop never terminates.
                        break;
                    }
                }
            } elseif ($parser->iNodeType == NODE_TYPE_TEXT) {
                $cur_max = $cur_len + $parser->iNodeEnd - $parser->iNodeStart;
                if ($cur_max == $max) {
                    // This node ends exactly on the limit: keep it whole.
                    $cut = $parser->iNodeEnd;
                } elseif ($cur_max > $max) {
                    // The limit falls inside this node: cut at the last
                    // separator before it, or keep the whole node if the
                    // separator does not occur.
                    $window = substr($parser->iNodeValue, 0, $max - $cur_len + strlen($separateur));
                    $pos = strrpos($window, $separateur);
                    $cut = $pos !== false ? $parser->iNodeStart + $pos : $parser->iNodeEnd;
                } else {
                    $cur_len = $cur_max;
                    continue;
                }
                $resultat = substr($chaine, 0, $cut) . $suffix;
                // Close every element still open, innermost first.
                while (($balise = array_pop($tabElements)) !== null) {
                    $resultat .= '</' . $balise . '>';
                }
                return $resultat;
            }
        }
    }
    $chaine = htmlentities($chaine, ENT_QUOTES, "UTF-8");
    return $chaine;
}

示例#17

0

显示文件

文件： ljcommentgrabber-wp.php 项目： brockboland/LiveJournal-Comment-Scraper

             echo "\nLJ URL: " . $url . "\n";
             echo "Archive URL: " . $archiveURL . "\n";
             ob_start();
             $ch = curl_init();
             /// initialize a cURL session
             curl_setopt($ch, CURLOPT_URL, $archiveURL);
             curl_exec($ch);
             curl_close($ch);
             $curlResponse = ob_get_clean();
             if (!is_string($curlResponse) || !strlen($curlResponse)) {
                 echo "Failure Contacting blog: {$archiveURL}\n\n";
                 break;
             } else {
                 $contents = str_replace("\n", "", $curlResponse);
             }
             $archiveParser = new HtmlParser($contents);
             while ($archiveParser->parse()) {
                 if (strtolower($archiveParser->iNodeName) == "div") {
                     if (strpos($archiveParser->iNodeAttributes["id"], "post-") !== false) {
                         $entryID = trim(substr($archiveParser->iNodeAttributes["id"], 5));
                         //echo "Entry ID: " . $entryID . "\n";
                         break 2;
                     }
                 }
             }
             break;
         }
     }
 }
 while ($ljParser->parse()) {
     if (strtolower($ljParser->iNodeName) == "table") {

示例#18

0

显示文件

文件： Email.php 项目： bruno-melo/components

 /**
  * Replace the images referenced in the message body with embedded CIDs.
  *
  * The body is first normalised with HtmlParser::normalizeImgs(), then every
  * image returned by HtmlParser::extrairImgs() is embedded through
  * Swift_Image and its reference in the body is replaced by the returned CID.
  * Images on a remote host (http/https URL that does not start with the
  * site's base URL) are only embedded when $this->embedExternal is set.
  *
  * @return void
  */
 protected function embedImages()
 {
     // Site root directory
     $root = Helper::getWebroot();
     $baseUrlFull = Helper::getBaseUrl(true);
     // Find every image
     $this->body = HtmlParser::normalizeImgs($this->body);
     $imgs = HtmlParser::extrairImgs($this->body);
     // Replace each image reference by its CID
     foreach ($imgs as $img) {
         if ($img === '') {
             // Nothing to embed, and $img[0] would raise a warning
             continue;
         }
         // On Linux a real filesystem path may also start with "/", so a
         // leading slash only means "site-relative URL" when no such file exists
         $urlFormat = $img[0] == '/' && !is_file($img);
         // Build a filesystem path, or use the reference as it was read
         $path = $urlFormat ? dirname($root) . $img : $img;
         // Remote = absolute http(s) URL; the original only matched "http:",
         // so external https images bypassed the embedExternal switch
         $isRemoteUrl = preg_match('#^https?:#i', $path) === 1;
         $internal = !($isRemoteUrl && strpos($path, $baseUrlFull) !== 0);
         // External images are only added when that is enabled
         if ($internal or $this->embedExternal) {
             $cid = $this->message->message->embed(Swift_Image::fromPath($path));
             $this->body = str_replace($img, $cid, $this->body);
         }
     }
 }

示例#19

0

显示文件

文件： tiki-editpage.php 项目： Kraiany/kraiany_site_docker

/**
 * Wrapper around zaufi's HTML sucker code, used only for its HTML-to-wiki
 * conversion.
 *
 * Loads the serialized grammar from lib/htmlparser/htmlgrammar.cmp, parses
 * the HTML with HtmlParser, converts the parse tree with walk_and_parse()
 * and tidies the generated text (images, empty lines, &#160;). If the
 * grammar file cannot be opened an error page is displayed and the script
 * terminates.
 *
 * \param &$inHtml string -- HTML in
 * \return string -- converted text out
 */
function parse_html(&$inHtml)
{
    // The error path below needs the global template engine; without this
    // declaration $smarty is an undefined local and the call is a fatal error.
    global $smarty;
    //error_reporting(6143);
    // Read compiled (serialized) grammar
    $grammarfile = 'lib/htmlparser/htmlgrammar.cmp';
    if (!($fp = @fopen($grammarfile, 'r'))) {
        $smarty->assign('msg', tra("Can't parse HTML data - no grammar file"));
        $smarty->display("error.tpl");
        die;
    }
    // NOTE(review): unserialize() is only acceptable here because the grammar
    // is a file shipped with the application, not user-supplied data.
    $grammar = unserialize(fread($fp, filesize($grammarfile)));
    fclose($fp);
    // create parser object, insert html code and parse it
    $htmlparser = new HtmlParser($inHtml, $grammar, '', 0);
    $htmlparser->Parse();
    // Should I try to convert HTML to wiki?
    $out_data = '';
    $p = array('stack' => array(), 'listack' => array(), 'first_td' => false, 'first_tr' => false);
    walk_and_parse($htmlparser->content, $out_data, $p, '');
    // Are some tags still open? (This can happen if the HTML is not valid,
    // but that is no reason to produce invalid wiki.) Emit the pending
    // string of each remaining stack entry, innermost first.
    while (count($p['stack'])) {
        $e = end($p['stack']);
        $out_data .= $e['string'];
        array_pop($p['stack']);
    }
    // Unclosed lists are ignored: wiki has no special list start/end syntax.
    // Remaining clean-up:
    // 1) fix linked images
    $out_data = preg_replace(',\\[(.*)\\|\\(img src=(.*)\\)\\],mU', '{img src=$2 link=$1}', $out_data);
    // 2) fix the remaining images (those not inside links)
    $out_data = preg_replace(',\\(img src=(.*)\\),mU', '{img src=$1}', $out_data);
    // 3) remove empty lines
    $out_data = preg_replace(",[\n]+,mU", "\n", $out_data);
    // 4) replace non-breaking-space entities with plain spaces
    $out_data = preg_replace(",&#160;,mU", " ", $out_data);
    return $out_data;
}

示例#20

0

显示文件

文件： index.php 项目： dfanica/Daniel-Fanica-Personal-Website

     }
     // generate name with timestamp
     $mpdf->Output('DANIELFANICA_CV_' . date('YmdHis') . '.pdf', $pdfAction);
     print_r($mpdf);
     break;
     /**
      * EXPERIMENTAL FEATURE!!!
      */
 /**
  * EXPERIMENTAL FEATURE!!!
  */
 case '/cv/docx':
     if (ENVIRONMENT !== 'production') {
         VsWord::autoLoad();
         $doc = new VsWord();
         $parser = new HtmlParser($doc);
         $contents = file_get_contents("http://{$_SERVER['SERVER_NAME']}/cv");
         $parser->parse($contents);
         // echo "<pre>{$doc->getDocument()->getBody()->look()}</pre>";
         $timestamp = time();
         $doc->saveAs("docx/danielfanica_cv_demo_{$timestamp}.docx");
     }
     break;
 default:
     if (ENVIRONMENT !== 'development') {
         // how far behind to we want to look for data? (in days)
         $prev_no_days = 90;
         // https://gist.github.com/daitr-gu/472c4f18522172542cca
         // http://stackoverflow.com/questions/29742460/how-to-get-steps-count-with-google-fit-api-in-php
         $client = new Google_Client();
         $client->setClientId($var['google']['fitness_api']['client_id']);

示例#21

0

显示文件

文件： divcount.php 项目： hardanro/Parser

    }
}
/**
 * Simple append-only collection of elements.
 */
class HtmlElements
{
    /** Elements in the order they were added. */
    protected $elements = array();

    /**
     * Append one element to the collection.
     *
     * @param mixed $element Element to store
     */
    public function addChild($element)
    {
        array_push($this->elements, $element);
    }

    /**
     * Number of elements added so far.
     *
     * @return int
     */
    public function count()
    {
        $total = count($this->elements);

        return $total;
    }
}
// Demo: print how many <div> elements the parser finds on the page, or the
// exception message if anything goes wrong.
try {
    $dom = new HtmlParser('http://www.bbc.com/');
    echo $dom->getElements('div')->count();
} catch (Exception $failure) {
    echo $failure->getMessage();
}

示例#22

0

显示文件

文件： BitPdf.php 项目： bitweaver/pdf

 /**
  * Parse an HTML fragment and hand the generated output to $this->flush().
  *
  * HTML comments are removed from $data (in place, since it is passed by
  * reference), the remainder is parsed by HtmlParser using
  * $this->html_grammar, and the parse tree is converted by
  * $this->WalkParsedArray().
  *
  * @param string $data HTML to insert; comments are stripped from it
  */
 function insert_html(&$data)
 {
     // HTML comments are not parsed correctly, so drop them up front
     $data = preg_replace('#<!.*?[^>]>#', '', $data);
     // Build the parse tree
     $parser = new HtmlParser($data, $this->html_grammar, '', 0);
     $parser->Parse();
     // Walk the tree, accumulating the output in $rendered
     $rendered = '';
     $walkState = array();
     $this->WalkParsedArray($parser->content, $rendered, $walkState);
     $this->flush($rendered);
 }

示例#23

0

显示文件

文件： edit.php 项目： bitweaver/wiki

 }
 $sdta = @file_get_contents($suck_url);
 if (isset($php_errormsg) && strlen($php_errormsg)) {
     $gBitSystem->fatalError(tra("Can't import remote HTML page"));
 }
 // Need to parse HTML?
 if ($parsehtml == 'y') {
     // Read compiled( serialized ) grammar
     $grammarfile = UTIL_PKG_PATH . 'htmlparser/htmlgrammar.cmp';
     if (!($fp = @fopen($grammarfile, 'r'))) {
         $gBitSystem->fatalError(tra("Can't parse remote HTML page"));
     }
     $grammar = unserialize(fread($fp, filesize($grammarfile)));
     fclose($fp);
     // create parser object, insert html code and parse it
     $htmlparser = new HtmlParser($sdta, $grammar, '', 0);
     $htmlparser->Parse();
     // Should I try to convert HTML to wiki?
     $parseddata = '';
     $p = array('stack' => array(), 'listack' => array(), 'first_td' => false);
     walk_and_parse($htmlparser->content, $parseddata, $p);
     // Is some tags still opened?( It can be if HTML not valid, but this is not reason
     // to produce invalid wiki : )
     while (count($p['stack'])) {
         $e = end($p['stack']);
         $sdta .= $e['string'];
         array_pop($p['stack']);
     }
     // Unclosed lists r ignored... wiki have no special start/end lists syntax....
     // OK. Things remains to do:
     // 1 ) fix linked images

示例#24

0

显示文件

文件： Links.php 项目： Nubtehy/openProjects

//get finish
$finish = microtime(true);
$total_time = round($finish - $start, 4);
$resp->url = $_GET['arg0'];
$resp->start = $start;
$resp->finish = $finish;
$resp->totalSeconds = $total_time;
//no follow?
$obeyNoFollow = true;
if (isset($_GET['arg1']) && $_GET['arg1'] == 'false') {
    $obeyNoFollow = false;
}
//host
$host = Utils::getBaseHost($_GET['arg0']);
//parse the page
$parser = new HtmlParser($result, $_GET['arg0']);
$links = $parser->getTags('a');
$resp->links = array();
foreach ($links as $node) {
    if (!empty($node->attributes['href']) && !(isset($node->attributes['rel']) && strtolower($node->attributes['rel']) === 'nofollow' && $obeyNoFollow)) {
        $link = $node->attributes['href'];
        //validate hosts
        $lHost = Utils::getBaseHost($link);
        if (empty($lHost)) {
            $lHost = $host;
        }
        if ($lHost !== $host) {
            break;
        }
        $normal = Utils::normalizePath($link, $_GET['arg0']);
        if (!in_array($normal, $resp->links)) {

示例#25

0

显示文件

文件： ex_dumptags.php 项目： aydancoskun/octobercms

<?php

include "htmlparser.inc";
// Dump the type, name and value of every node the parser reports.
$htmlText = "<html><!-- comment --><body>This is the body</body></html>";
$parser = new HtmlParser($htmlText);
$separator = "-----------------------------------\r\n";
while ($parser->parse()) {
    echo $separator,
        "Node type: " . $parser->iNodeType . "\r\n",
        "Node name: " . $parser->iNodeName . "\r\n",
        "Node value: " . $parser->iNodeValue . "\r\n";
}

示例#26

0

显示文件

文件： Webbot.class.php 项目： nebtrx/stratos

 /**
  * Download a page and collect the links found in it.
  *
  * @param string $url        URL of the page to download and harvest links from
  * @param string $referer    Referer URL to send when downloading the page
  * @param string $open_tag   Opening delimiter of a link tag
  * @param string $close_tag  Closing delimiter of a link tag
  * @return array             Resolved address of every matched tag
  */
 protected function harvestLinks($url, $referer, $open_tag, $close_tag)
 {
     // Base address used to resolve the links found on the page
     $page_base = Resolver::getBasePageAddress($url);
     if ($this->config['webbot']['base_domain_relative_links'] == TRUE) {
         $page_base = Resolver::getBaseDomainAddress($page_base);
     }
     // Fetch the page
     $page = $this->downloadPage($url, $referer);
     // (this could be moved into a configuration setting)
     $tags = HtmlParser::parse2Array($page['FILE'], $open_tag, $close_tag);
     // Resolve the href of each tag and log it
     $harvested = array();
     foreach ($tags as $tag) {
         $href = HtmlParser::getAttribute($tag, "href");
         $resolved = Resolver::resolveAddress($href, $page_base);
         $harvested[] = $resolved;
         $this->logActivity("Harvested: " . $resolved);
     }
     return $harvested;
 }

示例#27

0

显示文件

文件： html_table.php 项目： rohmad-st/fpdf

 /**
  * Extract the cell texts of an HTML table as a list of rows.
  *
  * The markup is walked with HtmlParser and flattened into one string in
  * which structural tags are replaced by markers ("::" / "/::" for table
  * open/close, ":-!" / "!-:" for row open/close, "#,#" after each closed
  * cell). That string is then split on the row and cell markers.
  *
  * Known limitation: cell text that itself contains one of the marker
  * sequences is still altered by the clean-up or split steps.
  *
  * @param string $Table HTML containing the table
  * @return array        One array of cell texts per non-blank row; an empty
  *                      array when no row is found
  */
 function ParseTable($Table)
 {
     $markup = '';
     $parser = new HtmlParser($Table);
     while ($parser->parse()) {
         $tag = strtolower($parser->iNodeName);
         $closing = $parser->iNodeType == NODE_TYPE_ENDELEMENT;
         if ($tag == 'table') {
             $markup .= $closing ? '/::' : '::';
         }
         if ($tag == 'tr') {
             $markup .= $closing ? '!-:' : ':-!';
         }
         if ($tag == 'td' && $closing) {
             $markup .= '#,#';
         }
         if ($parser->iNodeName == 'Text' && isset($parser->iNodeValue)) {
             $markup .= $parser->iNodeValue;
         }
     }
     // Drop the row-close and table markers. "/::" is removed as a unit,
     // before "::", so that slashes inside cell text (dates, paths, ...) are
     // kept; the original stripped every "/" in the string.
     $markup = str_replace(array('!-:', '/::', '::'), '', $markup);
     // Always return an array, even when there are no rows.
     $data = array();
     // Each remaining ":-!" marks the start of a row.
     foreach (explode(':-!', $markup) as $rowMarkup) {
         if (trim($rowMarkup) != '') {
             $cells = explode('#,#', $rowMarkup);
             // Every cell is followed by "#,#", so the last piece is only
             // whatever trails the final cell.
             array_pop($cells);
             $data[] = $cells;
         }
     }
     return $data;
 }

示例#28

0

显示文件

文件： EmbedFilter.class.php 项目： kkkyyy03/coffeemix

 /**
  * Check the <param> tags in the content.
  *
  * For every <param> tag whose "name" is one of movie/src/href/url/source,
  * the "value" is tested with $this->isWhiteDomain(); when that test fails,
  * every occurrence of the tag in $content is replaced by its
  * HTML-escaped form.
  *
  * @param string $content Markup to check; modified in place
  * @return void
  */
 function checkParamTag(&$content)
 {
     preg_match_all('/<\\s*param\\s*[^>]+(?:\\/?>?)/is', $content, $m);
     $paramTagList = isset($m[0]) ? $m[0] : array();
     // Parameter names whose value is treated as a URL
     $urlParamNames = array('movie', 'src', 'href', 'url', 'source');
     foreach ($paramTagList as $paramTag) {
         $parser = new HtmlParser($paramTag);
         while ($parser->parse()) {
             // empty() keeps the original truthiness test but does not raise
             // a notice when a node has no name/value attribute
             if (empty($parser->iNodeAttributes['name']) || empty($parser->iNodeAttributes['value'])) {
                 continue;
             }
             $name = strtolower($parser->iNodeAttributes['name']);
             if (!in_array($name, $urlParamNames, true)) {
                 continue;
             }
             if (!$this->isWhiteDomain($parser->iNodeAttributes['value'])) {
                 $content = str_replace($paramTag, htmlspecialchars($paramTag, ENT_COMPAT | ENT_HTML401, 'UTF-8', false), $content);
             }
         }
     }
 }

示例#29

0

显示文件

文件： gan_parser_html.php 项目： tburry/pquery

 /**
  * Adjust the element hierarchy for the current tag, then defer to the
  * parent implementation.
  *
  * When $self_close is null it is looked up in $this->tags_selfclose and
  * stored in $this->status['self_close']. For a tag that is neither
  * self-closing nor a closing tag, the last hierarchy entry is popped when
  * $this->tags_optional_close lists its tag under the current tag.
  *
  * @param bool|null $self_close Whether the tag is self-closing; null to look it up
  * @return mixed                Whatever parent::parse_hierarchy() returns
  */
 protected function parse_hierarchy($self_close = null)
 {
     $current = strtolower($this->status['tag_name']);
     if ($self_close === null) {
         $self_close = isset($this->tags_selfclose[$current]);
         $this->status['self_close'] = $self_close;
     }
     if (!$self_close && !$this->status['closing_tag']) {
         $depth = count($this->hierarchy);
         $previous = strtolower($this->hierarchy[$depth - 1]->tag);
         // The new tag implicitly closes the element currently on top
         if (isset($this->tags_optional_close[$current][$previous])) {
             array_pop($this->hierarchy);
         }
     }
     return parent::parse_hierarchy($self_close);
 }

示例#30

0

显示文件

文件： w3sTemplate.class.php 项目： jmp0207/w3studiocms

 /**
  * Returns the CSS classes available for the slot a content belongs to.
  * Depending on $mode, either only the class names or the full CSS rules
  * are returned.
  *
  * The page's template file is parsed to find the slot element and the
  * chain of element ids discovered from it; the stylesheets linked by the
  * template are then read and concatenated, and searched for class
  * selectors attached to those ids as well as for standalone class
  * selectors.
  *
  * @param      object  The content; must provide getW3sSlot() and getW3sPage().
  * @param      int  optional 0 retrieves only the class names [Default]
  *                           any other value retrieves the full css rules
  *
  * @return     array  For mode 0, a map class => class that always contains
  *                    'w3sNone' => 'None'; otherwise a list of css rules
  */
 public static function findStylesheetClasses($content, $mode = 0)
 {
     // This is only a palliative workaround for the parser class (call-time pass-by-reference).
     // NOTE(review): the level is passed as the string 'E_ERROR', not the constant, and the
     // setting is changed for the rest of the request -- confirm this is the intended effect.
     ini_set('error_reporting', 'E_ERROR');
     require_once dirname(__FILE__) . '/../tools/parser/htmlparser.inc';
     require_once dirname(__FILE__) . '/../tools/parser/common.inc';
     $slotName = $content->getW3sSlot()->getSlotName();
     $page = $content->getW3sPage();
     // Opens the template and parses its structure
     $templateAttributes = self::retrieveTemplateAttributesFromPage($page);
     $templateFile = self::getTemplateFile($templateAttributes["projectName"], $templateAttributes["templateName"]);
     $p = new HtmlParser($templateFile, unserialize(Read_File("parser/htmlgrammar.cmp")), $templateFile, 1);
     $p->Parse();
     // $src is filled here but not read again in this method
     $src = "";
     GetPageSrc($p->content, $src);
     // Capture the printed dump of the parse tree so it can be searched with regexes
     ob_start();
     PrintArray($p->content);
     $contents = ob_get_clean();
     // Finds the ids of the slot and of the elements reached from it: each pass looks up
     // the dump line whose id value is $slotName, reads the id stored under the captured
     // key prefix, and continues the search from that id.
     // NOTE(review): $slotName is inserted into the pattern without preg_quote(), and
     // $res[1] is read even if the second preg_match() finds nothing -- verify.
     $i = 1;
     $elements = array($slotName);
     while (1) {
         preg_match('/(.*)\\[content\\].*\\[pars\\]\\[id\\]\\[value\\]=' . $slotName . '/', $contents, $res);
         if (count($res) == 0) {
             break;
         }
         // Escape the brackets of the captured key so it can be reused inside a pattern
         $startKey = str_replace("[", "\\[", $res[1]);
         $startKey = str_replace("]", "\\]", $startKey);
         preg_match('/' . $startKey . '\\[pars\\]\\[id\\]\\[value\\]=(.*)/', $contents, $res);
         $elements[] = $res[1];
         $slotName = $res[1];
         $i++;
         // Safety cap: stops after a fixed number of passes so a badly formed template
         // cannot cause an infinite loop
         if ($i == 100) {
             break;
         }
     }
     // Finds all the template's stylesheets
     $fp = fopen($templateFile, "r");
     $templateContents = fread($fp, filesize($templateFile));
     fclose($fp);
     $templateContents = str_replace("\r\n", "", $templateContents);
     preg_match_all('/.*?rel=["|\']stylesheet["|\'].*?href\\s*=\\s*["|\'](.*?)["|\'].*?/', $templateContents, $stylesheets);
     // Creates a single stylesheet from the stylesheets retrieved
     $contents = '';
     foreach ($stylesheets[1] as $stylesheet) {
         // Drops the first character of the href -- presumably a leading "/" so the path
         // becomes relative; TODO confirm
         $stylesheet = substr($stylesheet, 1, strlen($stylesheet));
         $fp = fopen($stylesheet, "r");
         $currentContent = fread($fp, filesize($stylesheet));
         fclose($fp);
         $currentContent = str_replace("\r\n", "", $currentContent);
         // Removes text from "HTML>" up to the next "}" -- presumably HTML>BODY style rules
         $currentContent = preg_replace('/HTML>.*?}+?/', '', $currentContent);
         $contents .= $currentContent;
     }
     // Find classes attached to the collected element ids (#id ... .class)
     $result = $mode == 0 ? array('w3sNone' => 'None') : array();
     foreach ($elements as $element) {
         $expression = $mode == 0 ? '/#' . trim($element) . '[a-zA-Z0-9-_:\\s]*\\.(.*?)\\{+?/' : '/#' . trim($element) . '[a-zA-Z0-9-_:\\s]*(\\..*?\\{.*?\\})+?/';
         preg_match_all($expression, $contents, $classes);
         foreach ($classes[1] as $class) {
             if ($mode == 0) {
                 // Keyed by class name, so duplicates collapse
                 $result[$class] = $class;
             } else {
                 $result[] = $class;
             }
         }
     }
     // Find classes not attached to an element id (selector starts with ".")
     $expression = $mode == 0 ? '/(^|})\\.(.*?)\\{+?/' : '/(^|})(\\..*?\\{.*?\\})+?/';
     preg_match_all($expression, $contents, $classes);
     foreach ($classes[2] as $class) {
         if ($mode == 0) {
             $result[$class] = $class;
         } else {
             $result[] = $class;
         }
     }
     return $result;
 }

PHP HtmlParser示例