コード例 #1
0
ファイル: tests.php プロジェクト: jasherai/libwebta
 /**
  * Exercises the HTMLParser stripping helpers against one HTML fixture
  * containing links, a custom <attr> element, a paragraph and an image.
  */
 function testData_Text_HTMLParser()
 {
     $content = "<a href='#'>Go Daddy</a><attr name='attr'>ATTR</attr>\r\n\t\t\t\t\t\t<p align=\"center\"><a href=\"http://www.myspace.com/declareyourself\" target=\"_blank\">\r\n\t\t\t\t\t\t<img src=\"http://creative.myspace.com/groups/_jc/declareyourself/dy_badge.jpg\" border=\"0\" />\r\n\t\t\t\t\t\t</a></p>";

     // With no tag filter the result must equal PHP's native strip_tags().
     $this->assertEqual(HTMLParser::StripTags($content), strip_tags($content), "Error while stripping all tags");

     // Stripping a single named tag must leave no opening tag of that name.
     $withoutAttr = HTMLParser::StripTags($content, 'attr');
     $this->assertFalse(stristr($withoutAttr, '<attr'), "Error while stripping [attr] tag");

     // Anchors are the only carriers of "href" in the fixture, so its absence
     // shows the anchors are gone - first via StripTags, then via StripLinks.
     $withoutAnchors = HTMLParser::StripTags($content, 'a');
     $this->assertFalse(stristr($withoutAnchors, 'href'), "Error while stripping [a] tag");

     $withoutLinks = HTMLParser::StripLinks($content);
     $this->assertFalse(stristr($withoutLinks, 'href'), "Error while stripping links");

     // The fixture has no scripts, so StripScripts must return it untouched.
     $withoutScripts = HTMLParser::StripScripts($content);
     $this->assertEqual($withoutScripts, $content, "Error while stripping scripts");

     // "img" occurs only inside the <img> tag itself in the fixture.
     $withoutImages = HTMLParser::StripTags($content, 'img');
     $this->assertFalse(stristr($withoutImages, 'img'), "Error while stripping [img] tag");
 }
コード例 #2
0
		/**
		 * Parse personal details from content
		 *
		 * Scans $this->Result for "<b>Label:</b></td><td>value</td>" table cells
		 * and maps the recognised labels onto a flat details array. Labels are
		 * matched case-insensitively. Only the keys actually found are set; the
		 * "user" and "gizmo/lj talk" labels are matched by the pattern but not
		 * exported.
		 *
		 * $this->Result is passed through HTMLParser::AddTagDepth() for the
		 * duration of the match and restored with RemoveTagDepth() afterwards.
		 * NOTE(review): the pattern expects tag names prefixed with digits
		 * (e.g. "<3td>"), presumably a nesting depth added by AddTagDepth, so
		 * that the back-reference can pair a <td> with its own closing tag -
		 * confirm against HTMLParser.
		 *
		 * @return array profile details
		 * sample
		 *   [userpic]   => http://www.livejournal.com/userpic/38353247/8981002
		 *   [name]   => Natalie
		 *   [website]   => http://www.myspace.com/aggressiva
		 *   [city]   => La Verne
		 *   [state]   => California
		 *   [country]   => United States
		 *   [birthday] => 1971-12-15
		 *   [aboutme]	=> party ... I just believe in parties!
		 *   [email]   => present only when an "E-mail" row exists (tags stripped)
		 */
		function GetPersonalDetails()
		{
			$details = array();
						
			$pattern = '/><[0-9]*b>(user|name|website|location|birthdate|gizmo\/lj talk|bio|e\-mail)\:<[0-9]*\/b><[0-9]*\/td><([0-9]*)td[^>]*>(.*?)<\\2\/td>/msi';
			
			$this->Result = HTMLParser::AddTagDepth($this->Result);
			preg_match_all($pattern, $this->Result, $matches, PREG_SET_ORDER);
			$this->Result = HTMLParser::RemoveTagDepth($this->Result);
			
			foreach($matches as $match)
			{
				// The pattern is case-insensitive, so normalise the captured label
				// before dispatching; otherwise "name:" or "NAME:" would match the
				// regex and then be silently ignored by the switch.
				$label = strtolower($match[1]);
				
				// Cell content with the depth markers removed again.
				$value = HTMLParser::RemoveTagDepth(trim($match[3]));
				
				switch ($label)
				{
					case 'name':
						$details['name'] = $value;
						break;
					
					case 'website':
						// Take the target of the first quoted href in the cell.
						if (preg_match("/href=(\'|\")(.*?)\\1/", $value, $hrefMatch))
						{
							$details['website'] = $hrefMatch[2];
						}
						break;
					
					case 'location':
						// Each location part is a link whose query carries
						// loc_ci / loc_st / loc_cn; the link text is the value.
						preg_match_all("/loc\_(ci|st|cn)\=[^>\&]+>(.*?)</msi", $value, $locationParts, PREG_SET_ORDER);
						
						foreach($locationParts as $part)
						{
							if ($part[1] == 'ci')
								$details['city'] = $part[2];
							elseif ($part[1] == 'st')
								$details['state'] = $part[2];
							elseif ($part[1] == 'cn')
								$details['country'] = $part[2];
						}
						break;
					
					case 'bio':
						$details['aboutme'] = $value;
						break;
					
					case 'birthdate':
						$details['birthday'] = $value;
						break;
					
					case 'e-mail':
						$details['email'] = HTMLParser::StripTags($value);
						break;
				}
			}
			
			// The userpic is not part of the details table; take the first
			// ".../userpic/<digits>/<digits>" URL found anywhere in the content.
			if (preg_match("/http\:\/\/([a-z0-9\-]+\.)+[a-z0-9]{2,6}\/userpic\/[0-9]+\/[0-9]+/msi", $this->Result, $userpicMatch))
			{
				$details['userpic'] = $userpicMatch[0];
			}
			
			return $details;
		}
コード例 #3
0
ファイル: class.DiffTool.php プロジェクト: rchicoria/epp-drs
		/**
		 * Compare two multiline strings and return the new one with the
		 * differences highlighted.
		 *
		 * Both inputs are stripped of HTML tags first. The patch produced by
		 * Diff() is rewritten with the $this->Statements => $this->Replacements
		 * substitutions and then applied to the (stripped) old string. When
		 * Diff() yields nothing, the stripped new string is returned as is.
		 * 
		 * @param string $string_old Old string
		 * @param string $string_new New string
		 * @return string New formatted string
		 * @uses HTMLParser HTML Parser method StripTags
		 * @access public
		 */
		public function GetHighlitedDiff($string_old, $string_new) 
		{
			$plainNew = HTMLParser::StripTags($string_new);
			$plainOld = HTMLParser::StripTags($string_old);
			
			$rawPatch = $this->Diff($plainOld, $plainNew);

			// Nothing to highlight: hand back the tag-free new text.
			if (!$rawPatch)
			{
				return $plainNew;
			}

			$markedPatch = preg_replace($this->Statements, $this->Replacements, $rawPatch);

			return $this->Patch($plainOld, $markedPatch);
		}
コード例 #4
0
		/**
		 * Parse personal details from content
		 *
		 * Reads two kinds of fragments from $this->Result:
		 *  - "<li><strong>Label:</strong> value</li>" items for age, gender,
		 *    industry, occupation and location;
		 *  - "<h2>About Me</h2>" / "<h2>Interests</h2>" headings followed by
		 *    a single element whose content is the value.
		 * Labels and headings are matched case-insensitively.
		 *
		 * A location value is split on ":" into city, state and country (links
		 * removed); parts that are missing come back as empty strings.
		 * 
		 * @return array|null profile details with any of the keys age, gender,
		 *                    industry, occupation, city, state, country,
		 *                    aboutme, interests; null when $this->Result is empty
		 */
		function GetPersonalDetails()
		{
			// Kept as a bare return (null) so existing callers see no change.
			if (!$this->Result) return;
			
			$details = array();
						
			$pattern = '/<li>[\s\t\r\n]*<strong>[\s\t\r\n]*(age|gender|industry|occupation|location)\:[\s\t\r\n]*<\/strong>[\s\t\r\n]*(.*?)[\s\t\r\n]*<\/li>/msi';
			
			preg_match_all($pattern, $this->Result, $matches, PREG_SET_ORDER);
			
			foreach($matches as $match)
			{
				// The pattern is case-insensitive; normalise the label so that
				// "AGE:" or "age:" is not matched and then silently dropped.
				$label = strtolower($match[1]);
				
				switch ($label)
				{
					case 'age':
					case 'gender':
					case 'industry':
					case 'occupation':
						// These labels map straight onto same-named keys.
						$details[$label] = $match[2];
						break;
					
					case 'location':
						$locations = array_map('trim', explode(":", $match[2]));
						// Guarantee three slots so a location with fewer parts
						// does not read undefined offsets.
						$locations = array_pad($locations, 3, '');
						
						$details['city'] = HTMLParser::StripLinks($locations[0]);
						$details['state'] = HTMLParser::StripLinks($locations[1]);
						$details['country'] = HTMLParser::StripLinks($locations[2]);
						break;
				}
			}
			
			
			$pattern = '/<h2>(About\sMe|Interests)<\/h2>[\s\t\r\n]*<([a-z]+)\b[^>]*>(.*?)<\/\\2>/msi';
			preg_match_all($pattern, $this->Result, $matches, PREG_SET_ORDER);
			
			foreach($matches as $match)
			{
				// "\s" in the pattern accepts any whitespace character and the
				// match ignores case, so fold both before comparing.
				$heading = strtolower(preg_replace('/\s/', ' ', $match[1]));
				
				switch ($heading)
				{
					case 'about me':
						$details['aboutme'] = $match[3];
						break;
					
					// #todo - separators needed
					case 'interests':
						$details['interests'] = HTMLParser::StripTags($match[3]);
						break;
				}
			}

			return $details;
		}