コード例 #1
0
ファイル: tests.php プロジェクト: jasherai/libwebta
 /**
  * Checks the tag-stripping helpers of HTMLParser against one HTML fixture.
  *
  * The fixture contains an anchor, a custom <attr> element and a centred
  * paragraph wrapping a linked image; it contains no script element.
  * Each assertion runs one HTMLParser call on the untouched fixture.
  */
 function testData_Text_HTMLParser()
 {
     $html = "<a href='#'>Go Daddy</a><attr name='attr'>ATTR</attr>\r\n\t\t\t\t\t\t<p align=\"center\"><a href=\"http://www.myspace.com/declareyourself\" target=\"_blank\">\r\n\t\t\t\t\t\t<img src=\"http://creative.myspace.com/groups/_jc/declareyourself/dy_badge.jpg\" border=\"0\" />\r\n\t\t\t\t\t\t</a></p>";

     // Without a tag filter the output is expected to equal PHP's own strip_tags().
     $withoutAnyTags = HTMLParser::StripTags($html);
     $this->assertEqual($withoutAnyTags, strip_tags($html), "Error while stripping all tags");

     // Stripping a single named tag must leave no opening <attr behind.
     $withoutAttr = HTMLParser::StripTags($html, 'attr');
     $this->assertFalse(stristr($withoutAttr, '<attr'), "Error while stripping [attr] tag");

     // Anchors removed through the generic tag stripper: no href may survive.
     $withoutAnchors = HTMLParser::StripTags($html, 'a');
     $this->assertFalse(stristr($withoutAnchors, 'href'), "Error while stripping [a] tag");

     // Same expectation through the dedicated link stripper.
     $withoutLinks = HTMLParser::StripLinks($html);
     $this->assertFalse(stristr($withoutLinks, 'href'), "Error while stripping links");

     // The fixture has no scripts, so the script stripper must return it unchanged.
     $withoutScripts = HTMLParser::StripScripts($html);
     $this->assertEqual($withoutScripts, $html, "Error while stripping scripts");

     // Stripping images must remove every occurrence of "img".
     $withoutImages = HTMLParser::StripTags($html, 'img');
     $this->assertFalse(stristr($withoutImages, 'img'), "Error while stripping [img] tag");
 }
コード例 #2
0
		/**
		 * Extract the education section of the profile.
		 *
		 * Asks GetMatches() for the body of the <div name="education" id="...">
		 * element that is followed by an <h2, removes links from it with
		 * HTMLParser::StripLinks(), and then deletes every paragraph that
		 * starts with an <em>Activities and Societies: label.
		 *
		 * @return mixed Result of preg_replace() over the cleaned block
		 *               (presumably a string; GetMatches() and StripLinks()
		 *               are defined elsewhere - confirm their return types).
		 */
		public function GetEducation()
		{
			// Captures the inner HTML of the education <div>.
			$blockPatterns = array('/\<div[\s\t]+name\=\"education\"[\s\t]+id\=\"[a-z0-9]+\"[^\>]*\>(.*?)\<\/div\>[\s\t\n\r]+\<h2/msi');

			// Matches a whole "Activities and Societies:" paragraph (case-sensitive).
			$activitiesPattern = "/\<p[^\>]+\>[\s\t\n\r]*\<em\>[\s\t\n\r]*Activities and Societies\:.*?\<\/p\>/ms";

			$linklessBlock = HTMLParser::StripLinks($this->GetMatches($blockPatterns));

			return preg_replace($activitiesPattern, "", $linklessBlock);
		}
コード例 #3
0
		/**
		 * Parse personal details from content
		 *
		 * Reads three regions of the profile page through GetMatches():
		 *  1. the top block (table following the "nametext" class), whose
		 *     <br>-separated lines are read by fixed position;
		 *  2. the "'s Details" table, read row by row as label => value;
		 *  3. the "'s Interests" table, read row by row as label => value.
		 *
		 * Side effects: stores the raw blocks in $this->PersonalInfoBlock,
		 * $this->DetailsBlock and $this->InterestsBlock, and fills
		 * $this->Headline, City, State, Country, ProfileViews, LastLogin,
		 * Sex, Age and Online from the top block. Online is only ever set to
		 * true here; Sex and Age are only set when the third line of the top
		 * block is "Male" or "Female".
		 *
		 * NOTE(review): the fixed offsets into the top block ($entries[N])
		 * are not bounds-checked; a shorter block still raises
		 * undefined-offset notices.
		 *
		 * @return array Profile details
		 * Example of output array
		 * <code>
		 *  [Headline] => "feel easy to live"
		 *  [City] => www.fakaofo.tk - Fakaofo
		 *  [State] => State Info
		 *  [Country] => Tokelau
		 *  [Last Login] => 11/27/2006
		 *  [Age] => 25
		 *  [Sex] => Female
		 *  [Profile Views] =>
		 *  [Online] => 1
		 *  [Status] => Single
		 *  [Here for] => Networking, Dating, Friends
		 *  [Orientation] => Not Sure
		 *  [Hometown] => <a href="http://www.fakaofo.tk/">Fakaofo</a>
		 *  [Body type] => 5' 2" / Athletic
		 *  [Ethnicity] => Pacific Islander
		 *  [Religion] => Other
		 *  [Zodiac Sign] => <a href="http://collect.myspace.com/index.cfm?
		 * fuseaction=horoscope&sign=11&MyToken=5e6d132b-f9da-4df1-880d-
		 * 06d4a19d0fd2">Aquarius</a>
		 *  [Smoke / Drink] => Yes / Yes
		 *  [Children] => Love kids, but not for me
		 *  [Education] => High school
		 *  [Occupation] => Stranger
		 *  [General] => 1. sex 2. drugs 3. rock & roll
		 *  [Music] => rap, classic music
		 *  [Movies] => matrix
		 *  [Television] => mtv
		 *  [Books] => udar russkix bogov
		 *  [Heroes] => no heros
		 * </code>
		 * 
		 * @access public
		 */
		function GetPersonalDetails()
		{
			$details = array();
			
	
			$patterns = array(
				'/class\s*\=\s*(?:\'|\")nametext.*?\<table[^\>]+\>(.*?)\<\/table\>/msi'
			);
			
			//
			// Match personal info from top block
			//
			$this->PersonalInfoBlock = $this->GetMatches($patterns);
			
			// The personal data sits in the table cell declared with width="193".
			if (preg_match('/\<td[^\>]+width\s*\=\s*\"193\"[^\>]*\>(.*?)\<\/td\>/msi', $this->PersonalInfoBlock, $matches))
			{
				$entries = preg_split('/\<br[^\>]*\>/ims', $matches[1]);
				
				//
				// parse entries
				//
				if ($entries)
				{
					$entries = array_map('trim', $entries);
					
					// Two layouts are handled; they are told apart by whether the
					// third line holds the sex. Both operands are strings, so the
					// strict comparison gives the same answer as a loose one.
					if (!in_array(trim($entries[2]), array('Male', 'Female'), true))
					{
						$this->Headline = $entries[1];
						$location = explode(",", $entries[3], 2);
						$this->City = trim($location[0]);
						// No comma in the location means there is no state part.
						$this->State = isset($location[1]) ? trim($location[1]) : '';
						$this->Country = $entries[4];
						$this->ProfileViews = preg_replace('/[^0-9]+/msi', '', $entries[6]);
						$this->LastLogin = preg_replace('/[^0-9\/]+/msi', '', $entries[10]);
						if (stristr($entries[7], 'OnlineNow') || stristr($entries[8], 'OnlineNow'))
							$this->Online = true;
					}
					else
					{
						$this->Headline = $entries[0];
						$this->ProfileViews = preg_replace('/[^0-9]+/msi', '', $entries[1]);
						$this->Sex = $entries[2];
						$this->Age = preg_replace('/[^0-9]+/msi', '', $entries[3]);
						$location = explode(",", $entries[4], 2);
						$this->City = trim($location[0]);
						// No comma in the location means there is no state part.
						$this->State = isset($location[1]) ? trim($location[1]) : '';
						$this->Country = $entries[5];
						$this->LastLogin = preg_replace('/[^0-9\/]+/msi', '', $entries[8]);
						if (stristr($entries[7], 'OnlineNow'))
							$this->Online = true;
					}
					
					$details = array(
						'Headline' 	=> $this->Headline,
						'City'		=> $this->City,
						'State' 	=> $this->State,
						'Country' 	=> $this->Country,
						'Last Login' => $this->LastLogin,
						'Age'		=> $this->Age,
						'Sex'		=> $this->Sex,
						'Profile Views'		=> $this->ProfileViews,
						'Online'	=> $this->Online
					);
				}
			}
			
			
			
			$patterns = array(
				'/\'s Details.*?\<table[^\>]+\>(.*?)\<\/table/msi'
			);
		
			//
			// Match personal info from middle block - Name's Details
			//
			$this->DetailsBlock = $this->GetMatches($patterns);
			
			if ($this->DetailsBlock)
			{
				// One entry per table row.
				$entries = preg_split('/\<\/tr[^\>]*\>/ims', $this->DetailsBlock);
				
				//
				// parse entries
				//
				if ($entries)
				{
					$entries = array_map('trim', $entries);
					
					foreach($entries as $entry)
					{
						if (!$entry) continue;
						// Label is the <span> text, value is the following <td> body.
						if (preg_match('/\<span[^\>]+\>([^\<]+)\<\/span.*?\<td[^\>]+\>(.*?)\<\/td/msi', $entry, $match))
						{
							$key = str_replace(':', '', trim($match[1]));
							$details[$key] = HTMLParser::StripLinks(trim($match[2]));
						}
					}
					
				} // end if entries
			}
			
 

			// This pattern expects tag names prefixed with a number (e.g. "<3table");
			// presumably GetMatches() produces that form when its second argument
			// is true - confirm against GetMatches().
			$patterns = array(
				'/\'s Interests.*?\<([0-9]+)table[^\>]+\>(.*?)\<\\1\/table/msi'
			);
			
			
			//
			// Match Interests block information
			//
			$this->InterestsBlock = $this->GetMatches($patterns, true);
			
			if ($this->InterestsBlock)
			{
				$entries = preg_split('/<tr[\s\t]+id\=[\'\"a-z]+Row[^>]*>/ims', $this->InterestsBlock);
				
				//
				// parse entries
				//
				if ($entries)
				{
					$entries = array_map('trim', $entries);
					
					// Iterate by value: the loop never modifies $entry, and a
					// by-reference loop variable would stay bound to the last
					// element after the loop.
					foreach($entries as $entry)
					{
						if (!$entry) continue;
						
						// The back-reference \2 pairs the opening and closing <td>
						// that carry the same numeric prefix.
						if (preg_match('/<[0-9]+span[^>]+>([^<]+)<[0-9]+\/span.*?<([0-9]+)td[^>]+>(.*?)<\\2\/td/msi', HTMLParser::AddTagDepth($entry), $match))
						{
							$match[3] = HTMLParser::RemoveTagDepth(trim($match[3]));
							$key = HTMLParser::RemoveTagDepth(trim($match[1]));
							$key = str_replace(':', '', $key);
							
							// remove link to all groups
							if ($key == 'Groups')
								$match[3] = preg_replace('/\<br[^\>]*\>\<br[^\>]*\>.*$/msi', '', $match[3]);
							
							$details[$key] = HTMLParser::StripBlankLinks(trim($match[3]));
						}
					}
					
				} // end if entries
			}
			
			
			
			return $details;
		}
コード例 #4
0
ファイル: class.LinkedIn.php プロジェクト: rchicoria/epp-drs
		/**
		 * Fetch the connections page and list the connections found on it.
		 *
		 * Each table row named "connection" that carries an id, a full name
		 * and an e-mail address yields one entry. Rows that do not match the
		 * whole pattern are skipped.
		 *
		 * @return array|false false when not logged in or when the fetch left
		 *                     no result; otherwise a list (possibly empty) of
		 *                     array('id' => ..., 'name' => ..., 'email' => ...),
		 *                     where 'name' has been passed through
		 *                     HTMLParser::StripLinks().
		 */
		function GetConnectionsList() 
		{
			if (!$this->LoggedIn) return false;
			
			$this->Fetch(self::CONNECTIONS_URL);

			if (!$this->Result)
				return false;
			
			$connections = array();
			
			// Captures: 1 = numeric id after "_connection", 2 = name markup, 3 = e-mail.
			$found = preg_match_all("/\<tr [^\>]*name\=\"connection\".*?\_connection([0-9]+).*?name\=\"fullName\"[^\>]*\>(.*?)\<\/strong\>.*?\"email\"[^\>]*\>([^\@]+\@[^\@]+\.[a-z]{2,6})\<.*?\<\/tr\>/msi", $this->Result, $matches, PREG_SET_ORDER);
			
			// Zero matches and a PCRE failure (false) both mean there is nothing to list.
			if (!$found)
				return $connections;
			
			foreach($matches as $match)
			{
				$connections[] = array(
					'id'	=> $match[1],
					'name'	=> HTMLParser::StripLinks($match[2]),
					'email'	=> $match[3]
				);
			}
			
			return $connections;
		}
コード例 #5
0
		/**
		 * Parse personal details from content
		 *
		 * Scans $this->Result for <li><strong>Label:</strong> value</li>
		 * items labelled age, gender, industry, occupation or location, and
		 * for the "About Me" / "Interests" sections that follow an <h2>.
		 * Labels and headings are matched case-insensitively, so they are
		 * normalised to lower case before being mapped to result keys.
		 *
		 * Possible keys: age, gender, industry, occupation, city, state,
		 * country, aboutme, interests. A key is present only when its item
		 * was found. Location is split on ":" into city, state and country;
		 * missing parts are treated as empty strings before being passed to
		 * HTMLParser::StripLinks().
		 *
		 * @return array|null profile details, or null when $this->Result is empty
		 */
		function GetPersonalDetails()
		{
			if (!$this->Result) return;
			
			$details = array();
						
			$pattern = '/<li>[\s\t\r\n]*<strong>[\s\t\r\n]*(age|gender|industry|occupation|location)\:[\s\t\r\n]*<\/strong>[\s\t\r\n]*(.*?)[\s\t\r\n]*<\/li>/msi';
			
			preg_match_all($pattern, $this->Result, $matches, PREG_SET_ORDER);
			 			
			foreach($matches as $match)
			{
				// The pattern ignores case, so "age:", "Age:" and "AGE:" all
				// arrive here; compare on the lower-cased label.
				switch (strtolower($match[1]))
				{
					case 'age':
						$details['age'] = $match[2];
						break;
					
					case 'gender':
						$details['gender'] = $match[2];
						break;
					
					case 'location':
						$locations = explode(":", $match[2]);
						$locations = array_map('trim', $locations);
						// Guarantee three parts so a short location does not
						// read undefined offsets.
						$locations = array_pad($locations, 3, '');
						
						$details['city'] = HTMLParser::StripLinks($locations[0]);
						$details['state'] = HTMLParser::StripLinks($locations[1]);
						$details['country'] = HTMLParser::StripLinks($locations[2]);
						break;
					
					case 'industry':
						$details['industry'] = $match[2];
						break;
					
					case 'occupation':
						$details['occupation'] = $match[2];
						break;
					
				}
			}
			
			
			// Group 2 is the name of the element that follows the heading; the
			// back-reference closes that same element. Group 3 is its content.
			$pattern = '/<h2>(About\sMe|Interests)<\/h2>[\s\t\r\n]*<([a-z]+)\b[^>]*>(.*?)<\/\\2>/msi';
			preg_match_all($pattern, $this->Result, $matches, PREG_SET_ORDER);
			 			
			foreach($matches as $match)
			{
				// "\s" accepts any single whitespace character and the match is
				// case-insensitive, so fold both before comparing.
				$heading = strtolower(preg_replace('/\s/', ' ', $match[1]));
				
				switch ($heading)
				{
					case 'about me':
						$details['aboutme'] = $match[3];
						break;
					
					// #todo - separators needed
					case 'interests':
						$details['interests'] = HTMLParser::StripTags($match[3]);
						break;
				}
			}			

			return $details;
		}