/**
 * Recursively echoes an editable, nested list of categories.
 *
 * Every key of $arr is looked up as an id in the `categories` table. For each
 * one a name input, a hidden id, a URL input and a "delete" checkbox are
 * printed, followed by the same markup for the children held in the value.
 *
 * @param array $arr Tree keyed by category id; an array value holds the children.
 * @return void Markup is written straight to the output.
 */
function parse_array($arr)
{
    foreach ($arr as $categoryId => $children) {
        $rows = DB::select()->from('categories')->where('id', '=', $categoryId)->execute();

        // NOTE(review): row 0 is used without checking that the lookup matched anything.
        echo '<ul><li>';

        $nameInput = form::input('name[]', $rows[0]['name'], array('onkeyup' => 'translit(\'name[]\', \'url[]\');'));
        $idHidden = form::hidden('cid[]', $rows[0]['id']);
        echo '<b>Название:</b> ' . $nameInput . $idHidden . '&nbsp;';

        $urlInput = form::input('url[]', $rows[0]['url']);
        echo '<b>URL:</b> ' . $urlInput . '&nbsp;';

        $deleteBox = form::checkbox('delete[]', $rows[0]['id']);
        echo '<b>Удалить?</b> ' . $deleteBox;

        echo '</li>';

        // Children are rendered after the closing </li> but inside the same <ul>.
        if (is_array($children)) {
            parse_array($children);
        }

        echo '</ul>';
    }
}
Example #2
0
  /**
   * Fills $this->data from either a ready-made array or two serialized lists.
   *
   * - array first argument: stored as-is;
   * - first argument loosely equal to null: empty array;
   * - otherwise both arguments are run through parse_array() and paired up
   *   position by position (keys from the first, values from the second).
   *
   * @param mixed $parse_text_k Array of data, or the serialized list of keys.
   * @param mixed $parse_text_v Serialized list of values (only read in the last case).
   */
  function __construct($parse_text_k=null, $parse_text_v=null) {
    if(is_array($parse_text_k)) {
      $this->data=$parse_text_k;
      return;
    }

    if($parse_text_k==null) {
      $this->data=array();
      return;
    }

    // presumably parse_array() returns 0-based lists here; verify against its definition
    $keys=parse_array($parse_text_k);
    $values=parse_array($parse_text_v);

    $this->data=array();
    $total=sizeof($keys);
    $idx=0;
    while($idx<$total) {
      $this->data[$keys[$idx]]=$values[$idx];
      $idx++;
    }
  }
/**
 * Downloads an RSS feed and pulls out its headline fields.
 *
 * @param string $target URL of the feed.
 * @return array Keys TITLE and COPYRIGHT for the feed, plus ITITLE, ILINK,
 *               IDESCRIPTION and IPUBDATE, each indexed by item position
 *               (the item keys are only present when at least one item exists).
 */
function download_parse_rss($target)
{
    # Download the RSS page
    $news = http_get($target, "");
    $feed_xml = $news['FILE'];

    # Feed-level fields
    $rss_array = array();
    $rss_array['TITLE'] = return_between($feed_xml, "<title>", "</title>", EXCL);
    $rss_array['COPYRIGHT'] = return_between($feed_xml, "<copyright>", "</copyright>", EXCL);

    # Item-level fields: result key => element name inside each <item>
    $item_fields = array(
        'ITITLE' => 'title',
        'ILINK' => 'link',
        'IDESCRIPTION' => 'description',
        'IPUBDATE' => 'pubDate',
    );
    $item_array = parse_array($feed_xml, "<item>", "</item>");
    $item_total = count($item_array);
    for ($xx = 0; $xx < $item_total; $xx++) {
        foreach ($item_fields as $result_key => $element) {
            $rss_array[$result_key][$xx] = return_between($item_array[$xx], "<" . $element . ">", "</" . $element . ">", EXCL);
        }
    }

    return $rss_array;
}
/**
 * Reports the character encoding declared for a downloaded page.
 *
 * Two sources are consulted: the "charset=" value in the raw HTTP header
 * (only when $header is true; the header is taken to be everything before the
 * first "<"), and the first <meta http-equiv="content-type"> tag.
 *
 * @param string $strHTML Page source, optionally prefixed with the raw HTTP header.
 * @param bool   $header  Whether $strHTML starts with header lines to inspect.
 * @return string "header,meta" when both are found, the single value when only
 *                one is found, or "" when neither is declared.
 */
function get_encoding($strHTML, $header = false)
{
    $enc = "";
    if ($header) {
        // Header block = text before the first "<"; with no markup at all, the whole input.
        $markup_start = strpos($strHTML, "<");
        $strHeader = $markup_start === false ? $strHTML : substr($strHTML, 0, $markup_start);
        // Looking for a line such as: Content-Type: text/html; charset=utf-8
        $pos = strpos($strHeader, "charset=");
        if ($pos !== false) {
            $value_start = $pos + strlen("charset=");
            $line_end = strpos($strHeader, "\n", $value_start);
            // The charset line may be the last one with no newline after it; a
            // missing newline used to turn into a negative substr() length.
            $enc = $line_end === false
                ? substr($strHeader, $value_start)
                : substr($strHeader, $value_start, $line_end - $value_start);
            // Drops the "\r" left over from CRLF line endings.
            $enc = trim((string) $enc);
        }
    }

    # Look through the meta tags for a content-type declaration
    $meta_charset = "";
    $meta_tag_array = parse_array($strHTML, "<meta", ">");
    $meta_tag_total = count($meta_tag_array);
    for ($xx = 0; $xx < $meta_tag_total; $xx++) {
        $meta_attribute = get_attribute($meta_tag_array[$xx], "http-equiv");
        if (strtolower($meta_attribute) == "content-type") {
            $meta_charset = return_between($meta_tag_array[$xx], "charset", ">", EXCL);
            # Strip the punctuation surrounding the charset name
            $meta_charset = str_replace(array("/", "=", "\"", "'", " "), "", trim($meta_charset));
            break;
        }
    }

    if (strlen($enc) > 0 && strlen($meta_charset) > 0) {
        return $enc . "," . $meta_charset;
    }
    if (strlen($meta_charset) > 0) {
        return $meta_charset;
    }
    // Either the header value or "" when nothing was declared.
    return $enc;
}
Example #5
0
 /**
  * Fetches the raw listing and breaks it into table rows and cells.
  *
  * The last <tr> found is stored in $this->page. Every row is split into its
  * <td> cells; cells 7 and 8 are replaced by a two-element array holding the
  * cell with its <a ...> and </a ...> tags removed, followed by the text after
  * "proj_idDes=" in the cell's href.
  *
  * @return array One entry per <tr>, each a list of cells as described above.
  */
 public function getInfo()
 {
     $rawInfo = self::getRaw($this->action, $this->ref, $this->data);
     $infoArray = parse_array($rawInfo['FILE'], '<tr>', '</tr>');
     $tr = [];
     $this->page = end($infoArray);

     // Cells that hold a link whose href carries the project id
     $linkColumns = [7, 8];

     foreach ($infoArray as $rowHtml) {
         $tb = parse_array($rowHtml, '<td', '</td>');
         foreach ($linkColumns as $col) {
             $projectId = get_attribute($tb[$col], 'href');
             $projectId = split_string($projectId, 'proj_idDes=', AFTER, EXCL);
             $tb[$col] = remove($tb[$col], '<a', '>');
             $tb[$col] = remove($tb[$col], '</a', '>');
             $tb[$col] = [$tb[$col], $projectId];
         }
         $tr[] = $tb;
     }

     return $tr;
 }
/**
 * Downloads a page and returns the resolved target of every anchor on it.
 *
 * Waits $DELAY seconds (global) before the download and echoes each link as it
 * is collected.
 *
 * @param string $url Page to harvest.
 * @return array Resolved addresses, in the order the anchors were found.
 */
function harvest_links($url)
{
    global $DELAY;

    # Base address used to resolve relative links
    $page_base = get_base_page_address($url);

    # Pause, then fetch the page
    sleep($DELAY);
    $downloaded_page = http_get($url, "");

    # Collect the href of every anchor
    $anchor_tags = parse_array($downloaded_page['FILE'], "<a", "</a>", EXCL);
    $link_array = array();
    $anchor_total = count($anchor_tags);
    $idx = 0;
    while ($idx < $anchor_total) {
        $href = get_attribute($anchor_tags[$idx], "href");
        $resolved = resolve_address($href, $page_base);
        $link_array[] = $resolved;
        echo "Harvested: " . $resolved . " \n";
        $idx++;
    }

    return $link_array;
}
/**
 * Looks up a ZIP code on the demo form and returns the details it reports.
 *
 * The form page is downloaded first to obtain its hidden "session" value,
 * which is then posted back together with the ZIP code.
 *
 * @param string $zipcode ZIP code to look up.
 * @return array Keys CITY, STATE, COUNTY, LATITUDE and LONGITUDE taken from
 *               the result table; an empty array when no table mentions the ZIP.
 */
function describe_zipcode($zipcode)
{
    # include_once: a plain include would redeclare the library functions
    # (fatal error) the second time this function is called
    include_once "../util/LIB_http.php";
    include_once "../util/LIB_parse.php";
    $target = "http://www.schrenk.com/nostarch/webbots/zip_code_form.php";

    # Download the form
    $page = http_get($target, "");

    # Parse the hidden session tag from the downloaded page
    # <input type="hidden" name="session" value="xxxxxxxxxx">
    $session_tag = return_between($page['FILE'], "<input type=\"hidden\" name=\"session\"", ">", EXCL);
    # Remove the quotes and the "value=" text; trim the space that preceded "value="
    $session_value = str_replace("\"", "", $session_tag);
    $session_value = trim(str_replace("value=", "", $session_value));

    # Submit the form, using the form page itself as the referer
    $data_array = array(
        'session' => $session_value,
        'zipcode' => $zipcode,
        'Submit' => "Submit",
    );
    $form_result = http_post_form($target, $target, $data_array);

    # Only the table(s) containing this landmark hold the answer
    $landmark = "Information about " . $zipcode;
    $fields = array('CITY', 'STATE', 'COUNTY', 'LATITUDE', 'LONGITUDE');
    $ret = array();
    $table_array = parse_array($form_result['FILE'], "<table", "</table>");
    $table_total = count($table_array);
    for ($xx = 0; $xx < $table_total; $xx++) {
        if (stristr($table_array[$xx], $landmark)) {
            # Each value sits between its label and the end of that table row
            foreach ($fields as $field) {
                $ret[$field] = strip_tags(return_between($table_array[$xx], $field, "</tr>", EXCL));
            }
        }
    }

    # Return the parsed data
    return $ret;
}
Example #8
0
 /**
  * Crawls the download page of novel ids 1..1699 and stores what it finds.
  *
  * For every id the novel name (from the first <card ...> tag's title), the
  * last date and the last "<digits>K" size on the page are passed to
  * insertmodel->insert_Novel(). The update date is echoed per novel, then the
  * curl_result view is loaded.
  *
  * @return void
  */
 public function index()
 {
     // include_once: a plain include would redeclare the library functions if
     // this action ran twice in one request
     include_once "application/libraries/LIB_http.php";
     include_once "application/libraries/LIB_parse.php";
     $ref = "http://www.wenku8.cn";
     $this->load->model("insertmodel");
     $success = "Catch OK";
     for ($xx = 1; $xx < 1700; $xx++) {
         $target = 'http://www.wenku8.cn/wap/article/packshow.php?id=' . $xx . '&type=txtfull';
         $web_page = http_get($target, $ref);
         // A failed download is treated as an empty page rather than silenced with @
         $page_body = isset($web_page['FILE']) ? (string) $web_page['FILE'] : '';

         // novel_name: text of the title attribute up to "TXT"
         $meta_tag_array = parse_array($page_body, '<card', ">");
         $meta_tag_array = str_replace(" ", "", $meta_tag_array);
         $meta_tag_array = str_replace("-", "", $meta_tag_array);
         $first_card = isset($meta_tag_array[0]) ? $meta_tag_array[0] : '';
         $meta_tag = split_string($first_card, "title=\"", AFTER, EXCL);
         $novel_name = strip_tags(split_string($meta_tag, "TXT", BEFORE, EXCL));

         // update_time: last date on the page. Reset per novel so a page with no
         // date no longer inherits the previous novel's value.
         $update_time = '';
         if (preg_match_all("/\\d{4}-\\d{1,2}-\\d{1,2}/", $page_body, $matches_array) > 0) {
             $update_time = end($matches_array[0]);
         }
         echo $update_time;

         // size: last "<digits>K" token on the page, reset per novel as above
         $size = '';
         if (preg_match_all("/\\d+K/", $page_body, $get_array) > 0) {
             $size = end($get_array[0]);
         }

         // 'K' was a bare constant (an Error on PHP 8); quoted to keep the value
         // older PHP versions produced.
         // NOTE(review): the match already ends in "K", so this stores e.g. "12KK" - confirm intent.
         $data = array(
             'novel_id' => $xx,
             'novel_name' => empty($novel_name) ? '没有这本小说' : $novel_name,
             'update_time' => $update_time,
             'size' => $size . 'K',
         );
         echo "<br>";
         $this->insertmodel->insert_Novel($data);
     }
     $this->load->view('curl_result', $success);
 }
Example #9
0
    curl_setopt($ch, CURLOPT_REFERER, $file);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 4);
    $result = curl_exec($ch);
    curl_close($ch);
    return $result;
}
$result = array();
$url = filter_input(INPUT_POST, "input-link");
$last_position = strripos($url, '/');
$last_position += 1;
$url_dir = substr($url, 0, $last_position);
$web_page = http_get($url, $ref = "");
$web_page = $web_page["FILE"];
$a_tag = parse_array($web_page, "<a ", ">");
$count = 0;
foreach ($a_tag as $val) {
    if (stristr($val, "playAudioBarPlay")) {
        $href_str = trim(get_attribute($val, "href"));
        $href_str = str_replace("'", "", $href_str);
        $href_str = str_replace("(", "", $href_str);
        $href_str = str_replace(")", "", $href_str);
        $href_arr = explode(",", $href_str);
        $href_str = str_replace("javascript:playAudioBarPlay", "", $href_arr[0]);
        if (!file_exists("audio/" . $href_str)) {
            $audio_file = download_audio($url_dir . 'aumpo/' . $href_str);
            file_put_contents("audio/" . $href_str, $audio_file);
            $file_size = filesize("audio/" . $href_str, $audio_file);
            if ($file_size === false || $file_size <= 0) {
                $result["result"] = "error download audio form link";
Example #10
0
COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF 
ANY USE OF THE SOFTWARE OR DOCUMENTATION.

The name and trademarks of copyright holders may NOT be used in advertising or publicity pertaining to the 
software without specific, written prior permission. Title to copyright in this software and any associated 
documentation will at all times remain with copyright holders.

Copyright 2007, Michael Schrenk

THIS SCRIPT IS FOR DEMONSTRATION PURPOSES ONLY! 
    It is not suitable for any use other than demonstrating 
    the concepts presented in Webbots, Spiders and Screen Scrapers. 
########################################################################
*/
?>



<?php
# Directory that holds the shared libraries
$PHP_LIBRARY_PATH = "../phplibs";
# Load the parse library, then the cURL/http library
include "{$PHP_LIBRARY_PATH}/LIB_parse.php";
include "{$PHP_LIBRARY_PATH}/LIB_http.php";
# Download the page; $target and $referer remain defined afterwards
$target = "http://www.fbi.gov";
$referer = "";
$web_page = http_get($target, $referer);
# Print every <meta ...> tag found, one per line
$meta_tag_array = parse_array($web_page['FILE'], "<meta", ">");
$xx = 0;
while ($xx < count($meta_tag_array)) {
    echo $meta_tag_array[$xx] . "\n";
    $xx++;
}
 /**
  * Recursively echoes a collapsible folder/clip tree as HTML.
  *
  * An entry whose key loosely equals its own 'name' element is a clip: a link
  * that copies the clip's uuid and name into the parent document, plus two
  * hidden textareas holding its 'before' and 'after' text. Any other entry is
  * a folder: a toggle link followed by a hidden <div> with its rendered
  * children. Non-array input prints nothing.
  *
  * @param mixed $arr Tree of folders and clips keyed by name.
  * @return void Markup is written straight to the output.
  */
 function parse_array($arr)
 {
     if (!is_array($arr)) {
         return;
     }

     foreach ($arr as $name => $sub_arr) {
         if ($name != $sub_arr['name']) {
             // Folder: heading link, then the children inside a collapsed block
             echo "<a onclick='Toggle(this);' style='display: block; cursor: pointer; text-decoration: none;'><img src='resources/images/icon_folder.png' border='none' align='absmiddle' style='margin: 1px 2px 3px 0px;'>" . $name . "</a>";
             echo "<div style='display: none; padding-left: 16px;'>\n";
             parse_array($sub_arr);
             echo "</div>\n";
             continue;
         }

         // Clip: selection link plus hidden before/after payloads
         $clip_uuid = $sub_arr['uuid'];
         $clip_name = $sub_arr['name'];
         $select_js = "parent.document.getElementById('clip_uuid').value='" . $clip_uuid . "'; parent.document.getElementById('clip_name').value='" . $clip_name . "';";

         echo "<div style='white-space: nowrap;'>\n"
             . "<a href='javascript:void(0);' onclick=\"" . $select_js . "\">"
             . "<img src='resources/images/icon_file.png' border='0' align='absmiddle' style='margin: 1px 2px 3px -1px;'>"
             . $clip_name
             . "</a>\n";
         echo "<textarea style='display: none' id='before_" . $clip_uuid . "'>" . $sub_arr['before'] . "</textarea>\n";
         echo "<textarea style='display: none' id='after_" . $clip_uuid . "'>" . $sub_arr['after'] . "</textarea>\n";
         echo "</div>\n";
     }
 }
/**
 * Downloads every same-domain image referenced by a page and saves it locally.
 *
 * Images are stored under "saved_images_<page base without http://>/" in a
 * directory layout mirroring each image's src path. Progress is echoed. Only
 * files whose URL contains .jpg, .gif or .png are written to disk.
 *
 * NOTE: the script is terminated (exit) when the page contains no <img> tags.
 *
 * @param string $target URL of the page to scan.
 * @return void
 */
function download_images_for_page($target)
{
    echo "target = {$target}\n";
    # Download the web page
    $web_page = http_get($target, $referer = "");
    # Update the target in case there was a redirection
    $target = $web_page['STATUS']['url'];
    # Strip file name off target for use as page base
    $page_base = get_base_page_address($target);
    # Identify the directory where images are to be saved
    $save_image_directory = "saved_images_" . str_replace("http://", "", $page_base);
    # Parse the image tags
    $img_tag_array = parse_array($web_page['FILE'], "<img", ">");
    $image_total = count($img_tag_array);
    if ($image_total == 0) {
        echo "No images found at {$target}\n";
        exit;
    }
    # Process the source attribute of each image tag
    for ($xx = 0; $xx < $image_total; $xx++) {
        $image_path = get_attribute($img_tag_array[$xx], $attribute = "src");
        echo " image: " . $image_path;
        $image_url = resolve_address($image_path, $page_base);
        if (get_base_domain_address($page_base) != get_base_domain_address($image_url)) {
            echo "\nSkipping off-domain image.\n";
            continue;
        }
        # Directory part of the src path; ":" is replaced for use in file names
        $directory = substr($image_path, 0, strrpos($image_path, "/"));
        $directory = str_replace(":", "-", $directory);
        $image_path = str_replace(":", "-", $image_path);
        # Clear the stat cache to get an accurate directory status
        clearstatcache();
        if (!is_dir($save_image_directory . "/" . $directory)) {
            mkpath($save_image_directory . "/" . $directory);
        }
        # Download the image, report image size
        $this_image_file = download_binary_file($image_url, $ref = "");
        echo " size: " . strlen($this_image_file);
        # Save the image
        if (stristr($image_url, ".jpg") || stristr($image_url, ".gif") || stristr($image_url, ".png")) {
            $fp = fopen($save_image_directory . "/" . $image_path, "w");
            if ($fp === false) {
                # fopen() failed (bad path or permissions); writing to it would be an error
                echo " (could not open file for writing)\n";
                continue;
            }
            fputs($fp, $this_image_file);
            fclose($fp);
            echo "\n";
        }
    }
}
Example #13
0
 /**
  * Initialises the object via the parent constructor and resolves its members.
  *
  * When member_ids arrives as a string, both member_ids and member_roles are
  * run through parse_array(), the referenced objects are loaded, and
  * $this->members is filled with array(object, role) pairs matched by position.
  *
  * @param mixed $data Raw data handed to the parent constructor.
  */
 function __construct($data) {
   parent::__construct($data);
   // Keys are quoted: the bare words member_ids / member_roles were undefined
   // constants, which raise an Error on PHP 8.
   if(isset($this->data['member_ids']) && is_string($this->data['member_ids'])) {
     $this->data['member_ids']=parse_array($this->data['member_ids']);
     $this->data['member_roles']=parse_array($this->data['member_roles']);
     // Load all members in one call before they are fetched one by one below
     load_objects($this->data['member_ids']);
     foreach($this->data['member_ids'] as $i=>$mem) {
       $obj=load_object($mem);
       $this->members[]=array($obj, $this->data['member_roles'][$i]);
     }
   }
 }
Example #14
0
 $indirectTotalIndirectRouteDistanceArray = array();
 $indirectTotalRouteDistanceArray = array();
 $numRouteCount = 0;
 //parse the routes
 foreach ($routes as $route) {
     $numRouteCount++;
     $indirectRouteErrorCodeArray = parse_array($indirectRoute, "<ErrorCode>", "</ErrorCode>");
     $indirectStartStopsArray = parse_array($indirectRoute, "<StartStop>", "</StartStop>");
     $indirectStartBusesArray = parse_array($indirectRoute, "<StartBuses>", "</StartBuses>");
     $indirectFirstJunctionsArray = parse_array($indirectRoute, "<FirstJunction>", "</FirstJunction>");
     $indirectDistanceBetweenJunctionArray = parse_array($indirectRoute, "<DistanceBetweenJunction>", "</DistanceBetweenJunction>");
     $indirectSecondJunctionsArray = parse_array($indirectRoute, "<SecondJunction>", "</SecondJunction>");
     $indirectEndBusesArray = parse_array($indirectRoute, "<EndBuses>", "</EndBuses>");
     $indirectEndStopsArray = parse_array($indirectRoute, "<EndStop>", "</EndStop>");
     $indirectTotalIndirectRouteDistanceArray = parse_array($indirectRoute, "<TotalIndirectRouteDistance>", "</TotalIndirectRouteDistance>");
     $indirectTotalRouteDistanceArray = parse_array($indirectRoute, "<TotalRouteDistance>", "</TotalRouteDistance>");
 }
 for ($i = 0; $i < $numRouteCount; $i++) {
     if ($i == 0) {
         echo "StartStop:" . $startStop . "," . "EndStop:" . $endStop . ",depotErrorCode:" . intval($depotErrorCode) . "n";
     }
     echo "<br/>";
     //echo "Route#".$i;
     echo "ErrorCode:" . intval(return_between($indirectRouteErrorCodeArray[$i], "<ErrorCode>", "</ErrorCode>", EXCL)) . "\t";
     //echo "StartStop:".$indirectStartStopsArray[$i]."\t";
     //echo "StartBuses:".$indirectStartBusesArray[$i]."\t";
     //echo "FirstJunction:".$indirectFirstJunctionsArray[$i]."\t";
     echo "DistanceBetweenJucntion:" . $indirectDistanceBetweenJunctionArray[$i] . "\t";
     //echo "SecondJunction:".$indirectSecondJunctionsArray[$i]."\t";
     //echo "EndBuses:".$indirectEndBusesArray[$i]."\t";
     //echo "EndStop:".$indirectEndStopsArray[$i]."\t";


<?php 
# Inlcude http and parse libraries
include "../util/LIB_http_browser.php";
include "../util/LIB_parse.php";
include "../util/LIB_resolve_addresses.php";
include "../util/LIB_http_codes.php";
# Identify the target web page and the page base
$target = "http://www.schrenk.com/nostarch/webbots/page_with_broken_links.php";
$page_base = "http://www.schrenk.com/nostarch/webbots/";
# Download the web page
$downloaded_page = http_get($target, $ref = "");
# Parse the links
$link_array = parse_array($downloaded_page['FILE'], $beg_tag = "<a", $close_tag = ">");
# Verify the links
?>
<table border="1" cellpadding="1" cellspacing="0">
    <tr bgcolor="#e0e0e0">
        <th>URL</th>
        <th>HTTP CODE</th>
        <th>DOWNLOAD TIME (seconds)</th>
    </tr>
<?php 
for ($xx = 0; $xx < count($link_array); $xx++) {
    // Parse the http attribute from link
    $link = get_attribute($tag = $link_array[$xx], $attribute = "href");
    // Create a fully resolved address
    $resloved_link_address = resolve_address($link, $page_base);
    $downloaded_link = http_get($resloved_link_address, $target);
 $indirectDepotArray = array();
 $indirectDistanceBetweenDepotAndStartStopArray = array();
 $indirectDistanceBetweenDepotAndEndStopArray = array();
 $indirectEndBusesArray = array();
 $indirectEndStopsArray = array();
 $indirectTotalRouteDistanceArray = array();
 $routes = parse_array($str, "<Route>", "</Route>");
 $numRouteCount = 0;
 foreach ($routes as $route) {
     $numRouteCount++;
     $indirectRouteErrorCodeArray = parse_array($str, "<ErrorCode>", "</ErrorCode>");
     $indirectStartStopsArray = parse_array($str, "<StartStop>", "</StartStop>");
     $indirectStartBusesArray = parse_array($str, "<StartBuses>", "</StartBuses>");
     $indirectDepotArray = parse_array($str, "<FirstJunction>", "</FirstJunction>");
     $indirectEndBusesArray = parse_array($str, "<EndBuses>", "</EndBuses>");
     $indirectEndStopsArray = parse_array($str, "<EndStop>", "</EndStop>");
 }
 for ($i = 0; $i < $numRouteCount; $i++) {
     $depotNameString = return_between($indirectDepotArray[$i], "<FirstJunction>", "</FirstJunction>", EXCL);
     list($depotName, $lat, $lon) = explode(":", $depotNameString);
     $distanceBetweenDepotAndStartStop = distanceBetweenStops($startStop, $depotName);
     $distanceBetweenDepotAndEndStop = distanceBetweenStops($endStop, $depotName);
     $totalRouteDistance = floatval($distanceBetweenDepotAndStartStop) + floatval($distanceBetweenDepotAndEndStop);
     $sql = "INSERT INTO directdepotbusroutes (DepotErrorCode,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  StartStop,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  EndStop,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  DepotNameString,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  BusesBetweenStartStopAndDepot,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  BusesBetweenEndStopAndDepot,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  DistanceBetweenDepotAndStartStop,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  DistanceBetweenDepotAndEndStop,\r\n\t\t\t\t\t\t\t\t\t\t\t\t  TotalRouteDistance) Values('" . intval(return_between($indirectRouteErrorCodeArray[$i], "<ErrorCode>", "</ErrorCode>", EXCL)) . "','" . $startStop . "','" . $endStop . "','" . $depotNameString . "','" . return_between($indirectStartBusesArray[$i], "<StartBuses>", "</StartBuses>", EXCL) . "','" . return_between($indirectEndBusesArray[$i], "<EndBuses>", "</EndBuses>", EXCL) . "','" . floatval($distanceBetweenDepotAndStartStop) . "','" . floatval($distanceBetweenDepotAndEndStop) . "','" . floatval($totalRouteDistance) . "')";
     echo $sql;
     /*$result=mysql_query($sql);
     		if($result)
     			echo "insert is successful";
     		else
     			echo "<b>INSERT ERROR: ".mysql_error()."</b>". $sql."<br/>";
     		*/
Example #17
0
/**
 * Returns every stored version of a category, keyed by version.
 *
 * Without $version, the current version is read from category_current for
 * $id. The rows are then selected by $id, or - when $id is empty - by the
 * category that owns $version.
 *
 * NOTE(review): $db_central is neither a parameter nor declared global here,
 * so it is undefined inside this function - confirm how sql_query() treats it.
 *
 * @param mixed $id      Category id (may be empty when $version is given).
 * @param mixed $param   Not used by this function.
 * @param mixed $version Version to start from; looked up when empty.
 * @return array array($list, $version): rows keyed by version, and the version used.
 */
function category_history_all($id, $param, $version) {
  $pg_id=postgre_escape($id);

  if(!$version) {
    $current=pg_fetch_assoc(sql_query("select * from category_current where category_id=$pg_id", $db_central));
    $version=$current['version'];
  }

  if($id) {
    $query="select * from category where category_id=$pg_id";
  }
  else {
    $pg_version=postgre_escape($version);
    $query="select * from category where category_id=(select category_id from category first where first.version=$pg_version)";
  }
  $res=sql_query($query, $db_central);

  $list=array();
  while($row=pg_fetch_assoc($res)) {
    // Decode the serialized columns before storing the row
    $row['parent_versions']=parse_array($row['parent_versions']);
    $row['version_tags']=parse_hstore($row['version_tags']);
    $list[$row['version']]=$row;
  }

  return array($list, $version);
}
Example #18
0
         continue;
     }
 } else {
     $strHTML = $seed['strHTML'];
 }
 /*Get headers just this time for fun, please*/
 /*	if ($seed["strHeader"]==NULL) {
 			$downloaded_page = http_get_withheader($seed["strURL"], "");
 			$strHeader = substr($downloaded_page['FILE'],0,strpos($downloaded_page['FILE'],"<"));
 			$strSQL = "UPDATE tblPages SET " .
 			"strHeader='" .mysql_real_escape_string($strHeader) . "' WHERE iPageID=" . $seed["iPageID"];
 			db_run_query($strSQL);
 		}*/
 /*End insert*/
 echo "Parsing....\n";
 $anchor_tags = parse_array($strHTML, "<a ", "</a>", EXCL);
 # Put http attributes for each tag into an array
 $sqlQuery = "INSERT INTO tblLinks(fkParentID,fkChildID,fkQueryID,iNumberTimes) VALUES ";
 //print "1 sqlQuery is $sqlQuery\n";
 $outputExists = false;
 for ($xx = 0; $xx < count($anchor_tags); $xx++) {
     //print "tags : ". $anchor_tags[$xx]. "\n";
     $href = get_attribute($anchor_tags[$xx], "href");
     //print "href = $href , page_base = $page_base \n";
     if ($href === false) {
         continue;
     }
     $resolved_address = resolve_address($href, $page_base);
     //echo "have address: $resolved_address\n";
     if (!exclude_link($resolved_address)) {
         try {
Example #19
0
$table_array = parse_array($web_page['FILE'], "<table>", "</table>");

 for($l=0; $l<count($table_array); $l++)
    echo"<br /><b>page_var_table".$k."=:". strip_tags($table_array[$l])."\n";
    echo"<br />";


$img_tag_array = parse_array($web_page['FILE'], "<img", ">");

  for($h=0; $h<count($img_tag_array); $h++)
    echo"<br /><b>page_var_image".$k."=:". htmlentities($img_tag_array[$h])."\n";
    echo"<br />";

}

$link_tag_array = parse_array($web_page['FILE'], "<a", ">");

  for($v=0; $v<count($link_tag_array); $v++)  {
     echo"<br /><b>page_var_image".$k."=:". htmlentities($link_tag_array[$v])."\n";
    echo"<br />";

}

}
else {
$status = "$site is not valid";

}
?>

					else
						{
						$OptionResults = array();
						$OptionResults['value'] = $optionvalue;
						$OptionResults['label'] = $optionlabel;
						}
					array_push($SelectResults['options'], $OptionResults);

					}
				array_push($FormResults['selects'], $SelectResults);
				}

			$beg_tag = "<input";
			$close_tag = ">";

			$InputArray = parse_array($form, $beg_tag, $close_tag);
			$HowManyInput = count($InputArray);
			//echo "How Many Inputs? " . $HowManyInput . "<br />";
			foreach ($InputArray as $Input)
				{

				$Begin_Tag = 'id="';
				$End_Tag = '"';
				$inputid = return_between($Input, $Begin_Tag, $End_Tag, EXCL);
				$inputid = str_replace(chr(34),"",$inputid);

				//$inputid = get_attribute($Input,'id');
				//echo "input id: " . $inputid . "<br />";
				$pos = strpos($inputid, '=');
				if ($pos !== false)
					{
Example #21
0
     $this_image_file = download_binary_file($books[$bookCount]['imageUrl'], $ref = "");
     if (stristr($books[$bookCount]['imageUrl'], ".jpg") || stristr($books[$bookCount]['imageUrl'], ".gif") || stristr($books[$bookCount]['imageUrl'], ".png")) {
         file_put_contents($save_image_directory . basename($books[$bookCount]['imageUrl']), $this_image_file);
     }
 }
 $divClass = 'views-field-title';
 if (stristr($div, $divClass)) {
     $books[$bookCount]['title'] = trim(strip_tags($div));
     $aTag = parse_array($div, '<a', '</a>');
     if ($cloudflare == 1) {
         $books[$bookCount]['bookUrl'] = resolve_address(str_replace("www", "ftp", get_attribute($aTag[0], $attribute = "href")), $page_base);
     } else {
         $books[$bookCount]['bookUrl'] = resolve_address(get_attribute($aTag[0], $attribute = "href"), $page_base);
     }
     $bookPage[$bookCount] = http_get($books[$bookCount]['bookUrl'], $target);
     $bookDivs[$bookCount] = parse_array($bookPage[$bookCount]['FILE'], "<div class=\"product-body\"", "</div>");
     $books[$bookCount]['summary'] = $bookDivs[$bookCount][0];
 }
 $divClass = 'views-field-field-author-value';
 if (stristr($div, $divClass)) {
     $books[$bookCount]['author'] = trim(strip_tags($div));
 }
 $divClass = 'views-field-field-isbn13-value';
 if (stristr($div, $divClass)) {
     $books[$bookCount]['ISBN13'] = trim(strip_tags($div));
 }
 $divClass = 'views-field-field-released-value';
 if (stristr($div, $divClass)) {
     $books[$bookCount]['relesedDate'] = trim(strip_tags($div));
 }
 $divClass = 'views-field-sell-price';
Example #22
0
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
//Include Libraries
include "../library/LIB_parse.php";
include "../library/LIB_http.php";
include "../library/LIB_simple_spider.php";
include "../library/LIB_resolve_addresses.php";
include "../library/LIB_download_images.php";
$base_url = "http://www.jboss.org/projects";
// Project names scraped from the listing page
$projectNames = array();
$target = $base_url;
$referrer = "J.A.R.V.I.S. Web Bot";
$web_page = http_get($target, $referrer);
// Each match runs from a closing </span> to the following </a>
$name_excl = parse_array($web_page['FILE'], "</span>", "</a>");
$name_total = count($name_excl);
for ($i = 0; $i < $name_total; $i++) {
    // Keep the text before "</a>", drop spaces, then drop the 7-character "</span>" prefix
    $aRemove = strstr($name_excl[$i], "</a>", true);
    $replace = str_replace(" ", "", $aRemove);
    $spanSubRemove = substr($replace, 7);
    array_push($projectNames, $spanSubRemove);
}
// The first match is discarded
array_shift($projectNames);
foreach ($projectNames as $projectName) {
    // Nothing to search for when the name came out empty
    if ($projectName === false || $projectName === '') {
        continue;
    }
    // The names come from a remote page, so each one is shell-escaped before it
    // reaches the command line (they were previously concatenated raw).
    $command = "python getsploits.py -o " . escapeshellarg($projectName . ".txt") . " --type any " . escapeshellarg($projectName);
    system($command);
}
 #-------------------------------------------------
 # Start: Look for the $desired_site in each result case
 for ($page_rank = 0; $page_rank < count($desired_content_array); $page_rank++) {
     // Look for the $desired_site to appear in one of the listings
     if (stristr($desired_content_array[$page_rank], trim($desired_site))) {
         $url_found_rank_on_page = $page_rank;
         // add one to compensate for listing 0
         $url_found = true;
     }
 }
 # End: Parsing content
 #-------------------------------------------------
 #-------------------------------------------------
 # Start: Get location of the next page
 // Create an array of links on this page
 $search_links = parse_array($result['FILE'], "<a", "</a>", EXCL);
 // Look for the link with the word "Next" in it, as we know this
 // link contains the address of the next page.
 for ($xx = 0; $xx < count($search_links); $xx++) {
     if (strstr($search_links[$xx], "Next")) {
         $previous_target = $target;
         $target = get_attribute($search_links[$xx], "href");
         // Remember that this path is relative to the target page,
         // so add protocol and domain
         $target = "http://www.schrenk.com/nostarch/webbots/search/" . $target;
     }
 }
 # End: Get location of the next page
 #-------------------------------------------------
 # Don't seatch forever, stop after 10 pages
 if ($page_index == 10) {
Example #24
0
# Convert the input file, re-encode it as Big5 and post it as the query
$handle = opecc_convert("input.txt");
$input_str = iconv("utf-8", "big5", $handle);
$input_str = trim($input_str);
$action = "http://sunlight.iis.sinica.edu.tw/cgi-bin/text.cgi";
$data_arr = array();
$data_arr["query"] = $input_str;
$result = http($action, $ref = "", $method = "POST", $data_arr, EXCL_HEAD);
$result_str = $result["FILE"];
# The second <META ...> tag carries the result location in its CONTENT
# attribute, after an "=" sign
$result_arr = parse_array($result_str, "<META ", ">");
$res_url = get_attribute($result_arr[1], "CONTENT");
$res_url = explode("=", $res_url);
$res_url = trim($res_url[1], "'");
echo "\n" . "response_url: http://sunlight.iis.sinica.edu.tw/" . $res_url . "\n";
# Download and parse the response page
$web_page = http_get("http://sunlight.iis.sinica.edu.tw/" . $res_url, $ref = "");
# Strip leading/trailing <br> tags. trim($s, "<br>") treats its second argument
# as a set of characters and could eat the ">" of a trailing "</a>", which
# would hide the last link from parse_array().
$web_page = preg_replace('/^(?:<br>)+|(?:<br>)+$/i', '', (string) $web_page["FILE"]);
$response_link = parse_array($web_page, "<a ", "</a>");
$response_link2 = parse_array($web_page, "<a ", ">");
$search_arr = array("'", '"');
$counter = count($response_link);
for ($count = 0; $count < $counter; $count++) {
    # NOTE(review): $temp_str is computed but never used below
    $temp_str = return_between($response_link[$count], "<a>", "</a>", EXCL);
    $temp_str = explode(">", $temp_str);
    # Normalise quotes in the opening tag before reading its HREF
    $link = "http://sunlight.iis.sinica.edu.tw/" . get_attribute(str_replace($search_arr, '"', $response_link2[$count]), "HREF");
    $web_page = http_get($link, $ref = "");
    $web_page = $web_page["FILE"];
    $plain_text = return_between($web_page, "<pre>", "</pre>", EXCL);
    $plain_text = str_replace("-", "", trim($plain_text));
    # NOTE(review): the text is echoed twice - confirm this is intended
    echo "\n" . $plain_text . "\n";
    echo "\n" . $plain_text . "\n";
}
<?php
include "application/libraries/LIB_http.php";
include "application/libraries/LIB_parse.php";
include "application/libraries/LIB_thumbnail.php";
// Crawl the download page of novel ids 1..1799 and store each novel's name
$ref = "http://www.wenku8.cn";
$method = "GET";
$this->load->model("insertmodel");
for ($xx = 1; $xx < 1800; $xx++) {
    $target = 'http://www.wenku8.cn/wap/article/packshow.php?id=' . $xx . '&type=txtfull';
    $web_page = http_get($target, $ref);
    // The name is read from the first <card ...> tag; once spaces and dashes
    // are removed it looks like <cardid="packshow.php"title="<novel name>TXT...">
    $label = '<card';
    $meta_tag_array = parse_array($web_page['FILE'], $label, ">");
    $meta_tag_array = str_replace(" ", "", $meta_tag_array);
    $meta_tag_array = str_replace("-", "", $meta_tag_array);
    $meta_tag = isset($meta_tag_array[0]) ? $meta_tag_array[0] : '';
    // Keep what follows title=" and precedes "TXT"
    $meta_tag = split_string($meta_tag, "title=\"", AFTER, EXCL);
    $novel_name = split_string($meta_tag, "TXT", BEFORE, EXCL);
    // One insert per novel. This used to sit in a loop bounded by count($meta_tag),
    // but $meta_tag is a string: count() returned 1 with a warning on older
    // PHP and throws a TypeError on PHP 8.
    $data = array('novel_id' => $xx, 'novel_name' => $novel_name);
    $this->insertmodel->insert_Novel($data);
}
?>
	</div>
	<p class="footer">Page rendered in <strong>{elapsed_time}</strong> seconds</p>
</div>
/**
 * Strip every fragment delimited by $open_tag ... $close_tag from $string.
 *
 * The fragments are collected once with parse_array(); the removal is then
 * applied as many times as there are fragments.
 *
 * @param string $string    Text to clean.
 * @param string $open_tag  Opening delimiter handed to parse_array().
 * @param string $close_tag Closing delimiter handed to parse_array().
 * @return string The text with the collected fragments removed.
 */
function remove($string, $open_tag, $close_tag)
{
    # Collect the fragments that have to disappear from the input
    $fragments = parse_array($string, $open_tag, $close_tag);

    # One removal pass per collected fragment; each pass deletes all fragments
    $passes_left = count($fragments);
    while ($passes_left > 0) {
        $string = str_replace($fragments, "", $string);
        $passes_left--;
    }

    return $string;
}
                }
                echo "FOUND: id_column={$id_column}\n";
                echo "FOUND: price_column={$price_column}\n";
                echo "FOUND: name_column={$name_column}\n";
                # Save the heading row for later use
                $heading_row = $table_row;
            }
            # Detect the end of the desired data
            $ending_landmark = "Calculate";
            if (stristr($product_row_array[$table_row], $ending_landmark)) {
                echo "PARSING COMPLETE!\n";
                break;
            }
            # Parse product & price data
            if (isset($heading_row) && $heading_row < $table_row) {
                $table_cell_array = parse_array($product_row_array[$table_row], "<td", "</td>");
                $product_array[$product_count]['ID'] = strip_tags(trim($table_cell_array[$id_column]));
                $product_array[$product_count]['NAME'] = strip_tags(trim($table_cell_array[$name_column]));
                $product_array[$product_count]['PRICE'] = strip_tags(trim($table_cell_array[$price_column]));
                $product_count++;
                echo "PROCESSED: Item #{$product_count}\n";
            }
        }
    }
}
# Display the collected data
for ($xx = 0; $xx < count($product_array); $xx++) {
    echo "{$xx}. ";
    echo "ID: " . $product_array[$xx]['ID'] . ", ";
    echo "NAME: " . $product_array[$xx]['NAME'] . ", ";
    echo "PRICE: " . $product_array[$xx]['PRICE'] . "\n";
include_once "LIB_simple_spider.php";
// spider routines used by this app.
include_once "LIB_db_functions.php";
include_once "LIB_encoding.php";
// Don't let PHP time out
set_time_limit(0);
db_connect();
//Before starting, check the domains fields of the database and fill in any missing entries
//Also fill in missing
//$strSQL="SELECT strURL,strDomain,strHTML FROM tblPages WHERE bolProcessed=0 AND bolCentral=1";
//$result = mysql_query($strSQL,$GLOBALS["db"]) or die('Query failed: ' . mysql_error());

// Work through the queue: for every stored page, record a link from the page's
// domain to the domain of each absolute http(s) URL found in its anchors.
$seed = db_get_next_to_process();
while ($seed != null) {
    $domain = $seed['strDomain'];
    $html = $seed['strHTML'];
    $atags = parse_array($html, "<a", "</a>");
    foreach ($atags as $tag) {
        $destURL = trim((string) get_attribute($tag, "href"));
        //echo "destURL: $destURL\n";
        // Only absolute URLs count as external links. The scheme must be at the
        // START of the href; a plain substring test would also accept relative
        // links that merely embed a URL (e.g. "redirect.php?u=http://...").
        // stripos() makes the scheme match case-insensitive.
        if (stripos($destURL, "http://") === 0 || stripos($destURL, "https://") === 0) {
            $destDomain = get_domain($destURL);
            //echo "Saving To-From: $domain - $destDomain\n";
            db_update_domain_links($domain, $destDomain);
        }
    }
    // Flag the page as done before asking for the next one.
    db_marked_processed($seed);
    $seed = db_get_next_to_process();
}
db_close();
echo "Done.\n";
// $operator_email is not set in this script; presumably it comes from one of
// the included libraries -- confirm.
mail($operator_email, "Parse Success", "Finished parsing external linked domains: " . date('Y-m-d H:i:s') . "\n", "FROM: " . $operator_email);
Example #29
0
/**
 * Download one status as XML and hand every <status> element to crawlStatus().
 *
 * @param mixed $rts Status id, interpolated into the request URL.
 * @return void
 */
function processRTS($rts)
{
    // No referer is sent: the variable previously interpolated here ($tw) was
    // never in scope inside this function, so the referer was always "".
    $replyXML = http_get("http://twitter.com/statuses/show/{$rts}.xml", "");

    // Each <status>...</status> fragment of the response is crawled in turn.
    $replyArray = parse_array($replyXML['FILE'], "<status>", "</status>");
    foreach ($replyArray as $status) {
        crawlStatus($status);
    }
}