コード例 #1
0
ファイル: scrape.php プロジェクト: richthegeek/Misc
/**
 * Run a single scraping rule: queue the links it finds, or extract data from pages.
 *
 * "follow_links" rules: builds a tidyPQ for $rule->rule->url, selects elements with
 * $rule->rule->selector and enqueues each element's href for $rule->rule->next_rule
 * unless in_queue() reports it is already queued.
 *
 * "get_data" rules: for every URL in $data that starts with $rule->rule->url, builds
 * a tidyPQ for that URL and evaluates each entry of $rule->rule->selectors. Entries
 * have the form "<selector>%text" or "<selector>%attr=<name>". The values, keyed by
 * target name, are handed to store_result(). URLs that do not start with the rule's
 * URL are handed to store_result() unchanged.
 *
 * Any other rule type is ignored.
 *
 * @param mixed       $rules Complete rule set; not read by this function (kept for callers).
 * @param object      $rule  Rule wrapper exposing ->set, ->id and ->rule.
 * @param array|false $data  URLs to process for "get_data" rules; a falsy value skips the rule.
 *
 * @return void
 */
function parse_rule($rules, $rule, $data = false)
{
    $type = $rule->rule->type;

    if ($type === "follow_links") {
        // The instance is never read again; constructing it presumably loads the
        // document that the global pq() calls below operate on — TODO confirm in tidyPQ.
        $pq = new tidyPQ($rule->rule->url);

        foreach (pq($rule->rule->selector) as $link) {
            // Resolve the href once instead of re-querying the element for every use.
            $href = stripslashes((string) pq($link)->attr("href"));
            print "parse rule ";
            if (!in_queue($rule->set, $rule->rule->next_rule, array($href))) {
                enqueue($rule->set, $rule->rule->next_rule, array($href));
            }
        }

        return;
    }

    if ($type !== "get_data" || !$data) {
        return;
    }

    $prefix = (string) $rule->rule->url;
    $prefixLength = strlen($prefix);

    foreach ($data as $url) {
        // Byte-exact prefix test; avoids the numeric-string coercion of a loose ==.
        if (strncmp($url, $prefix, $prefixLength) !== 0) {
            store_result($rule->set, $rule->id, $url);
            continue;
        }

        // See note above: kept for its construction side effect.
        $pq = new tidyPQ($url);
        $results = array();

        foreach ($rule->rule->selectors as $target => $selector) {
            // Reset per selector so an unrecognised entry never inherits the
            // value extracted for the previous target.
            $result = null;

            // Everything before the first "%" is the selector, the rest is the
            // extraction mode. Without a "%" there is no mode, so nothing is extracted.
            $split = strpos($selector, "%");
            if ($split !== false) {
                $sel = substr($selector, 0, $split);
                $get = substr($selector, $split + 1);

                if ($get === "text") {
                    $result = pq($sel)->text();
                } elseif (strncmp($get, "attr=", 5) === 0) {
                    $result = pq($sel)->attr(substr($get, 5));
                }
            }

            $results[$target] = $result;
        }

        store_result($rule->set, $rule->id, $results);
    }
}
コード例 #2
0
ファイル: api.php プロジェクト: undefx/delphi-epidata
                                             }
                                         } else {
                                             if ($source === 'nowcast') {
                                                 if (require_all($data, array('locations', 'epiweeks'))) {
                                                     // parse the request
                                                     $locations = extract_values($_REQUEST['locations'], 'str');
                                                     $epiweeks = extract_values($_REQUEST['epiweeks'], 'int');
                                                     // get the data
                                                     $epidata = get_nowcast($locations, $epiweeks);
                                                     store_result($data, $epidata);
                                                 }
                                             } else {
                                                 if ($source === 'meta') {
                                                     // get the data
                                                     $epidata = get_meta();
                                                     store_result($data, $epidata);
                                                 } else {
                                                     $data['message'] = 'no data source specified';
                                                 }
                                             }
                                         }
                                     }
                                 }
                             }
                         }
                     }
                 }
             }
         }
     }
 }