Ejemplo n.º 1
0
<?php

// Parse the file with tidy's "clean" option switched on, run the
// clean-and-repair pass, then print the document through its string conversion.
$document = tidy_parse_file('clean_ex1.html', ['clean' => true]);
tidy_clean_repair($document);
echo $document;
Ejemplo n.º 2
0
<?php

// Fetch the menu page and let tidy parse it, asking for XHTML output with numeric entities.
$tidy = tidy_parse_file("http://www.foodpro.huds.harvard.edu/foodpro/menu_items.asp?date=12-2-2009&type=30&meal=2", array("numeric-entities" => true, "output-xhtml" => true));
$tidy->cleanRepair();
// Serialize the repaired document and load it into SimpleXML.
$xhtml = (string) $tidy;
$dom = simplexml_load_string($xhtml);
// Bind the "xhtml" prefix to the XHTML namespace so the XPath query below can use it.
$dom->registerXPathNamespace("xhtml", "http://www.w3.org/1999/xhtml");
// Select the rows of the table directly inside the form whose id is "report_form".
$trs = $dom->xpath("//xhtml:form[@id='report_form']/xhtml:table/xhtml:tr");
// Start with no category set; the loop that follows checks isset($category).
unset($category);
foreach ($trs as $tr) {
    // remember category
    if ($tr["class"] == "category") {
        $category = trim((string) $tr->td);
    } else {
        if (!isset($category)) {
            continue;
        } else {
            // get item
            $a = $tr->td->div->span->a;
            if (!($item = trim($a))) {
                continue;
            }
            // determine recipe
            if (!preg_match("/recipe=(\\d+)/", $a["href"], $matches)) {
                continue;
            }
            $recipe = $matches[1];
            // INSERT INTO into items
            $sql = sprintf("INSERT IGNORE INTO items (recipe, item) VALUES('%s', '%s')", mysql_real_escape_string($recipe), mysql_real_escape_string($item));
            mysql_query($sql);
            // INSERT INTO legend
Ejemplo n.º 3
0
<?php

/*
 * dumpit5.php
 *
 * a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc.
 * file as it is represented in the document model.
 *
 * NOTE: Only works with tidy for PHP 5+, for tidy in 4.3.x, see dumpit.php
 *
 * By: John Coggeshall <*****@*****.**>
 *
 * Usage: php dumpit5.php <filename>
 */
// The file to dump is taken from the first command-line argument.
// NOTE(review): there is no check that the argument was supplied or that parsing succeeded.
$tidy = tidy_parse_file($_SERVER['argv'][1]);
/* Optionally you can do this here if you want to fix up the document */
/* $tidy->cleanRepair(); */
// Fetch the root node and pass it to dump_tree() (not visible in this excerpt),
// then finish the output with a newline.
$tree = $tidy->root();
dump_tree($tree);
echo "\n";
function node_type($type)
{
    switch ($type) {
        case TIDY_NODETYPE_ROOT:
            return "Root Node";
        case TIDY_NODETYPE_DOCTYPE:
            return "DocType Node";
        case TIDY_NODETYPE_COMMENT:
            return "Comment Node";
        case TIDY_NODETYPE_PROCINS:
            return "ProcIns Node";
Ejemplo n.º 4
0
Archivo: Safe.php Proyecto: roojs/pear
 /**
  * Parse a file with the tidy extension and convert its node tree.
  *
  * The file is handed to tidy with the 'UTF8' encoding, and the root node of
  * the parsed document is passed to $this->tidyTree(), whose result is
  * returned unchanged.
  *
  * Terminates the script with die() when the tidy extension is not loaded.
  *
  * @param string $fn Path of the file to parse.
  *
  * @return mixed Whatever $this->tidyTree() returns for the root node.
  *
  * @throws \RuntimeException When tidy cannot load or parse the file.
  */
 function parseFile($fn)
 {
     // The commented-out steps below refer to a $doc string that this method
     // never defines, so they stay disabled.
     // Save all '<' symbols
     //$doc = preg_replace("/<(?=[^a-zA-Z\/\!\?\%])/", '&lt;', $doc);
     // Web documents shouldn't contains \x00 symbol
     //$doc = str_replace("\x00", '', $doc);
     // Opera6 bug workaround
     //$doc = str_replace("\xC0\xBC", '&lt;', $doc);
     // UTF-7 encoding ASCII decode
     //$doc = $this->repackUTF7($doc);
     if (!extension_loaded('tidy')) {
         die("Add tidy extension to extension.ini");
     }
     $tree = tidy_parse_file($fn, array(), 'UTF8');
     // tidy_parse_file() returns false on failure; calling root() on that
     // value would abort with an unrelated "member function on bool" error.
     if ($tree === false) {
         throw new \RuntimeException("tidy could not parse file: " . $fn);
     }
     return $this->tidyTree($tree->root());
 }
Ejemplo n.º 5
0
<?php

// Load the sample page, let tidy repair it, and print the repaired markup.
$sourceFile = 'intro2_ex1.html';
$document = tidy_parse_file($sourceFile);
tidy_clean_repair($document);
$repaired = tidy_get_output($document);
echo $repaired;
Ejemplo n.º 6
0
            }
            if (!isset($get[$e])) {
                continue;
            }
            $key = strtolower(basename($url));
            $files[$key] = array('url' => $url, 'referer' => $file);
        }
        dump_nodes($file, $child);
    }
}
// Parse every matching file found in $scandir and hand its node tree to dump_nodes().
$a = scandir($scandir);
foreach ($a as $file) {
    // Skip entries whose name does not contain ".htm" or ".html" (case-insensitive).
    if (!preg_match('/\\.html?/i', $file)) {
        continue;
    }
    $tidy = tidy_parse_file($scandir . '/' . $file);
    // Pass dump_nodes() the same path that was parsed, directory separator included.
    // NOTE(review): dump_nodes() appears to record its first argument as the referer; confirm.
    dump_nodes($scandir . '/' . $file, $tidy->root());
}
$len = strlen($base) + 1;
ksort($files);
foreach ($files as $key => $value) {
    $url = $value['url'];
    $referer = $value['referer'];
    $file = substr($url, $len);
    #printf("file=%s url=%s\n", $file, $url);
    $zip = $dir . '/' . $file;
    if (!file_exists($zip)) {
        #		fprintf(STDERR, "File not found: %s\n", $zip);
        continue;
    }
    $pathinfo = pathinfo($zip);
Ejemplo n.º 7
0
<?php

// Parse the sample document; the resulting tidy object is used below.
$tidy = tidy_parse_file("example.html");
/* Optionally you can do this here if you want to fix up the document */
/* $tidy->cleanRepair(); */
// Fetch the root node and pass it to dump_tree() (not visible in this excerpt),
// then finish the output with a newline.
$tree = $tidy->root();
dump_tree($tree);
echo "\n";
function node_type($type)
{
    switch ($type) {
        case TIDY_NODETYPE_ROOT:
            return "Root Node";
        case TIDY_NODETYPE_DOCTYPE:
            return "DocType Node";
        case TIDY_NODETYPE_COMMENT:
            return "Comment Node";
        case TIDY_NODETYPE_PROCINS:
            return "ProcIns Node";
        case TIDY_NODETYPE_TEXT:
            return "Text Node";
        case TIDY_NODETYPE_START:
            return "Start Node";
        case TIDY_NODETYPE_END:
            return "End Node";
        case TIDY_NODETYPE_STARTEND:
            return "Start/End Node";
        case TIDY_NODETYPE_CDATA:
            return "CDATA Node";
        case TIDY_NODETYPE_SECTION:
            return "Section Node";
Ejemplo n.º 8
0
 /**
  * Return the HTML of a feed file, fetching it into the temp directory first if it is missing.
  *
  * Files are kept under e_TEMP in a sub-directory named after the md5 of the
  * feed URL. When $this->useTidy is set, the local file is run through tidy
  * (XHTML output, "clean" enabled) and the tidy object's value is returned;
  * otherwise the raw file contents are returned.
  *
  * Side effect: sets $this->localPath to the local directory of this feed.
  *
  * @param string $file File name relative to the feed URL; '' is stored locally as "index.html".
  *
  * @return string|null The markup, or the message "Couldn't read file" when it cannot be obtained.
  */
 private function getRawHtml($file = '')
 {
     // Built before $file is defaulted, so an empty $file requests the feed URL followed by "/".
     $url = $this->feedUrl . "/" . $file;
     if ($file == '') {
         $file = "index.html";
     }
     // just for local file, not url.
     $path = md5($this->feedUrl);
     $local_file = $path . "/" . $file;
     $this->localPath = e_TEMP . $path . "/";
     if (!is_dir(e_TEMP . $path)) {
         mkdir(e_TEMP . $path, 0755);
     }
     if (!file_exists(e_TEMP . $local_file)) {
         e107::getFile()->getRemoteFile($url, $local_file);
         // downloads to e107_system/.../temp
     }
     if ($this->useTidy) {
         $options = array("output-xhtml" => true, "clean" => true);
         $parsed = tidy_parse_file(e_TEMP . $local_file, $options);
         // tidy_parse_file() returns false on failure; report that the same
         // way as an unreadable file instead of dereferencing false.
         if ($parsed === false) {
             return "Couldn't read file";
         }
         return $parsed->value;
     } elseif (!($html = file_get_contents(e_TEMP . $local_file))) {
         return "Couldn't read file";
     }
     return $html;
 }
Ejemplo n.º 9
0
<?php

// Repair 015.html (located beside this script) with tidy's "show-body-only"
// option enabled, then print the resulting markup.
$input = __DIR__ . '/015.html';
$document = tidy_parse_file($input, ['show-body-only' => true]);
tidy_clean_repair($document);
echo tidy_get_output($document);
Ejemplo n.º 10
0
#!/usr/bin/env php
<?php 
// Web scrape Pilot/FlyingJ truckstop lat/lng for each store (rather than copy/paste 647 times)
// Insert latlngs
// constants and functions
require "../includes/config.php";
$values = [];
$ids = array(1, 2, 3, 4, 6, 8, 9, 11, 12, 13, 14, 15, 16, 17, 21, 23, 24, 26, 28, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 71, 72, 73, 75, 76, 77, 79, 81, 82, 87, 88, 89, 90, 91, 92, 94, 95, 96, 97, 114, 118, 130, 131, 133, 134, 137, 140, 141, 144, 145, 146, 147, 149, 150, 151, 152, 154, 156, 157, 159, 163, 164, 165, 167, 168, 171, 174, 179, 180, 190, 192, 195, 196, 198, 199, 200, 206, 208, 209, 210, 211, 213, 219, 222, 224, 226, 231, 232, 233, 234, 236, 237, 238, 239, 240, 242, 243, 245, 247, 249, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 265, 266, 267, 268, 270, 271, 274, 275, 278, 279, 280, 281, 282, 284, 285, 286, 287, 289, 290, 293, 294, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 316, 317, 318, 319, 321, 322, 324, 326, 328, 329, 330, 331, 332, 335, 336, 337, 338, 339, 340, 341, 343, 346, 347, 348, 350, 351, 352, 353, 354, 356, 358, 360, 362, 363, 365, 366, 367, 368, 369, 370, 372, 373, 374, 375, 377, 378, 380, 381, 384, 385, 386, 387, 388, 390, 391, 392, 393, 394, 396, 398, 399, 402, 403, 404, 405, 406, 407, 408, 409, 411, 412, 413, 415, 416, 417, 420, 421, 422, 424, 425, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 453, 454, 455, 457, 458, 459, 460, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 481, 482, 483, 485, 486, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 500, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 522, 523, 524, 525, 526, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 546, 547, 549, 550, 551, 553, 554, 555, 556, 557, 559, 568, 571, 572, 575, 576, 579, 580, 581, 583, 584, 586, 589, 590, 592, 593, 594, 595, 596, 597, 599, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 616, 617, 618, 619, 620, 621, 622, 623, 624, 
625, 626, 627, 628, 630, 631, 632, 633, 634, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 649, 650, 652, 653, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 671, 672, 673, 674, 675, 676, 677, 678, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 746, 747, 748, 749, 750, 752, 753, 754, 756, 758, 759, 760, 761, 762, 763, 764, 765, 768, 770, 772, 773, 774, 775, 777, 784, 871, 873, 874, 875, 876, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 920, 922, 923, 924, 925, 931, 932, 934, 963, 964, 965, 966, 967, 968, 970, 1001, 1002, 1003, 1004, 1005, 1006, 1012, 1013, 1015, 1020, 1021, 1023, 1024, 1025, 1026, 1027, 1028, 1030, 1033, 1043, 1054, 1058, 4619, 4622, 4642, 4649, 4651, 4656, 8601, 8604, 8605, 8616, 8621, 8624, 8625, 8628, 8630, 8658);
foreach ($ids as $id) {
    echo " \n ";
    // parse source
    if (!($tidy = tidy_parse_file("http://www.pilotflyingj.com/view-location?id={$id}", array("numeric-entities" => true, "output-xhtml" => true)))) {
        continue;
    }
    // convert to XHTML
    $tidy->cleanRepair();
    $xhtml = (string) $tidy;
    // parse XHTML
    $dom = simplexml_load_string($xhtml);
    // register XHTML namespace
    $dom->registerXPathNamespace("xhtml", "http://www.w3.org/1999/xhtml");
    // get store, lat/lng, and diesel price from paragraphs
    $paras = $dom->xpath("//xhtml:div[@id='indiv-location-store-info'][1]//xhtml:p[position()=1 or position()=6]");
    foreach ($paras as $para) {
        $strong = trim((string) $para[0]->strong);
        if ($strong === "Store Number:") {
            $store_number = trim((string) $para);
        }
        if ($strong === "Coordinates:") {
            $coords = trim((string) $para);
        }
Ejemplo n.º 11
0
<?php

// Parse 005.html from this script's directory and print tidy's output;
// no clean-and-repair pass is run here.
$document = tidy_parse_file(__DIR__ . '/005.html');
$markup = tidy_get_output($document);
echo $markup;
Ejemplo n.º 12
0
<?php

/* Parse a new document */
$tidy = tidy_parse_file("http://www.coggeshall.org/");
/*
 * Clean and repair the document. The method on the tidy class is
 * cleanRepair(); a snake_case clean_repair() method does not exist there
 * and calling it fails with an undefined-method error.
 */
$tidy->cleanRepair();
/* Output the results (the tidy object is converted to a string by echo) */
echo $tidy;
Ejemplo n.º 13
0
<?php

// Parse 016.html, passing 016.tcfg as the tidy configuration; both files
// are looked up in this script's directory.
$baseDir = __DIR__;
$document = tidy_parse_file($baseDir . '/016.html', $baseDir . '/016.tcfg');
tidy_clean_repair($document);
echo tidy_get_output($document);
Ejemplo n.º 14
0
<?php

// Tidy options for this run, listed one per line.
$config = [
    'clean' => true,
    'drop-proprietary-attributes' => true,
    'drop-font-tags' => true,
    'drop-empty-paras' => true,
    'hide-comments' => true,
    'join-classes' => true,
    'join-styles' => true,
];
// Parse php.html with those options, repair it, and print the document.
$document = tidy_parse_file('php.html', $config);
tidy_clean_repair($document);
echo $document;
Ejemplo n.º 15
0
<?php

// Fetch the PHP home page with XHTML output requested, repair it, and print the result.
$config = ['output-xhtml' => true];
$page = tidy_parse_file('http://www.php.net/', $config);
$page->cleanRepair();
echo $page;
Ejemplo n.º 16
0
<?php

/* First document: read from a file, repaired, then its error buffer is fetched */
$tidy1 = tidy_parse_file('myfile.html');
tidy_clean_repair($tidy1);
$errors = tidy_get_error_buffer($tidy1);

/* Second document: parsed from a string, repaired, then its output is fetched */
$tidy2 = tidy_parse_string('<HTML><B>Hello!</B>');
tidy_clean_repair($tidy2);
$output = tidy_get_output($tidy2);