/**
 * Checks how empty and whitespace-only section names are handled.
 *
 * Builds a recipe with one named ingredients section, then adds ingredients
 * and instructions sections whose names are '' or only spaces, and asserts
 * that the recipe ends up with 2 entries in $ingredients and 1 entry in
 * $instructions.
 */
public function testMultipleEmptySections()
 {
     $recipe = new RecipeParser_Recipe();

     // Empty values for ingredients or instructions should be ignored.
     $recipe->addIngredientsSection('Pasta');
     $recipe->appendIngredient('1 lb Spaghetti');
     $recipe->addIngredientsSection('');
     $recipe->addIngredientsSection(' ');
     $recipe->appendIngredient('1 C Water');

     $recipe->addInstructionsSection('');
     $recipe->addInstructionsSection(' ');
     $recipe->addInstructionsSection('  ');
     $recipe->appendInstruction('Heat water in large pot.');

     // assertCount reports the actual size on failure, unlike comparing count() by hand.
     $this->assertCount(2, $recipe->ingredients);
     $this->assertCount(1, $recipe->instructions);
 }
Пример #2
0
 /**
  * Parse a recipe page whose <title> carries the Cooks.com markers and whose
  * content lives in <div> elements under <table class="hrecipe">.
  *
  * Populates the title, ingredients, instructions and section names of a
  * new RecipeParser_Recipe.
  *
  * @param string $html Raw page markup; it is handled as UTF-8.
  * @param string $url  Page URL. Not used by this parser.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);

     // Turn every non-ASCII character into a numeric entity before handing
     // the markup to DOMDocument. This replaces
     // mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), whose
     // 'HTML-ENTITIES' pseudo-encoding is deprecated as of PHP 8.2.
     // Convmap is (first code point, last code point, offset, mask).
     $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title: strip the site prefix/suffix from the first <title> element.
     $title_nodes = $doc->getElementsByTagName('title');
     if ($title_nodes->length) {
         $title = $title_nodes->item(0)->nodeValue;
         $title = trim(str_replace("Cooks.com - Recipe - ", "", $title));
         $title = trim(str_replace(" - Recipe - Cooks.com", "", $title));
         $recipe->title = $title;
     }

     // These divs contain all ingredients, section titles, and instructions.
     $content_nodes = $xpath->query('//table[@class="hrecipe"]//td/div');
     foreach ($content_nodes as $content_node) {
         // Each piece of content is identified by its "style" or "class" attribute.
         $style = $content_node->getAttribute("style");
         $class = $content_node->getAttribute('class');

         if (stripos($style, "color: BLACK;") !== false) {
             // Ingredients are found in a div with black text.
             $ingredient_nodes = $xpath->query('./span[@class = "ingredient"]', $content_node);
             foreach ($ingredient_nodes as $ingredient_node) {
                 $recipe->appendIngredient($ingredient_node->nodeValue);
             }
         } elseif ($class == "instructions") {
             // Instructions: every child node becomes one instruction line.
             foreach ($content_node->childNodes as $child) {
                 $line = RecipeParser_Text::formatAsOneLine($child->nodeValue);
                 $recipe->appendInstruction($line);
             }
         } elseif ($class == "section") {
             // Section title: always starts an ingredients section, and also
             // an instructions section once some instructions already exist.
             $section = RecipeParser_Text::formatSectionName($content_node->nodeValue);
             $recipe->addIngredientsSection($section);
             if (count($recipe->instructions) > 0) {
                 $recipe->addInstructionsSection($section);
             }
         }
     }

     return $recipe;
 }
Пример #3
0
 /**
  * Parse a recipe page that marks up its data with itemprop attributes and
  * "steps-list" / "recipe-notes" / "recipe-carousel" CSS classes.
  *
  * Populates title, time, yield, ingredients, instructions (with optional
  * sections), notes and photo URL of a new RecipeParser_Recipe.
  *
  * @param string $html Raw page markup; it is handled as UTF-8.
  * @param string $url  Page URL, used to resolve the photo URL.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);

     // Turn every non-ASCII character into a numeric entity before handing
     // the markup to DOMDocument. This replaces
     // mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), whose
     // 'HTML-ENTITIES' pseudo-encoding is deprecated as of PHP 8.2.
     // Convmap is (first code point, last code point, offset, mask).
     $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title
     $nodes = $xpath->query('//h1[@itemprop="name"]');
     if ($nodes->length) {
         $recipe->title = trim($nodes->item(0)->nodeValue);
     }

     // Times and yield
     // <time datetime="PT35M" itemprop="prepTime">
     // NOTE(review): the prepTime element is stored under time['total'];
     // confirm this is intentional rather than a mix-up with time['prep'].
     $nodes = $xpath->query('//time[@itemprop="prepTime"]');
     if ($nodes->length) {
         $time_text = $nodes->item(0)->textContent;
         if ($time_text) {
             $recipe->time['total'] = RecipeParser_Text::mixedTimeToMinutes($time_text);
         }
     }

     $nodes = $xpath->query('//*[@itemprop="recipeYield"]');
     if ($nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($nodes->item(0)->nodeValue);
     }

     // Ingredients: skip the literal "Ingredients" heading.
     $nodes = $xpath->query('//*[@itemprop="ingredients"]');
     foreach ($nodes as $node) {
         $ingredient = trim($node->nodeValue);
         if ($ingredient != "Ingredients") {
             $recipe->appendIngredient($ingredient);
         }
     }

     // Instructions: a step may carry a section name, split off by splitDirections().
     $nodes = $xpath->query('//span[@class = "steps-list__item__text"]');
     foreach ($nodes as $node) {
         $step = RecipeParser_Text::stripLeadingNumbers(trim($node->nodeValue));
         $parts = self::splitDirections($step);
         if ($parts['section']) {
             $parts['section'] = RecipeParser_Text::formatSectionName($parts['section']);
             $recipe->addInstructionsSection($parts['section']);
         }
         $recipe->appendInstruction($parts['direction']);
     }

     // Notes: paragraphs are joined with " | ".
     $nodes = $xpath->query('//div[@class = "recipe-notes__content"]/div/p');
     if ($nodes->length) {
         $notes = array();
         foreach ($nodes as $node) {
             $notes[] = trim($node->nodeValue);
         }
         $recipe->notes = implode(' | ', $notes);
     }

     // Photo: the second matching image (index 1) is used; placeholder images are ignored.
     $nodes = $xpath->query('//img[@class = "recipe-carousel__recipe__img"]');
     if ($nodes && $nodes->item(1)) {
         $photo_url = $nodes->item(1)->getAttribute('src');
         $is_placeholder = strpos($photo_url, 'default-recipe-image.gif') !== false
             || strpos($photo_url, 'placeholder.gif') !== false;
         if (!$is_placeholder) {
             $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
         }
     }

     return $recipe;
 }
Пример #4
0
 /**
  * Parse a recipe page laid out with "recipe-metadata", "recipe-body" and
  * "recipe-sidebar" containers, crediting it to Bravo TV / Top Chef.
  *
  * Populates title, times, description, yield, notes, ingredients,
  * instructions, credits and photo URL of a new RecipeParser_Recipe.
  *
  * @param string $html Raw page markup; it is handled as UTF-8.
  * @param string $url  Page URL, passed along when formatting the photo URL.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();

     // Turn every non-ASCII character into a numeric entity before handing
     // the markup to DOMDocument. This replaces
     // mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), whose
     // 'HTML-ENTITIES' pseudo-encoding is deprecated as of PHP 8.2.
     // Convmap is (first code point, last code point, offset, mask).
     $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title
     $nodes = $xpath->query('//h3[@class = "title"]');
     if ($nodes->length) {
         $recipe->title = trim($nodes->item(0)->nodeValue);
     }

     // Cook times: each <li> holds an <h5> label and a <p> value.
     $nodes = $xpath->query('//div[@class = "recipe-metadata"]/ul/li');
     foreach ($nodes as $node) {
         $key = null;
         $value = null;
         foreach ($node->childNodes as $sub_node) {
             if ($sub_node->nodeName == 'h5') {
                 $key = trim($sub_node->nodeValue);
             } elseif ($sub_node->nodeName == 'p') {
                 $value = trim($sub_node->nodeValue);
             }
         }

         // Inspect keys/values we've found.
         if ($key == 'Total Time:') {
             $recipe->time['total'] = RecipeParser_Times::toMinutes(self::cleanupTime($value));
         } elseif ($key == 'Prep Time:') {
             $recipe->time['prep'] = RecipeParser_Times::toMinutes(self::cleanupTime($value));
         }
     }

     // <dd> based times. Processed in this order so that each one overrides
     // any value for the same key found in the metadata list above.
     $dd_time_keys = array(
         'preptime' => 'prep',
         'cooktime' => 'cook',
         'duration totaltime special' => 'total',
     );
     foreach ($dd_time_keys as $dd_class => $time_key) {
         $node_list = $xpath->query('//dd[@class = "' . $dd_class . '"]');
         if ($node_list->length) {
             $recipe->time[$time_key] = RecipeParser_Times::toMinutes($node_list->item(0)->nodeValue);
         }
     }

     // Ingredients, Yield, Description, Notes, etc.
     // <h4> elements name the section that the following <div class="section"> belongs to.
     $nodes = $xpath->query('//div[@class = "recipe-body"]/*');
     $section_title = null;
     foreach ($nodes as $node) {
         // Section titles
         if ($node->nodeName == 'h4') {
             $section_title = trim(strtolower($node->nodeValue));
             continue;
         }

         // Only <div class="section"> nodes carry content.
         if ($node->nodeName != 'div' || $node->getAttribute('class') != 'section') {
             continue;
         }

         if (!$section_title) {
             // Description should be first text, before any section titles.
             $value = $node->nodeValue;
             $value = preg_replace("/^(Drink\\:|Top Chef).*\$/m", '', $value);
             $value = str_replace("\n\n", "\n", $value);
             $recipe->description = trim($value);
         } elseif ($section_title == 'yield') {
             $recipe->yield = trim($node->nodeValue);
         } elseif ($section_title == 'notes') {
             $recipe->notes = str_replace("\n\n", "\n", trim($node->nodeValue));
         } elseif ($section_title == 'ingredients') {
             // <h5> starts an ingredients section, <ul> children are the ingredients.
             foreach ($node->childNodes as $sub_node) {
                 if ($sub_node->nodeName == 'h5') {
                     $value = RecipeParser_Text::formatSectionName($sub_node->nodeValue);
                     $recipe->addIngredientsSection($value);
                 } elseif ($sub_node->nodeName == 'ul') {
                     foreach ($sub_node->childNodes as $li_node) {
                         $recipe->appendIngredient(trim($li_node->nodeValue));
                     }
                 }
             }
         } elseif ($section_title == 'directions') {
             foreach ($node->childNodes as $sub_node) {
                 $value = trim($sub_node->nodeValue);
                 // Section titles appear in all-caps, or end with a colon.
                 if ($value && ($value == strtoupper($value) || preg_match('/:$/', $value))) {
                     $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($value));
                 } else {
                     $recipe->appendInstruction(RecipeParser_Text::stripLeadingNumbers($value));
                 }
             }
         }
     }

     // Source / Chef: first <small> following a "chef" or "author" heading.
     $nodes = $xpath->query('//div[@class = "recipe-sidebar"]/div/*');
     $section_title = null;
     $chef_name = null;
     $show_name = 'Bravo TV';
     foreach ($nodes as $node) {
         if ($node->nodeName == 'h4') {
             $section_title = strtolower(trim($node->nodeValue));
             continue;
         }
         if ($node->nodeName == 'small'
             && ($section_title == 'chef' || $section_title == 'author')
         ) {
             $chef_name = trim($node->nodeValue);
             break;
         }
     }

     // The first paragraph of the first section may name the show.
     // "Top Chef Masters" is tested first because it contains "Top Chef".
     $nodes = $xpath->query('//div[@class = "section"]/p[1]');
     if ($nodes->length) {
         $value = $nodes->item(0)->nodeValue;
         if (strpos($value, 'Top Chef Masters') !== false) {
             $show_name = 'Top Chef Masters';
         } elseif (strpos($value, 'Top Chef') !== false) {
             $show_name = 'Top Chef';
         }
     }

     // Without a chef name, credit the show alone instead of emitting a
     // dangling ", <show>" prefix.
     $recipe->credits = $chef_name ? $chef_name . ', ' . $show_name : $show_name;

     // Photo: request the original-size rendition instead of the medium one.
     $nodes = $xpath->query('//div[@class = "recipe-header clearfix"]//img');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute('src');
         $photo_url = str_replace('/medium/', '/original/', $photo_url);
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_url, $url);
     }

     return $recipe;
 }
Пример #5
0
 /**
  * Parse a recipe page that uses itemprop attributes for title, total time,
  * yield and image, and <div id="ingredients"> / <div id="directions">
  * containers for the recipe body.
  *
  * Populates title, total time, yield, ingredients (with sections),
  * instructions (with optional sections), notes and photo URL of a new
  * RecipeParser_Recipe.
  *
  * @param string $html Raw page markup; it is handled as UTF-8.
  * @param string $url  Page URL, passed along when formatting the photo URL.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);

     // Turn every non-ASCII character into a numeric entity before handing
     // the markup to DOMDocument. This replaces
     // mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), whose
     // 'HTML-ENTITIES' pseudo-encoding is deprecated as of PHP 8.2.
     // Convmap is (first code point, last code point, offset, mask).
     $html = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title
     $nodes = $xpath->query('//h1[@itemprop="name"]');
     if ($nodes->length) {
         $recipe->title = trim($nodes->item(0)->nodeValue);
     }

     // Times and yield
     // <meta content="PT3H30M" itemprop="totalTime">
     $nodes = $xpath->query('//meta[@itemprop="totalTime"]');
     if ($nodes->length) {
         $duration = $nodes->item(0)->getAttribute('content');
         if ($duration) {
             $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($duration);
         }
     }

     $nodes = $xpath->query('//*[@itemprop="recipeYield"]');
     if ($nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($nodes->item(0)->nodeValue);
     }

     // Ingredients: <h2> names a section (the generic "Ingredients" heading
     // is skipped), <ol> holds the ingredient lines.
     $nodes = $xpath->query('//div[@id = "ingredients"]/*');
     foreach ($nodes as $node) {
         if ($node->nodeName == 'h2') {
             $section = RecipeParser_Text::formatSectionName(trim($node->nodeValue));
             if ($section != "Ingredients") {
                 $recipe->addIngredientsSection($section);
             }
         } elseif ($node->nodeName == 'ol') {
             $item_nodes = $xpath->query('./li/span', $node);
             foreach ($item_nodes as $item_node) {
                 $recipe->appendIngredient(trim($item_node->nodeValue));
             }
         }
     }

     // Instructions: a step may carry a section name, split off by splitDirections().
     $nodes = $xpath->query('//div[@id = "directions"]/ol/li');
     foreach ($nodes as $node) {
         $step = RecipeParser_Text::stripLeadingNumbers(trim($node->nodeValue));
         $parts = self::splitDirections($step);
         if ($parts['section']) {
             $parts['section'] = RecipeParser_Text::formatSectionName($parts['section']);
             $recipe->addInstructionsSection($parts['section']);
         }
         $recipe->appendInstruction($parts['direction']);
     }

     // Notes
     $nodes = $xpath->query('//div[@id = "directions"]/div[@id = "endnotes"]');
     if ($nodes->length) {
         $recipe->notes = trim($nodes->item(0)->nodeValue);
     }

     // Photo: placeholder images are ignored.
     $nodes = $xpath->query('//img[@itemprop="image"]');
     if ($nodes && $nodes->item(0)) {
         $photo_url = $nodes->item(0)->getAttribute('src');
         $is_placeholder = strpos($photo_url, 'default-recipe-image.gif') !== false
             || strpos($photo_url, 'placeholder.gif') !== false;
         if (!$is_placeholder) {
             $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_url, $url);
         }
     }

     return $recipe;
 }