Example #1
0
 /**
  * Parse a recipe page with the Schema.org microdata parser, then replace the
  * notes with text gathered from the page and point the photo URL at a larger image.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, with
  *               `notes` and `photo_url` overwritten.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Keep libxml quiet about malformed markup while the page is loaded.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Notes -- Collect the non-standard cook times and baking temps,
     // and also any tips/notes that appear at the end of the recipe instructions.
     $collected = array();

     // Tip list items; a leading "Tip"/"Note" label is removed from each one.
     foreach ($xpath->query('//*[@class="recipeTips"]//li') as $tip) {
         $text = RecipeParser_Text::FormatAsOneLine($tip->nodeValue);
         $collected[] = preg_replace("/^(Tip|Note)\\s*(.*)\$/", "\$2", $text);
     }

     // Info lines; anything containing "Makes:" is left out of the notes.
     foreach ($xpath->query('//*[@class="recipeInfo"]//*[@class="type"]') as $info) {
         $text = RecipeParser_Text::formatAsOneLine($info->nodeValue);
         if (strpos($text, "Makes:") === false) {
             $collected[] = $text;
         }
     }
     $recipe->notes = implode("\n\n", $collected);

     // Adjust Photo URL for larger dimensions ("/l_<name>" becomes "/550_<name>").
     $recipe->photo_url = preg_replace("/\\/l_([^\\/]+)/", "/550_\$1", $recipe->photo_url);

     return $recipe;
 }
 /**
  * Parse a recipe page with the Schema.org microdata parser, then rebuild the
  * ingredient sections from the #recipe-ingredients container and override
  * the notes and photo URL when the page provides them.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser and to formatPhotoUrl().
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $xpath = new DOMXPath($doc);

     // Ingredients
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id = "recipe-ingredients"]/*') as $child) {
         // <ul> elements carry the ingredients of the current section.
         if ($child->nodeName == 'ul') {
             foreach ($xpath->query('./li[@class = "ingredient"]', $child) as $item) {
                 $recipe->appendIngredient(trim($item->nodeValue));
             }
             continue;
         }
         if ($child->nodeName != 'p') {
             continue;
         }

         $text = trim($child->nodeValue);
         $lines = explode("\n", $text);

         // A single-line <p> is a normal hrecipe section header; its
         // ingredients follow as <ul> elements.
         if (count($lines) <= 1) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
             continue;
         }

         // Older recipes will have ingredients jumbled into a single <p>
         // rather than using 'ingredients' classes. Treat the first line as
         // the section header and the remaining lines as its ingredients.
         $heading = trim(array_shift($lines));
         $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($heading));
         foreach ($lines as $line) {
             $recipe->appendIngredient(trim($line));
         }
     }

     // Notes
     $intro = $xpath->query('//*[@id="recipe-intronote"]');
     if ($intro->length) {
         $recipe->notes = RecipeParser_Text::formatAsParagraphs($intro->item(0)->nodeValue);
     }

     // Photo URL to replace og:image
     $images = $xpath->query('//img[@itemprop="image"]');
     if ($images->length) {
         $src = $images->item(0)->getAttribute("src");
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($src, $url);
     }

     return $recipe;
 }
Example #3
0
 /**
  * Parse a recipe page with the hrecipe microformat parser, then rebuild the
  * ingredients from a multi-stage <dl id="stages"> list when the page has one.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object returned by the microformat parser, after overrides.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard hrecipe stuff we can find.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Multi-stage ingredients: leave the microformat result alone when absent.
     $stages = $xpath->query('//dl[@id="stages"]/*');
     if (!$stages->length) {
         return $recipe;
     }

     $recipe->resetIngredients();
     foreach ($stages as $stage) {
         switch ($stage->nodeName) {
             case 'dt':
                 // <dt> names the stage.
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($stage->nodeValue));
                 break;
             case 'dd':
                 // <dd> holds the stage's ingredients.
                 foreach ($xpath->query('.//*[@class="ingredient"]', $stage) as $item) {
                     $recipe->appendIngredient(trim($item->nodeValue));
                 }
                 break;
         }
     }

     return $recipe;
 }
Example #4
0
 /**
  * Parse a recipe page with the Schema.org microdata parser, then drop logo
  * placeholders from the photo URL, switch thumbnails to the large image and
  * read the yield from the page.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Photo -- skip logo if it was used in place of photo
     if (strpos($recipe->photo_url, "FDC_Logo_vertical.png") !== false || strpos($recipe->photo_url, "FDC_share-logo.png") !== false) {
         $recipe->photo_url = '';
     }
     if ($recipe->photo_url) {
         $recipe->photo_url = str_replace("/thumbs/", "/large/", $recipe->photo_url);
     }

     // Yield -- find as 'yield', or else as number of 'servings'.
     $nodes = $xpath->query('//*[@class="yield"]');
     if (!$nodes->length) {
         $nodes = $xpath->query('//*[@class="servings"]//*[@class="value"]');
     }
     if ($nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($nodes->item(0)->nodeValue);
     }

     return $recipe;
 }
 /**
  * Parse a recipe page with the Schema.org microdata parser, then rebuild the
  * ingredient sections from the children of <div id="ingredients">.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $xpath = new DOMXPath($doc);

     // OVERRIDES for epicurious
     // Ingredients
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id = "ingredients"]/*') as $child) {
         if ($child->nodeName == 'strong') {
             // <strong> contains ingredient section names
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($child->nodeValue));
         } elseif ($child->nodeName == 'ul') {
             // Extract ingredients from inside of <ul class="ingredientsList">;
             // only <li> children are taken.
             foreach ($child->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $recipe->appendIngredient(trim($item->nodeValue));
             }
         }
     }

     return $recipe;
 }
 /**
  * Parse a recipe page with the data-vocabulary microdata parser, then switch
  * to the larger photo, rebuild the ingredient sections from the
  * "recipeDetails" list and read the credits from the author element.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Photo URL, use larger version found on MyRecipes
     $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

     // Ingredients
     $recipe->resetIngredients();
     $nodes = $xpath->query('//div[@class="recipeDetails"]/ul');
     // item(0) is null when the list is missing from the page, so only walk
     // its children when the query actually matched something.
     if ($nodes->length) {
         foreach ($nodes->item(0)->childNodes as $li) {
             if ($li->nodeName != 'li') {
                 continue;
             }
             $text = RecipeParser_Text::formatAsOneLine($li->nodeValue);
             if ($li->getAttribute('itemprop') == 'ingredient') {
                 // Items marked as ingredients; strip the savings link text.
                 $text = trim(str_replace('$Click to see savings', '', $text));
                 $recipe->appendIngredient($text);
             } else {
                 // Any other <li> is treated as a section header.
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
             }
         }
     }

     // Credits
     $nodes = $xpath->query('//*[@itemprop="author"]');
     if ($nodes->length) {
         $recipe->credits = trim($nodes->item(0)->nodeValue);
     }

     return $recipe;
 }
 /**
  * Parse a recipe page with the Schema.org microdata parser, then rebuild the
  * ingredient sections from the #recipe-ingredients container, trying two
  * known page layouts in turn.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $xpath = new DOMXPath($doc);

     // Ingredients
     $recipe->resetIngredients();

     // First layout; fall back to the second when it yields nothing.
     $children = $xpath->query('//*[@id="recipe-ingredients"]//div[@class="view-content"]/*');
     if (!$children || !$children->length) {
         $children = $xpath->query('//*[@id="recipe-ingredients"]//div[@class="ingredient-lists separator-serated tab-content"]/*');
     }

     foreach ($children as $child) {
         switch ($child->nodeName) {
             case 'h3':
                 // <h3> names an ingredient section.
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($child->nodeValue));
                 break;
             case 'ul':
                 // Every child node of the list is passed on as an ingredient line.
                 foreach ($child->childNodes as $item) {
                     $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
                 }
                 break;
         }
     }

     return $recipe;
 }
Example #8
0
 /**
  * Build a recipe from scratch out of a page that exposes its data through
  * "field-*" class names, a #page-title element and an og:image tag.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, used to make the photo URL absolute.
  * @return RecipeParser_Recipe The populated recipe.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Title
     $found = $xpath->query('//*[@id="page-title"]');
     if ($found->length) {
         $recipe->title = RecipeParser_Text::formatTitle($found->item(0)->nodeValue);
     }

     // Times: "Hands-On Time" maps to prep, "Total Time" to total.
     foreach ($xpath->query('//*[@class="field-recipe-time"]') as $field) {
         $text = RecipeParser_Text::formatAsOneLine($field->nodeValue);
         if (strpos($text, "Hands-On Time") !== false) {
             $recipe->time["prep"] = RecipeParser_Times::toMinutes(str_replace("Hands-On Time ", "", $text));
         } elseif (strpos($text, "Total Time") !== false) {
             $recipe->time["total"] = RecipeParser_Times::toMinutes(str_replace("Total Time ", "", $text));
         }
     }

     // Yield
     $found = $xpath->query('//*[@class="field-yield"]');
     if ($found->length) {
         $recipe->yield = RecipeParser_Text::formatYield($found->item(0)->nodeValue);
     }

     // Ingredients
     foreach ($xpath->query('//*[@class="field-ingredients"]') as $field) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($field->nodeValue));
     }

     // Instructions
     foreach ($xpath->query('//*[@class="field-instructions"]//li') as $step) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
     }

     // Photo
     $found = $xpath->query('//*[@property="og:image"]');
     if ($found->length) {
         $image = $found->item(0)->getAttribute('content');
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($image, $url);
     }

     return $recipe;
 }
 /**
  * Parse a recipe page with the hrecipe microformat parser, then override the
  * title, yield, prep/total times and photo from the main <section>, and drop
  * any "Intro: " prefix from the title.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microformat parser and used
  *                     to make the photo URL absolute.
  * @return mixed The recipe object returned by the microformat parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     $sections = $xpath->query('//section[@role="main"]');
     if ($sections->length) {
         $main = $sections->item(0);

         // Title is not marked up with class="fn"
         $found = $xpath->query('.//h1', $main);
         if ($found->length) {
             $recipe->title = RecipeParser_Text::formatTitle($found->item(0)->nodeValue);
         }

         // Yield -- Class names are conflated. The itemprop variant is
         // checked second, so it wins when both are present.
         $found = $xpath->query('.//*[@class="info yield"]', $main);
         if ($found->length) {
             $recipe->yield = RecipeParser_Text::formatYield($found->item(0)->nodeValue);
         }
         $found = $xpath->query('.//span[@itemprop="recipeYield"]', $main);
         if ($found->length) {
             $recipe->yield = RecipeParser_Text::formatYield($found->item(0)->nodeValue);
         }

         // Prep Times -- Class names are conflated
         $found = $xpath->query('.//*[@class="info preptime"]', $main);
         if ($found->length) {
             $recipe->time['prep'] = RecipeParser_Times::toMinutes($found->item(0)->nodeValue);
         }

         // Total Time / Duration -- Class names are conflated
         $found = $xpath->query('.//*[@class="info duration"]', $main);
         if ($found->length) {
             $recipe->time['total'] = RecipeParser_Times::toMinutes($found->item(0)->nodeValue);
         }
     }

     // Photo
     $found = $xpath->query('//section[@class="content-unit"]/img');
     if ($found->length) {
         $src = $found->item(0)->getAttribute('src');
         if ($src) {
             $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($src, $url);
         }
     }

     // Remove recipe title intros -- e.g. "Sunday Dinner: Pork Ribs" changes to "Pork Ribs"
     if (strpos($recipe->title, ": ") !== false) {
         $recipe->title = preg_replace("/^[^:]+: (.+)/", "\$1", $recipe->title);
     }

     return $recipe;
 }
Example #10
0
 /**
  * Build a "bookmark" recipe for a page: only the URL, a title and (when
  * available) the Open Graph image are filled in.
  *
  * Title preference: og:title meta tag, then the <title> tag, then a
  * "Recipe from <url>" fallback.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, stored on the struct and used for the fallback title.
  * @return RecipeStruct The bookmark-status recipe.
  */
 public static function getBookmarkAsRecipeStruct($html, $url)
 {
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // This recipe will be stored as a bookmark
     $recipe = new RecipeStruct();
     $recipe->url = $url;
     $recipe->status = "bookmark";

     // Candidate title from the <title> tag.
     $from_tag = "";
     $found = $xpath->query('//title');
     if ($found->length) {
         $candidate = RecipeParser_Text::formatTitle($found->item(0)->nodeValue);
         if ($candidate) {
             $from_tag = $candidate;
         }
     }

     // Candidate title from the Open Graph meta tag.
     $from_og = "";
     $found = $xpath->query('//meta[@property="og:title"]');
     if ($found->length) {
         $candidate = RecipeParser_Text::formatTitle($found->item(0)->getAttribute("content"));
         if ($candidate) {
             $from_og = $candidate;
         }
     }

     // Which title string to use?
     if ($from_og) {
         $recipe->title = $from_og;
     } elseif ($from_tag) {
         $recipe->title = $from_tag;
     } else {
         $recipe->title = "Recipe from {$url}";
     }

     // Get image from Open Graph tag
     $found = $xpath->query('//meta[@property="og:image"]');
     if ($found->length) {
         $image = $found->item(0)->getAttribute("content");
         if ($image) {
             $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($image, $url);
         }
     }

     return $recipe;
 }
 /**
  * Parse a recipe page with the hrecipe microformat parser, then override the
  * yield and prep/cook/total times and rebuild the ingredient sections.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object returned by the microformat parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Yield
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " yield ")]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }

     // Times -- the ISO 8601 duration is read from the "title" attribute of
     // the SECOND <span> under each time element, so at least two spans must
     // be present; item(1) is null otherwise.
     $time_classes = array('prepTime' => 'prep', 'rspec-cook-time' => 'cook', 'totaltime' => 'total');
     foreach ($time_classes as $class => $key) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")]/span');
         if ($nodes->length > 1) {
             $line = $nodes->item(1)->getAttribute("title");
             $recipe->time[$key] = RecipeParser_Text::iso8601ToMinutes($line);
         }
     }

     // Ingredients
     $recipe->resetIngredients();
     $ing_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredients ")]/*');
     foreach ($ing_nodes as $ing_node) {
         // Dividers name the ingredient sections.
         if ($ing_node->getAttribute('class') == "ingr-divider") {
             $line = RecipeParser_Text::formatSectionName($ing_node->nodeValue);
             $recipe->addIngredientsSection($line);
             continue;
         }
         // Extract ingredients from inside of <ul class="ingredientsList">
         // Child nodes should all be <li>
         // NOTE(review): every child node is passed on, including any
         // whitespace text nodes between the <li> elements -- presumably
         // appendIngredient() ignores empty strings; confirm.
         if ($ing_node->nodeName == 'ul') {
             foreach ($ing_node->childNodes as $node) {
                 $line = trim($node->nodeValue);
                 $recipe->appendIngredient($line);
             }
         }
     }

     return $recipe;
 }
Example #12
0
 /**
  * Parse a recipe page with the data-vocabulary microdata parser, then fill
  * in a missing title, switch to the larger photo, read credits and times
  * from the page, and trim "$..." suffixes off the parsed ingredients.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Title missing?
     if (!$recipe->title) {
         $found = $xpath->query('//meta[@property="og:title"]');
         if ($found->length) {
             $recipe->title = RecipeParser_Text::formatTitle($found->item(0)->getAttribute("content"));
         }
     }

     // Photo URL, use larger version found on MyRecipes
     $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

     // Credits: taken from a heading that starts with "More from".
     $found = $xpath->query('//*[@class="link-list"]/h4');
     if ($found->length) {
         $heading = trim($found->item(0)->nodeValue);
         if (strpos($heading, "More from") === 0) {
             $recipe->credits = str_replace("More from ", "", $heading);
         }
     }

     // Times: each time element is lower-cased and matched against a prefix.
     $prefixes = array('prep' => 'prep: ', 'cook' => 'cook: ', 'total' => 'total: ');
     foreach ($xpath->query('//*[@class="recipe-time-info"]') as $time_node) {
         $text = trim(strtolower($time_node->nodeValue));
         foreach ($prefixes as $slot => $prefix) {
             if (strpos($text, $prefix) !== 0) {
                 continue;
             }
             // The stripped text carries over to the remaining prefix checks.
             $text = str_replace($prefix, "", $text);
             $recipe->time[$slot] = RecipeParser_Times::toMinutes($text);
         }
     }

     // Clean up each of the ingredients to remove "$Click to see savings"
     // These don't come through in the curl'ed test files
     $section_count = count($recipe->ingredients);
     for ($s = 0; $s < $section_count; $s++) {
         $item_count = count($recipe->ingredients[$s]['list']);
         for ($k = 0; $k < $item_count; $k++) {
             $dollar_at = strpos($recipe->ingredients[$s]['list'][$k], "\$");
             // A "$" at the very start (or none at all) leaves the line untouched.
             if ($dollar_at > 0) {
                 $recipe->ingredients[$s]['list'][$k] = substr($recipe->ingredients[$s]['list'][$k], 0, $dollar_at);
             }
         }
     }

     return $recipe;
 }
Example #13
0
 /**
  * Build a recipe from scratch out of a Cooks.com-style page: the title comes
  * from the <title> tag and the ingredients, section titles and instructions
  * from the <div> children of the "hrecipe" table cells.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL (not used by this parser).
  * @return RecipeParser_Recipe The populated recipe.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Title, with the site name stripped from either end.
     $titles = $doc->getElementsByTagName('title');
     if ($titles->length) {
         $name = $titles->item(0)->nodeValue;
         $name = trim(str_replace("Cooks.com - Recipe - ", "", $name));
         $name = trim(str_replace(" - Recipe - Cooks.com", "", $name));
         $recipe->title = $name;
     }

     // This node contains all ingredients, section titles, and instructions
     foreach ($xpath->query('//table[@class="hrecipe"]//td/div') as $block) {
         // Can determine each piece of content by the "style" attributes.
         $style = $block->getAttribute("style");

         // Ingredients found in a div, black text
         if (stripos($style, "color: BLACK;") !== false) {
             foreach ($xpath->query('./span[@class = "ingredient"]', $block) as $item) {
                 $recipe->appendIngredient($item->nodeValue);
             }
             continue;
         }

         $class = $block->getAttribute('class');
         if ($class == "instructions") {
             // Instructions node: every child node becomes one instruction line.
             foreach ($block->childNodes as $step) {
                 $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
             }
         } elseif ($class == "section") {
             // Section title: always opens an ingredients section, and also an
             // instructions section once some instructions have been seen.
             $section = RecipeParser_Text::formatSectionName($block->nodeValue);
             $recipe->addIngredientsSection($section);
             if (count($recipe->instructions) > 0) {
                 $recipe->addInstructionsSection($section);
             }
         }
     }

     return $recipe;
 }
Example #14
0
 /**
  * Parse a recipe page with the Schema.org microdata parser, then drop the
  * logo placeholder from the photo URL, switch thumbnails to the large image,
  * assemble the yield from two page elements and rebuild the ingredients.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $xpath = new DOMXPath($doc);

     // Photo -- skip logo if it was used in place of photo
     if (strpos($recipe->photo_url, "FDC_Logo_vertical.png") !== false) {
         $recipe->photo_url = '';
     }
     if ($recipe->photo_url) {
         $recipe->photo_url = str_replace("/thumbs/", "/large/", $recipe->photo_url);
     }

     // Yield: the selected amount followed by its units ("servings" when the
     // units element is present but empty).
     $amount_and_units = '';
     $found = $xpath->query('//option[@class="select-title"]');
     if ($found->length) {
         $amount_and_units .= trim($found->item(0)->nodeValue);
     }
     $found = $xpath->query('//p[@class="yieldUnits-txt"]');
     if ($found->length) {
         $units = trim($found->item(0)->nodeValue);
         $amount_and_units .= ' ' . ($units ?: 'servings');
     }
     $recipe->yield = trim($amount_and_units);

     // Ingredients (custom because of duplicate class attributes for "ingredients")
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@class = "pod ingredients"]/*') as $child) {
         if ($child->nodeName == 'h3') {
             // <h3> contains ingredient section names
             $section = ucfirst(trim(strtolower($child->nodeValue)));
             $recipe->addIngredientsSection($section);
         } elseif ($child->nodeName == 'ul') {
             // Extract ingredients from <ul> <li>.
             foreach ($child->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $recipe->appendIngredient(RecipeParser_Text::FormatAsOneLine($item->nodeValue));
             }
         }
     }

     return $recipe;
 }
Example #15
0
 /**
  * Parse a recipe page with the Schema.org microdata parser, then override
  * the title, yield and image and append ingredients and instructions found
  * inside the articleBody element.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $xpath = new DOMXPath($doc);

     // ---- OVERRIDES

     // Title
     $found = $xpath->query('//h3//strong');
     if ($found->length) {
         $recipe->title = RecipeParser_Text::formatAsOneLine($found->item(0)->nodeValue);
     }

     // Yield: the first paragraph starting with "Yield" or "Serve".
     foreach ($xpath->query('//span[@itemprop="articleBody"]//p') as $paragraph) {
         $text = trim($paragraph->nodeValue);
         $is_yield = strpos($text, "Yield") === 0 || strpos($text, "Serve") === 0;
         if ($is_yield) {
             $recipe->yield = RecipeParser_Text::formatYield($text);
             break;
         }
     }

     // Ingredients
     foreach ($xpath->query('//span[@itemprop="articleBody"]//ul/li') as $item) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
     }

     // Instructions
     foreach ($xpath->query('//span[@itemprop="articleBody"]//ol/li') as $step) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
     }

     // Image: only the first og:image tag is used.
     $found = $xpath->query('//meta[@property="og:image"]');
     if ($found->length) {
         $recipe->photo_url = $found->item(0)->getAttribute("content");
     }

     return $recipe;
 }
Example #16
0
 /**
  * Parse a recipe page with the Schema.org microdata parser, then tidy the
  * title, instructions and description, and fall back to page-specific
  * markup for ingredients and instructions when the parser found none.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard bits we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Titles include "recipe"
     if (preg_match("/ Recipe( - CHOW.com)?\$/", $recipe->title)) {
         $shortened = preg_replace("/(.*) Recipe( - CHOW.com)?\$/", "\$1", $recipe->title);
         $recipe->title = trim($shortened);
     }

     // Strip leading numbers from instructions
     $section_count = count($recipe->instructions);
     for ($s = 0; $s < $section_count; $s++) {
         $step_count = count($recipe->instructions[$s]['list']);
         for ($k = 0; $k < $step_count; $k++) {
             $recipe->instructions[$s]['list'][$k] = preg_replace("/^\\d+(\\w.*)\$/", "\$1", $recipe->instructions[$s]['list'][$k]);
         }
     }

     // Ingredients (If none parsed)
     if (!count($recipe->ingredients[0]['list'])) {
         foreach ($xpath->query('//*[@id="ingredients_list"]//li') as $item) {
             $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
         }
     }

     // Instructions (If none parsed)
     if (!count($recipe->instructions[0]['list'])) {
         foreach ($xpath->query('//*[@itemprop="recipeInstructions"]') as $step) {
             $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
         }
     }

     // Cleanup description: drop promotional lines, then collapse the blank
     // lines they leave behind.
     if ($recipe->description) {
         $cleaned = preg_replace("/^(Read our review of|This (dish|recipe) was featured as part|See more recipes) .*\$/m", "", $recipe->description);
         $cleaned = preg_replace("/[\r\n]{3,}/", "\n\n", $cleaned);
         $recipe->description = trim($cleaned);
     }

     return $recipe;
 }
Example #17
0
 /**
  * Cleanup for clipped HTML prior to parsing with RecipeParser.
  *
  * Normalizes line breaks to "\n", replaces non-breaking spaces with plain
  * spaces, maps the U+0096/U+0097 characters to "-", and strips <script>
  * tags together with their contents.
  *
  * @param string $html HTML
  * @return string HTML
  */
 public static function cleanupClippedRecipeHtml($html)
 {
     // Normalize line breaks
     $html = preg_replace('/(\\r\\n|\\r)/', "\n", $html);

     // get rid of non-breaking space (html code and numeric entity)
     $html = str_replace(array('&nbsp;', '&#160;'), ' ', $html);
     // get rid of non-breaking space (UTF-8)
     $html = preg_replace('/\\xC2\\xA0/', ' ', $html);

     // ndash, mdash (bonappetit). With the /u modifier preg_replace() returns
     // null when the subject is not valid UTF-8; keep the text unchanged in
     // that case instead of throwing the whole document away.
     $dashed = preg_replace('/[\\x{0096}-\\x{0097}]/u', '-', $html);
     if ($dashed !== null) {
         $html = $dashed;
     }

     // Strip out script tags so they don't accidentally get executed if we ever display
     // clipped content to end-users.
     $html = RecipeParser_Text::stripTagAndContents('script', $html);

     return $html;
 }
Example #18
0
 /**
  * Parse a recipe page with the data-vocabulary microdata parser, then read
  * the yield, ingredients and instructions from the free-text body field,
  * using "INGREDIENTS" and "INSTRUCTIONS" marker lines to tell them apart.
  *
  * @param string $html Raw page HTML.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return mixed The recipe object returned by the microdata parser, after overrides.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Yield, Ingredients, Instructions
     $bodies = $xpath->query('//*[@class="field field-name-body field-type-text-with-summary field-label-hidden"]//*[@class="field-item even"]');
     if (!$bodies->length) {
         return $recipe;
     }

     $in_ingredients = false;
     $in_instructions = false;
     foreach ($bodies->item(0)->childNodes as $child) {
         $text = trim($child->nodeValue);

         // Yield: the first line that mentions servings and contains a digit,
         // but only while the recipe has no yield yet.
         if (!$recipe->yield && preg_match("/(makes|yields|serves|servings)/i", $text) && preg_match("/\\d/", $text)) {
             $recipe->yield = RecipeParser_Text::formatYield($text);
             continue;
         }

         // Marker lines switch what the following lines are treated as.
         if ($text == "INGREDIENTS") {
             $in_ingredients = true;
             continue;
         }
         if ($text == "INSTRUCTIONS") {
             $in_instructions = true;
             continue;
         }

         // Nothing is collected before the INGREDIENTS marker has been seen.
         if (!$in_ingredients) {
             continue;
         }

         $text = RecipeParser_Text::formatAsOneLine($text);
         if ($in_instructions) {
             $recipe->appendInstruction(RecipeParser_Text::stripLeadingNumbers($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }

     return $recipe;
 }
Example #19
0
 /**
  * Parse a Schema.org recipe page, replacing the instructions with one entry
  * per element carrying itemprop="recipeInstructions".
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Overrides for data that isn't captured by their implementation of Schema.org.
     // Instructions
     $recipe->resetInstructions();
     foreach ($xpath->query('//*[@itemprop="recipeInstructions"]') as $step_node) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step_node->nodeValue));
     }
     return $recipe;
 }
Example #20
0
 /**
  * Parse an RDF data-vocabulary recipe page, rebuilding the ingredient list
  * from every element whose class attribute is exactly "ingredient".
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataRdfDataVocabulary::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Ingredients
     $recipe->resetIngredients();
     foreach ($xpath->query('//*[@class="ingredient"]') as $ingredient_node) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue));
     }
     return $recipe;
 }
Example #21
0
 /**
  * Parse a Food52 recipe page.
  *
  * Uses the Schema.org microdata result, prefixes the credits with "Food52",
  * and builds the notes from the first span with class "recipe-note".
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // ---- OVERRIDES
     // Credits
     $recipe->credits = $recipe->credits ? "Food52 (" . $recipe->credits . ")" : "Food52";

     // Notes
     // Node names whose text is copied into the note; anything else is dropped.
     $text_bearing = array("#text", "span", "strong", "b", "em", "i", "a");
     $note = "";
     $note_spans = $xpath->query('.//span[@class="recipe-note"]');
     if ($note_spans->length) {
         // Walk the direct children so that bare #text nodes are seen too.
         foreach ($note_spans->item(0)->childNodes as $child) {
             if ($child->nodeName == "br") {
                 $note .= "\n";
             } elseif (in_array($child->nodeName, $text_bearing, true)) {
                 $note .= $child->nodeValue . " ";
             }
         }
     }
     $note = preg_replace("/^Author Notes:\\s*/", "", $note);
     $recipe->notes = RecipeParser_Text::formatAsParagraphs($note);
     return $recipe;
 }
Example #22
0
 /**
  * Parse an hRecipe page, overriding yield, ingredients and instructions.
  *
  * Yield comes from the "value" attribute of the element named "resizeTo",
  * ingredients from elements with the "ingredient" class, and instructions
  * from paragraphs inside div.display-field (all-uppercase paragraphs are
  * treated as section headings).
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microformat parser).
  * @return mixed Recipe object returned by the microformat parser, augmented.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard hrecipe stuff we can find.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Yield
     $resize_inputs = $xpath->query('//*[@name="resizeTo"]');
     if ($resize_inputs->length) {
         $servings = trim($resize_inputs->item(0)->getAttribute("value")) . " servings";
         $recipe->yield = RecipeParser_Text::formatYield($servings);
     }

     // Ingredients
     $recipe->resetIngredients();
     $ingredient_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
     foreach ($ingredient_nodes as $ingredient_node) {
         // Join the text of each direct child with a space between the pieces.
         $pieces = array();
         foreach ($ingredient_node->childNodes as $piece) {
             $pieces[] = $piece->nodeValue;
         }
         $text = str_replace(" ; ", "; ", implode(' ', $pieces));
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($text));
     }

     // Instructions
     $recipe->resetInstructions();
     foreach ($xpath->query('//div[@class="display-field"]/p') as $paragraph) {
         $text = trim($paragraph->nodeValue);
         // A paragraph that is unchanged by upper-casing is taken as a section heading.
         if ($text != strtoupper($text)) {
             $recipe->appendInstruction($text);
             continue;
         }
         $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($text));
     }
     return $recipe;
 }
 /**
  * Parse a Schema.org recipe page whose ingredients are grouped under
  * #ingredients .group blocks, and pick up the "Cook's Note" text.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Ingredients
     // NOTE(review): the list is reset even when no group matches, leaving it
     // empty in that case -- confirm this is intended.
     $recipe->resetIngredients();
     $groups = $xpath->query('//*[@id="ingredients"]//*[@class="group"]');
     foreach ($groups as $group) {
         // Optional section heading: the first <h3> inside the group.
         $headings = $xpath->query('.//h3', $group);
         if ($headings->length) {
             $section_name = RecipeParser_Text::formatSectionName($headings->item(0)->nodeValue);
             if (!empty($section_name)) {
                 $recipe->addIngredientsSection($section_name);
             }
         }
         // One ingredient per <li> inside the group.
         foreach ($xpath->query('.//li', $group) as $item) {
             $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
         }
     }

     // Notes
     $note_nodes = $xpath->query('.//*[@class = "body-c note-text"]');
     if ($note_nodes->length) {
         $recipe->notes = trim(str_replace("Cook's Note", '', $note_nodes->item(0)->nodeValue));
     }
     return $recipe;
 }
Example #24
0
 /**
  * Parse a Recipe.com page.
  *
  * Starts from the generic microformat result and fills in the items that are
  * not properly defined in Recipe.com's microformat markup: a fallback title,
  * a fallback photo, the yield and the credits.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL; also handed to formatPhotoUrl() together with
  *                     the fallback image src.
  * @return mixed Recipe object returned by the microformat parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // --- Items not properly defined in Recipe.com's microformat markup.
     // Title -- Fallback if "fn" is not defined.
     if (!$recipe->title) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " heading1 ")]');
         if ($nodes->length) {
             $recipe->title = trim($nodes->item(0)->nodeValue);
         }
     }

     // Photo -- Fallback if "photo" is not defined.
     if (!$recipe->photo_url) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " PB10 ")]/img');
         if ($nodes->length) {
             // This method is static, so there is no $this; the page URL is the
             // $url argument. The image src gets its own variable so that $url
             // is not overwritten before it is passed along.
             $photo_src = $nodes->item(0)->getAttribute('src');
             $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_src, $url);
         }
     }

     // Yield
     $nodes = $xpath->query('//*[@class="servingsize"]');
     if ($nodes->length) {
         $line = trim($nodes->item(0)->nodeValue);
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }

     // Credits -- drop the leading "Recipe from" label.
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " partnerName ")]');
     if ($nodes->length) {
         $line = RecipeParser_Text::formatAsOneLine($nodes->item(0)->nodeValue);
         $line = preg_replace('/\\s*Recipe from\\s+(.*)$/', "\$1", $line);
         $recipe->credits = trim($line);
     }
     return $recipe;
 }
 /**
  * Parse a recipe page from The Daily Meal.
  *
  * Adds ingredients found in a plain list inside the ingredients div and, when
  * no yield was found, reads it from the Edamam data table.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     //
     // Some of the ingredient lines on The Daily Meal do not adhere to the
     // usual microdata formatting, so a regular list within the higher-level
     // ingredients div is read as well.
     //
     // NOTE(review): this runs when $recipe->ingredients is NOT empty, which
     // reads oddly for a "fallback" -- confirm the condition against the
     // structure of $recipe->ingredients before changing it.
     //
     if (!empty($recipe->ingredients)) {
         foreach ($xpath->query("//div[@class='content']/div[@class='ingredient']/ul/li") as $item) {
             $text = RecipeParser_Text::formatAsOneLine($item->nodeValue);
             if (empty($text)) {
                 continue;
             }
             if (RecipeParser_Text::matchSectionName($text)) {
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
             } else {
                 $recipe->appendIngredient($text);
             }
         }
     }

     //
     // The Daily Meal provides servings details via Edamam's plugin.
     //
     if (!$recipe->yield) {
         $cells = $xpath->query("//table[@class='edamam-data']/tr[2]/td[2]");
         if ($cells->length) {
             $recipe->yield = RecipeParser_Text::formatYield($cells->item(0)->nodeValue);
         }
     }
     return $recipe;
 }
Example #26
0
 /**
  * Return the HTML for a recipe URL, using a file cache.
  *
  * A non-empty cache file younger than three days is returned as-is. Otherwise
  * the page is downloaded, forced to UTF-8, cleaned up, prefixed with a
  * metadata comment and written to the cache file before being returned.
  *
  * @param string $url Recipe page URL.
  * @return string HTML, either from the cache or freshly downloaded.
  */
 public static function downloadRecipeWithCache($url)
 {
     // Cache lifetime in seconds (3 days).
     $cache_ttl = 86400 * 3;
     // Target filename
     $filename = FileUtil::tempFilenameFromUrl($url);

     // Reuse the cached copy while it exists, is non-empty and is younger than the TTL.
     $cache_is_fresh = file_exists($filename) && filesize($filename) > 0 && time() - filemtime($filename) < $cache_ttl;
     if ($cache_is_fresh) {
         error_log("Found file in cache: {$filename}");
         return file_get_contents($filename);
     }

     // Fetch and cleanup the HTML
     error_log("Downloading recipe from url: {$url}");
     $html = RecipeParser_Text::cleanupClippedRecipeHtml(
         RecipeParser_Text::forceUTF8(FileUtil::downloadPage($url))
     );

     // Prepend some notes to the HTML
     $html = RecipeParser_Text::getRecipeMetadataComment($url, "curl") . "\n\n" . $html;

     error_log("Saving recipe to file {$filename}");
     file_put_contents($filename, $html);
     return $html;
 }
 /**
  * Parse a data-vocabulary recipe page whose ingredients live in
  * #ingredients-box and whose method lives in #method-box.
  *
  * List items with an itemprop attribute are ingredients; list items without
  * one are treated as ingredient section headings.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Ingredients
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id="ingredients-box"]//ul/li') as $item) {
         if (!$item->getAttribute("itemprop")) {
             // No itemprop: this list item is a section heading.
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($item->nodeValue));
             continue;
         }
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
     }

     // Instructions
     $recipe->resetInstructions();
     foreach ($xpath->query('//*[@id="method-box"]//p') as $paragraph) {
         $step = RecipeParser_Text::formatAsOneLine($paragraph->nodeValue);
         // Skip paragraphs that come out empty.
         if ($step) {
             $recipe->appendInstruction($step);
         }
     }
     return $recipe;
 }
 /**
  * Parse a Schema.org recipe page, adding the editor's note as the recipe
  * notes and taking the photo URL from the itemprop="image" meta tag.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Notes
     $note_nodes = $xpath->query('//div[@class="rd_editornote margin_bottom"]');
     if ($note_nodes->length) {
         $note = RecipeParser_Text::formatAsOneLine($note_nodes->item(0)->nodeValue);
         $recipe->notes = preg_replace("/Editor's Note:\\s+/", "", $note);
     }

     // Override image
     $image_metas = $xpath->query('//meta[@itemprop="image"]');
     if ($image_metas->length) {
         $recipe->photo_url = $image_metas->item(0)->getAttribute("content");
     }
     return $recipe;
 }
 /**
  * Parse a Schema.org recipe page, rebuilding ingredients and instructions
  * with section headings and discarding a chef's photo if one was captured.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Ingredients
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@class="col6 ingredients"]/*') as $block) {
         // Only <ul> children carry ingredients.
         if ($block->nodeName != 'ul') {
             continue;
         }
         foreach ($block->childNodes as $item) {
             // Ignore anything that is not an <li> (e.g. whitespace text nodes).
             if ($item->nodeName != 'li') {
                 continue;
             }
             if ($item->getAttribute("itemprop") == "ingredients") {
                 $text = trim($item->nodeValue);
                 // Section titles might be all uppercase ingredients
                 if ($text == strtoupper($text)) {
                     $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
                     continue;
                 }
                 // Drop copyright lines and "recipe follows" pointers.
                 if (stripos($text, "copyright") !== false || stripos($text, "recipe follows") !== false) {
                     continue;
                 }
                 $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($text));
             } elseif ($item->getAttribute("class") == "subtitle") {
                 // Section titles
                 $text = trim($item->nodeValue);
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
             }
         }
     }

     // Instructions
     $recipe->resetInstructions();
     foreach ($xpath->query('//*[@itemprop="recipeInstructions"]/*') as $part) {
         if ($part->nodeName == "span") {
             // <span> children are section headings.
             $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($part->nodeValue));
         } elseif ($part->nodeName == "p") {
             $step = RecipeParser_Text::formatAsOneLine($part->nodeValue);
             // Skip photo credit lines.
             if (!preg_match("/^Photograph/i", $step)) {
                 $recipe->appendInstruction($step);
             }
         }
     }

     // See if we've captured a chef's photo, and delete it (if so).
     if ($recipe->photo_url) {
         $chef_images = $xpath->query('//a[@itemprop="url"]/img[@itemprop="image"]');
         if ($chef_images->length > 0) {
             $chef_photo = $chef_images->item(0)->getAttribute("src");
             if ($recipe->photo_url == $chef_photo) {
                 $recipe->photo_url = "";
             }
         }
     }
     return $recipe;
 }
 /**
  * Parse a SparkPeople recipe page.
  *
  * Reads the yield from the "Number of Servings" text in the prep box and
  * rebuilds the instructions from the <br>-separated content of the first
  * itemprop="recipeInstructions" element.
  *
  * @param string $html Page HTML.
  * @param string $url  Page URL (passed through to the microdata parser).
  * @return mixed Recipe object returned by the microdata parser, augmented.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Keep libxml parse errors internal rather than emitting warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Yield
     foreach ($xpath->query('//*[@class="prep_box"]') as $box) {
         if (preg_match("/Number of Servings: (\\d+)/", $box->nodeValue, $m)) {
             $recipe->yield = RecipeParser_Text::formatYield($m[1]);
         }
     }

     // Instructions
     $recipe->resetInstructions();
     $containers = $xpath->query('//*[@itemprop="recipeInstructions"]');
     if (!$containers->length) {
         return $recipe;
     }

     // The instructions are one chunk of HTML with <br> tags between steps.
     // nodeValue alone would lose those breaks, so each <br> child is written
     // back as a literal "<br>" marker and every other child (text or element,
     // e.g. <a href="">) contributes its trimmed nodeValue.
     $marked_up = "";
     foreach ($containers->item(0)->childNodes as $child) {
         if ($child->nodeName == "br") {
             $marked_up .= "<br>";
             continue;
         }
         $text = trim($child->nodeValue);
         if (!empty($text)) {
             $marked_up .= $text;
         }
     }

     foreach (explode("<br>", $marked_up) as $step) {
         if (empty($step)) {
             continue;
         }
         if (RecipeParser_Text::matchSectionName($step)) {
             $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($step));
             continue;
         }
         $step = RecipeParser_Text::formatAsOneLine($step);
         $step = RecipeParser_Text::stripLeadingNumbers($step);
         // Drop the site's boilerplate lines.
         if (stripos($step, "Recipe submitted by SparkPeople") === 0) {
             continue;
         }
         if (stripos($step, "Number of Servings:") === 0) {
             continue;
         }
         $recipe->appendInstruction($step);
     }
     return $recipe;
 }