/**
 * Parse a recipe page, refining the generic microdata result.
 *
 * Starts from whatever RecipeParser_Parser_MicrodataDataVocabulary::parse()
 * extracts, then points the photo at the larger image variant, rebuilds the
 * ingredient list from the "recipeDetails" list markup and takes the credits
 * from the first element carrying itemprop="author".
 *
 * @param string $html Raw page markup; converted from UTF-8 before loading.
 * @param string $url  Page URL, forwarded unchanged to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Keep libxml parse errors in its internal buffer rather than raising warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Photo URL, use larger version found on MyRecipes
    $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

    // Ingredients
    $recipe->resetIngredients();
    $nodes = $xpath->query('//div[@class="recipeDetails"]/ul');

    // The list may be absent; item(0) would then be null, so only walk the
    // children when the query actually matched something.
    if ($nodes->length) {
        foreach ($nodes->item(0)->childNodes as $li) {
            // Skip text nodes and anything else that is not a list item.
            if ($li->nodeName != 'li') {
                continue;
            }

            $text = RecipeParser_Text::formatAsOneLine($li->nodeValue);

            if ($li->getAttribute('itemprop') == 'ingredient') {
                // Real ingredient line: drop the savings link text.
                $text = trim(str_replace('$Click to see savings', '', $text));
                $recipe->appendIngredient($text);
            } else {
                // List items without the ingredient itemprop are section headings.
                $text = RecipeParser_Text::formatSectionName($text);
                $recipe->addIngredientsSection($text);
            }
        }
    }

    // Credits
    $nodes = $xpath->query('//*[@itemprop="author"]');
    if ($nodes->length) {
        $line = trim($nodes->item(0)->nodeValue);
        $recipe->credits = $line;
    }

    return $recipe;
}
/**
 * Parse a recipe page on top of the generic microdata result.
 *
 * Fills in a missing title from the og:title meta tag, switches the photo to
 * the larger image variant, reads credits from a "More from ..." heading,
 * extracts prep/cook/total times, and truncates ingredient lines at the first
 * "$" character.
 *
 * @param string $html Raw page markup; converted from UTF-8 before loading.
 * @param string $url  Page URL, forwarded unchanged to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Keep libxml parse errors in its internal buffer rather than raising warnings.
    libxml_use_internal_errors(true);
    $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $dom = new DOMDocument();
    $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
    $finder = new DOMXPath($dom);

    // Title missing? Fall back to the Open Graph title.
    if (!$recipe->title) {
        $metaTags = $finder->query('//meta[@property="og:title"]');
        if ($metaTags->length) {
            $ogTitle = $metaTags->item(0)->getAttribute("content");
            $recipe->title = RecipeParser_Text::formatTitle($ogTitle);
        }
    }

    // Photo URL, use larger version found on MyRecipes
    $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

    // Credits: only headings that start with "More from" are used.
    $headings = $finder->query('//*[@class="link-list"]/h4');
    if ($headings->length) {
        $heading = trim($headings->item(0)->nodeValue);
        if (strpos($heading, "More from") === 0) {
            $recipe->credits = str_replace("More from ", "", $heading);
        }
    }

    // Times: each node is lower-cased and matched against a known prefix.
    $prefixes = array('prep' => 'prep: ', 'cook' => 'cook: ', 'total' => 'total: ');
    foreach ($finder->query('//*[@class="recipe-time-info"]') as $timeNode) {
        $text = trim(strtolower($timeNode->nodeValue));
        foreach ($prefixes as $slot => $prefix) {
            if (strpos($text, $prefix) !== 0) {
                continue;
            }
            $text = str_replace($prefix, "", $text);
            $recipe->time[$slot] = RecipeParser_Times::toMinutes($text);
        }
    }

    // Clean up each of the ingredients to remove "$Click to see savings".
    // These don't come through in the curl'ed test files.
    $sectionCount = count($recipe->ingredients);
    for ($s = 0; $s < $sectionCount; $s++) {
        $itemCount = count($recipe->ingredients[$s]['list']);
        for ($k = 0; $k < $itemCount; $k++) {
            // A "$" at offset 0 (or no "$" at all) leaves the line untouched.
            $cut = strpos($recipe->ingredients[$s]['list'][$k], "\$");
            if ($cut > 0) {
                $recipe->ingredients[$s]['list'][$k] = substr($recipe->ingredients[$s]['list'][$k], 0, $cut);
            }
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page and tidy the title and description.
 *
 * Uses the generic microdata parser for all fields, then strips a trailing
 * " Recipe" from the title and removes boilerplate lines from the description.
 *
 * @param string $html Raw page markup, forwarded to the microdata parser.
 * @param string $url  Page URL, forwarded to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    // Get all of the standard bits we can find.
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Titles include "recipe"
    $title = $recipe->title;
    if (preg_match("/ Recipe\$/", $title)) {
        $recipe->title = trim(preg_replace("/(.*) Recipe\$/", "\$1", $title));
    }

    // Nothing to clean when there is no description.
    if (!$recipe->description) {
        return $recipe;
    }

    // Blank out promotional lines, then collapse the runs of line breaks
    // they leave behind into a single empty line.
    $text = preg_replace("/^(Read our review of|This (dish|recipe) was featured as part|See more recipes) .*\$/m", "", $recipe->description);
    $text = preg_replace("/[\r\n]{3,}/", "\n\n", $text);
    $recipe->description = trim($text);

    return $recipe;
}
/**
 * Parse a recipe page whose yield, ingredients and instructions live in a
 * single body field.
 *
 * After the generic microdata pass, walks the children of the first matching
 * body container. A child mentioning makes/yields/serves/servings together
 * with a digit becomes the yield (when none is set yet). Children after the
 * "INGREDIENTS" marker are ingredients until the "INSTRUCTIONS" marker has
 * been seen, after which they are instructions.
 *
 * @param string $html Raw page markup; converted from UTF-8 before loading.
 * @param string $url  Page URL, forwarded unchanged to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Keep libxml parse errors in its internal buffer rather than raising warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Yield, Ingredients, Instructions
    $seenInstructionsMarker = false;
    $seenIngredientsMarker = false;

    $containers = $xpath->query('//*[@class="field field-name-body field-type-text-with-summary field-label-hidden"]//*[@class="field-item even"]');
    if (!$containers->length) {
        return $recipe;
    }

    foreach ($containers->item(0)->childNodes as $child) {
        $text = trim($child->nodeValue);

        // Yield
        if (!$recipe->yield && preg_match("/(makes|yields|serves|servings)/i", $text) && preg_match("/\\d/", $text)) {
            $recipe->yield = RecipeParser_Text::formatYield($text);
            continue;
        }

        // Marker lines only flip state; they are never stored.
        if ($text == "INGREDIENTS") {
            $seenIngredientsMarker = true;
            continue;
        }
        if ($text == "INSTRUCTIONS") {
            $seenInstructionsMarker = true;
            continue;
        }

        // Everything before the ingredients marker is ignored.
        if (!$seenIngredientsMarker) {
            continue;
        }

        $text = RecipeParser_Text::formatAsOneLine($text);
        if ($seenInstructionsMarker) {
            $text = RecipeParser_Text::stripLeadingNumbers($text);
            $recipe->appendInstruction($text);
        } else {
            $recipe->appendIngredient($text);
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page from The Daily Meal.
 *
 * Runs the generic microdata parser first. When that pass yields no
 * ingredient lines, falls back to scraping a plain list inside the
 * ingredients div. When no yield was found, reads it from the Edamam table.
 *
 * @param string $html Raw page markup; converted from UTF-8 before loading.
 * @param string $url  Page URL, forwarded unchanged to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Keep libxml parse errors in its internal buffer rather than raising warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    //
    // Some of the ingredient lines on The Daily Meal do not adhere to
    // the usual microdata formatting. Here we fall back to looking for a
    // regular list within a higher-level ingredients div.
    //
    // The fallback must only run when the microdata pass found nothing;
    // running it on top of existing ingredients would append duplicates.
    // NOTE(review): assumes $recipe->ingredients is a list of sections, each
    // holding its lines under a 'list' key -- confirm against the recipe class.
    $hasIngredients = false;
    if (!empty($recipe->ingredients)) {
        foreach ($recipe->ingredients as $section) {
            if (is_array($section) && !empty($section['list'])) {
                $hasIngredients = true;
                break;
            }
        }
    }

    if (!$hasIngredients) {
        $nodes = $xpath->query("//div[@class='content']/div[@class='ingredient']/ul/li");
        foreach ($nodes as $node) {
            $value = RecipeParser_Text::formatAsOneLine($node->nodeValue);
            if (empty($value)) {
                continue;
            }
            if (RecipeParser_Text::matchSectionName($value)) {
                $value = RecipeParser_Text::formatSectionName($value);
                $recipe->addIngredientsSection($value);
            } else {
                $recipe->appendIngredient($value);
            }
        }
    }

    //
    // The Daily Meal provides servings details via Edamam's plugin.
    // The value is read from the second cell of the table's second row.
    //
    if (!$recipe->yield) {
        $nodes = $xpath->query("//table[@class='edamam-data']/tr[2]/td[2]");
        if ($nodes->length) {
            $recipe->yield = RecipeParser_Text::formatYield($nodes->item(0)->nodeValue);
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page whose ingredients and method live in dedicated boxes.
 *
 * Runs the generic microdata parser, then replaces its ingredients with the
 * list items under #ingredients-box and its instructions with the paragraphs
 * under #method-box.
 *
 * @param string $html Raw page markup; converted from UTF-8 before loading.
 * @param string $url  Page URL, forwarded unchanged to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    // Get all of the standard microdata stuff we can find.
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Turn off libxml errors to prevent mismatched tag warnings.
    libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Ingredients
    $recipe->resetIngredients();
    $nodes = $xpath->query('//div[@id="ingredients-box"]//ul/li');
    foreach ($nodes as $node) {
        $line = $node->nodeValue;
        if ($node->getAttribute("itemprop")) {
            // Items tagged with any itemprop are ingredient lines.
            $line = RecipeParser_Text::formatAsOneLine($line);
            $recipe->appendIngredient($line);
        } else {
            // Untagged items are treated as section headings.
            $line = RecipeParser_Text::formatSectionName($line);
            $recipe->addIngredientsSection($line);
        }
    }

    // Instructions
    $recipe->resetInstructions();
    $nodes = $xpath->query('//*[@id="method-box"]//p');
    foreach ($nodes as $node) {
        $line = RecipeParser_Text::formatAsOneLine($node->nodeValue);
        // Skip paragraphs that are empty after formatting.
        if ($line) {
            $recipe->appendInstruction($line);
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page that groups ingredients into "IngredientSet" blocks and
 * separates instruction steps with <br> tags.
 *
 * Runs the generic microdata parser, then rebuilds the ingredients (with
 * optional section headings) and the instructions from the page markup.
 *
 * @param string $html Raw page markup; converted from UTF-8 before loading.
 * @param string $url  Page URL, forwarded unchanged to the microdata parser.
 *
 * @return object The recipe produced by the microdata parser, updated in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Turn off libxml errors to prevent mismatched tag warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Ingredients: each set may carry one heading followed by its lines.
    $recipe->resetIngredients();
    foreach ($xpath->query('//*[@id="v_ingredients"]//*[@id="IngredientSet"]') as $set) {
        $headings = $xpath->query('.//*[@id="IngredientHeading"]', $set);
        if ($headings->length) {
            $sectionName = RecipeParser_Text::formatSectionName($headings->item(0)->nodeValue);
            $recipe->addIngredientsSection($sectionName);
        }

        foreach ($xpath->query('.//*[@id="IngredientLine"]', $set) as $lineNode) {
            $ingredient = RecipeParser_Text::formatAsOneLine($lineNode->nodeValue);
            $recipe->appendIngredient($ingredient);
        }
    }

    // Instructions
    $recipe->resetInstructions();
    $containers = $xpath->query('//*[@itemprop="instructions"]');
    if (!$containers->length) {
        return $recipe;
    }

    // The instructions container uses <br> tags to separate steps. Plain
    // nodeValue would lose them, so the text is rebuilt child by child:
    // <br> nodes become a literal "<br>" marker, <b> nodes are flagged as
    // section headings, and any other node contributes its text unless it
    // is whitespace-only.
    $markup = "";
    foreach ($containers->item(0)->childNodes as $child) {
        switch ($child->nodeName) {
            case "br":
                $markup .= "<br>";
                break;
            case "b":
                $markup .= "SECTION:" . $child->nodeValue;
                break;
            default:
                $fragment = $child->nodeValue;
                if (preg_match("/\\S/", $fragment)) {
                    $markup .= $fragment;
                }
        }
    }

    foreach (explode("<br>", $markup) as $segment) {
        if (strpos($segment, "SECTION:") === 0) {
            // Strip the 8-character "SECTION:" flag before formatting.
            $sectionName = RecipeParser_Text::formatSectionName(substr($segment, 8));
            $recipe->addInstructionsSection($sectionName);
            continue;
        }

        $step = RecipeParser_Text::formatAsOneLine($segment);
        $step = RecipeParser_Text::stripLeadingNumbers($step);

        // Yield lines embedded in the instructions are not steps.
        if (stripos($step, "yield:") === 0) {
            continue;
        }
        $recipe->appendInstruction($step);
    }

    return $recipe;
}