/**
 * Parse a recipe page, replacing the ingredient list with the sectioned
 * ingredients found under <dl id="stages"> when that element has children.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; forwarded to the microformat parser.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with ingredients rebuilt when staged markup is present.
 */
public static function parse($html, $url)
{
    // Baseline: everything the generic microformat parser can extract.
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // Keep libxml errors internal so mismatched tags do not emit warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Multi-stage ingredients: <dt> names a section, <dd> holds its items.
    $stageNodes = $xpath->query('//dl[@id="stages"]/*');
    if (!$stageNodes->length) {
        return $recipe;
    }

    $recipe->resetIngredients();
    foreach ($stageNodes as $stageNode) {
        if ($stageNode->nodeName == 'dt') {
            $sectionName = RecipeParser_Text::formatSectionName($stageNode->nodeValue);
            $recipe->addIngredientsSection($sectionName);
        } elseif ($stageNode->nodeName == 'dd') {
            $items = $xpath->query('.//*[@class="ingredient"]', $stageNode);
            foreach ($items as $item) {
                $recipe->appendIngredient(trim($item->nodeValue));
            }
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page whose main content lives in <section role="main">,
 * overriding title, yield, prep/total times and photo from site markup.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; used to absolutize the photo URL.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with the overrides below applied.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    $mainSections = $xpath->query('//section[@role="main"]');
    if ($mainSections->length) {
        $main = $mainSections->item(0);

        // Title is not marked up with class="fn"; take the first <h1>.
        $headings = $xpath->query('.//h1', $main);
        if ($headings->length) {
            $recipe->title = RecipeParser_Text::formatTitle($headings->item(0)->nodeValue);
        }

        // Yield -- class names are conflated. Both selectors are tried in
        // order, so a recipeYield span overrides the "info yield" element.
        $yieldSelectors = array(
            './/*[@class="info yield"]',
            './/span[@itemprop="recipeYield"]',
        );
        foreach ($yieldSelectors as $selector) {
            $matches = $xpath->query($selector, $main);
            if ($matches->length) {
                $recipe->yield = RecipeParser_Text::formatYield($matches->item(0)->nodeValue);
            }
        }

        // Prep time and total time (duration) -- class names are conflated.
        $timeSelectors = array(
            'prep'  => './/*[@class="info preptime"]',
            'total' => './/*[@class="info duration"]',
        );
        foreach ($timeSelectors as $timeKey => $selector) {
            $matches = $xpath->query($selector, $main);
            if ($matches->length) {
                $recipe->time[$timeKey] = RecipeParser_Times::toMinutes($matches->item(0)->nodeValue);
            }
        }
    }

    // Photo
    $images = $xpath->query('//section[@class="content-unit"]/img');
    if ($images->length) {
        $src = $images->item(0)->getAttribute('src');
        if ($src) {
            $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($src, $url);
        }
    }

    // Remove recipe title intros -- e.g. "Sunday Dinner: Pork Ribs" becomes "Pork Ribs".
    $hasIntro = strpos($recipe->title, ": ") !== false;
    if ($hasIntro) {
        $recipe->title = preg_replace("/^[^:]+: (.+)/", "\$1", $recipe->title);
    }

    return $recipe;
}
/**
 * Parse a recipe page, overriding yield, prep/cook/total times and the
 * ingredient list (including section dividers) from site-specific markup.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; forwarded to the microformat parser.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with the overrides below applied.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // Turn off libxml errors to prevent mismatched tag warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Yield
    $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " yield ")]');
    if ($nodes->length) {
        $line = $nodes->item(0)->nodeValue;
        $recipe->yield = RecipeParser_Text::formatYield($line);
    }

    // Times: the ISO 8601 duration is read from the "title" attribute of the
    // SECOND <span> child of each time container.
    $timeClasses = array(
        'prep'  => 'prepTime',
        'cook'  => 'rspec-cook-time',
        'total' => 'totaltime',
    );
    foreach ($timeClasses as $timeKey => $className) {
        $nodes = $xpath->query(
            '//*[contains(concat(" ", normalize-space(@class), " "), " ' . $className . ' ")]/span'
        );
        // item(1) is null unless at least two spans matched; calling
        // getAttribute() on it would be a fatal error.
        if ($nodes->length > 1) {
            $line = $nodes->item(1)->getAttribute("title");
            $recipe->time[$timeKey] = RecipeParser_Text::iso8601ToMinutes($line);
        }
    }

    // Ingredients
    $recipe->resetIngredients();
    $ing_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredients ")]/*');
    foreach ($ing_nodes as $ing_node) {
        // A divider element starts a new named ingredients section.
        if ($ing_node->getAttribute('class') == "ingr-divider") {
            $line = RecipeParser_Text::formatSectionName($ing_node->nodeValue);
            $recipe->addIngredientsSection($line);
            continue;
        }

        // Extract ingredients from inside of <ul class="ingredientsList">.
        // Child nodes should all be <li>.
        if ($ing_node->nodeName == 'ul') {
            foreach ($ing_node->childNodes as $node) {
                $line = trim($node->nodeValue);
                $recipe->appendIngredient($line);
            }
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page, overriding yield (from the "resizeTo" form field),
 * ingredients and instructions from site-specific markup.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; forwarded to the microformat parser.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with the overrides below applied.
 */
public static function parse($html, $url)
{
    // Baseline: everything the generic microformat parser can extract.
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // Keep libxml errors internal so mismatched tags do not emit warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Yield: the value of the element named "resizeTo", suffixed with " servings".
    $resizeFields = $xpath->query('//*[@name="resizeTo"]');
    if ($resizeFields->length) {
        $servings = trim($resizeFields->item(0)->getAttribute("value"));
        $recipe->yield = RecipeParser_Text::formatYield($servings . " servings");
    }

    // Ingredients: join each child node's text with single spaces.
    $recipe->resetIngredients();
    $ingredientNodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
    foreach ($ingredientNodes as $ingredientNode) {
        $pieces = array();
        foreach ($ingredientNode->childNodes as $child) {
            $pieces[] = $child->nodeValue;
        }
        $joined = str_replace(" ; ", "; ", implode(' ', $pieces));
        $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($joined));
    }

    // Instructions: a paragraph equal to its own upper-cased form is treated
    // as a section heading; anything else is an instruction step.
    $recipe->resetInstructions();
    $paragraphs = $xpath->query('//div[@class="display-field"]/p');
    foreach ($paragraphs as $paragraph) {
        $text = trim($paragraph->nodeValue);
        if ($text != strtoupper($text)) {
            $recipe->appendInstruction($text);
            continue;
        }
        $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($text));
    }

    return $recipe;
}
/**
 * Parse a Recipe.com page, filling in items that are not properly defined
 * in the site's microformat markup: title and photo (fallbacks only), plus
 * yield and credits (always overridden when found).
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; passed to formatPhotoUrl() alongside
 *                     the image source.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with the overrides below applied.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // --- Items not properly defined in Recipe.com's microformat markup.

    // Title -- Fallback if "fn" is not defined.
    if (!$recipe->title) {
        $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " heading1 ")]');
        if ($nodes->length) {
            $recipe->title = trim($nodes->item(0)->nodeValue);
        }
    }

    // Photo -- Fallback if "photo" is not defined.
    if (!$recipe->photo_url) {
        $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " PB10 ")]/img');
        if ($nodes->length) {
            // Use a dedicated local so the $url parameter is not overwritten,
            // and pass $url itself: this is a static method, so there is no
            // $this to read a URL from.
            $photo_url = $nodes->item(0)->getAttribute('src');
            $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_url, $url);
        }
    }

    // Yield
    $nodes = $xpath->query('//*[@class="servingsize"]');
    if ($nodes->length) {
        $line = trim($nodes->item(0)->nodeValue);
        $recipe->yield = RecipeParser_Text::formatYield($line);
    }

    // Credits -- strip a "Recipe from" lead-in, keeping what follows it.
    $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " partnerName ")]');
    if ($nodes->length) {
        $line = RecipeParser_Text::formatAsOneLine($nodes->item(0)->nodeValue);
        $line = preg_replace('/\\s*Recipe from\\s+(.*)$/', "\$1", $line);
        $recipe->credits = trim($line);
    }

    return $recipe;
}
/**
 * Parse a recipe page laid out inside <div id="recipe">: italic paragraphs
 * form the description, <blockquote> paragraphs hold the ingredients, and
 * the <p> elements following a blockquote are the instructions.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; forwarded to the microformat parser.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with description, ingredients, instructions and possibly
 *               yield replaced.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // Keep libxml errors internal so mismatched tag warnings are not emitted.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Description: every italic run except "Adapted from" attributions,
    // separated by blank lines.
    $paragraphs = array();
    foreach ($xpath->query('//div[@id="recipe"]/p/i') as $italic) {
        $text = trim($italic->nodeValue);
        if (strpos($text, "Adapted from") === false) {
            $paragraphs[] = $text;
        }
    }
    $recipe->description = trim(implode("\n\n", $paragraphs));

    // Ingredients: gather candidate lines from the blockquote paragraphs.
    $recipe->resetIngredients();
    $candidates = array();
    foreach ($xpath->query('//div[@id="recipe"]/blockquote/p') as $paragraph) {
        foreach ($paragraph->childNodes as $child) {
            $text = trim($child->nodeValue);
            $tag = $child->nodeName;
            if ($tag == "strong" || $tag == "b") {
                // Bold text gets a trailing colon when it has no colon at all.
                if (strpos($text, ":") === false) {
                    $text .= ":";
                }
                $candidates[] = $text;
            } elseif ($tag == "#text" || $tag == "div" || $tag == "p") {
                $candidates[] = $text;
            }
            // Any other node type is ignored.
        }
    }

    foreach ($candidates as $candidate) {
        if (RecipeParser_Text::matchSectionName($candidate)) {
            $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($candidate));
            continue;
        }
        $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($candidate));
    }

    // Instructions: <p> elements that come after a <blockquote>.
    $recipe->resetInstructions();
    $seenBlockquote = false;
    foreach ($xpath->query('//div[@id="recipe"]/*') as $element) {
        if ($element->nodeName == "blockquote") {
            $seenBlockquote = true;
            continue;
        }
        if ($element->nodeName != "p" || !$seenBlockquote) {
            continue;
        }

        $text = trim($element->nodeValue);

        // Instructions end at the "Adapted from" note or at any <p> that
        // carries a class attribute.
        if (stripos($text, "Adapted from") !== false || $element->getAttribute("class")) {
            break;
        }

        // A paragraph starting with "Serves " is the yield, not a step.
        if (stripos($text, "Serves ") === 0) {
            $recipe->yield = RecipeParser_Text::formatYield($text);
            continue;
        }

        $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($element->nodeValue));
    }

    return $recipe;
}
/**
 * Parse an About.com recipe page, overriding prep/cook times, yield and
 * instructions. Total time is left to the microformat markup.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; forwarded to the microformat parser.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with the About.com overrides applied.
 */
public static function parse($html, $url)
{
    // Keep libxml errors internal so mismatched tag warnings are not emitted.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // The microformat parser receives the entity-converted HTML here.
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // OVERRIDES FOR ABOUT.COM

    // Prep and cook times come from <h3> headings in the article body.
    foreach ($xpath->query('//div[@id = "articlebody"]/h3') as $heading) {
        $text = trim(preg_replace('/[\\s\\"]+/', ' ', $heading->nodeValue));
        if (preg_match("/prep time\\:(.+)/i", $text, $m)) {
            $recipe->time['prep'] = RecipeParser_Times::toMinutes($m[1]);
        } elseif (preg_match("/cook time\\:(.+)/i", $text, $m)) {
            $recipe->time['cook'] = RecipeParser_Times::toMinutes($m[1]);
        }
    }

    // Clean-up patterns, applied in this order; each keeps only the text
    // captured before the unwanted tail:
    //  - "recipes" squashed against another word (long related-link lists)
    //  - "More Xxxxx Recipes"
    //  - "Xxxxx Recipes and More"
    //  - "Xxxxx Recipes | Xxxxx"
    //  - newsletter mentions
    $junkPatterns = array(
        "/(.*)recipes\\w/i",
        "/(.*)More .* Recipes.*/",
        "/(.*)Recipes and More.*/",
        "/(.*)Recipes \\| .*/",
        "/(.*)Recipe Newsletter.*/",
    );

    // Instructions
    $recipe->resetInstructions();
    foreach ($xpath->query('//div[@class = "instructions"]') as $block) {
        $lines = preg_split("/[\n\r]+/", trim($block->nodeValue));

        // Walk the lines from last to first, so when several lines start
        // with "Makes " the earliest one is assigned last and wins.
        foreach (array_reverse(array_keys($lines)) as $i) {
            $lines[$i] = preg_replace($junkPatterns, "\$1", trim($lines[$i]));

            // A line that looks like a yield is recorded and blanked out.
            if (strpos($lines[$i], "Makes ") === 0) {
                $recipe->yield = substr($lines[$i], 6);
                $lines[$i] = '';
            }
        }

        foreach ($lines as $line) {
            $line = trim($line);
            if (empty($line)) {
                continue;
            }

            // Section names read like "---For the cake: Raise the oven temperature..."
            if (preg_match("/^(?:-{2,})?For the (.+)\\: (.*)\$/i", $line, $m)) {
                $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($m[1]));
                // Continue with the remainder of the line, minus the section name.
                $line = ucfirst($m[2]);
            }

            $recipe->appendInstruction($line);
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page, using site-specific markup only as a fallback for
 * fields the microformat parser left empty: title, yield, ingredients,
 * instructions and photo.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; forwarded to the microformat parser.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with missing fields filled in where possible.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Title
    if (!$recipe->title) {
        $nameNodes = $xpath->query('//div[@itemprop="name"]');
        if ($nameNodes->length) {
            $recipe->title = RecipeParser_Text::formatTitle($nameNodes->item(0)->nodeValue);
        }
    }

    // Yield: first box entry whose text starts with "makes" (case-insensitive).
    if (!$recipe->yield) {
        foreach ($xpath->query('//div[@class="box"]/div') as $boxEntry) {
            $text = trim($boxEntry->nodeValue);
            if (stripos($text, "makes") === 0) {
                $recipe->yield = RecipeParser_Text::formatYield($text);
                break;
            }
        }
    }

    // Ingredients: only when the first ingredient section is empty.
    if (!count($recipe->ingredients[0]["list"])) {
        $lists = $xpath->query('//ul[@class="ingredients"]');
        if ($lists->length) {
            // Each <li> text is terminated with "<br>", then the combined
            // string is split on "<br>" again.
            $chunks = array();
            foreach ($lists->item(0)->childNodes as $child) {
                if ($child->nodeName == "li") {
                    $chunks[] = $child->nodeValue . "<br>";
                }
            }
            foreach (explode("<br>", implode("", $chunks)) as $entry) {
                $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($entry));
            }
        }
    }

    // Instructions: only when the first instruction section is empty.
    if (!count($recipe->instructions[0]["list"])) {
        foreach ($xpath->query('//div[@class="instructions"]/ol/li') as $step) {
            $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
        }
    }

    // Photo: first og:image whose URL contains "wp-content".
    if (!$recipe->photo_url) {
        foreach ($xpath->query('//meta[@property="og:image"]') as $meta) {
            $candidate = $meta->getAttribute("content");
            if (strpos($candidate, "wp-content") !== false) {
                $recipe->photo_url = $candidate;
                break;
            }
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page marked up with schema.org-style itemprop attributes,
 * overriding title, instructions, photo, prep/cook/total times, yield and
 * the (possibly sectioned) ingredient list.
 *
 * Each override is applied only when its node is present in the page;
 * otherwise the value from the microformat parser is left untouched.
 *
 * @param string $html Raw HTML of the page.
 * @param string $url  URL of the page; used to absolutize the photo URL.
 * @return mixed The recipe returned by RecipeParser_Parser_Microformat::parse(),
 *               with the overrides below applied.
 */
public static function parse($html, $url)
{
    // Get all of the standard hrecipe stuff we can find.
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // Turn off libxml errors to prevent mismatched tag warnings.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    $recipe->resetIngredients();

    // Title -- guarded so a page without itemprop="name" does not
    // dereference null and blank out the title.
    $nameNodes = $xpath->query('.//*[@itemprop="name"]');
    if ($nameNodes->length) {
        $recipe->title = trim($nameNodes->item(0)->nodeValue);
    }

    // Instructions
    $nodes = $xpath->query('//li[@itemprop="recipeInstructions"]/*');
    foreach ($nodes as $sub) {
        $line = trim($sub->nodeValue);
        $line = RecipeParser_Text::stripLeadingNumbers($line);
        $recipe->appendInstruction($line);
    }

    // Photo -- guarded: calling getAttribute() on a missing node is fatal.
    $imageNodes = $xpath->query('.//*[@itemprop="image"]');
    if ($imageNodes->length) {
        $photo_url = $imageNodes->item(0)->getAttribute('src');
        $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
    }

    // Meta data
    $nodes = $xpath->query('//div[@class="recipe-metadata-wrap"]/*');
    if ($nodes->length) {
        // Prep and cook durations are ISO 8601 values in the "content" attribute.
        $timeProps = array(
            'prep' => 'prepTime',
            'cook' => 'cookTime',
        );
        foreach ($timeProps as $timeKey => $itemprop) {
            $timeNodes = $xpath->query('.//*[@itemprop="' . $itemprop . '"]');
            if ($timeNodes->length && $timeNodes->item(0)->hasAttribute('content')) {
                $value = trim($timeNodes->item(0)->getAttribute('content'));
                $recipe->time[$timeKey] = RecipeParser_Text::iso8601ToMinutes($value);
            }
        }

        // Total time is the sum of cook and prep time.
        $recipe->time['total'] = $recipe->time['cook'] + $recipe->time['prep'];

        $yieldNodes = $xpath->query('.//*[@itemprop="recipeYield"]');
        if ($yieldNodes->length) {
            $value = trim($yieldNodes->item(0)->nodeValue);
            $recipe->yield = RecipeParser_Text::formatYield($value);
        }
    }

    // Multi-stage ingredients: <h3> names a section, <ul> holds its items.
    $nodes = $xpath->query('//div[@class="recipe-ingredients-wrapper"]/*');
    foreach ($nodes as $node) {
        if ($node->nodeName == 'h3') {
            $value = RecipeParser_Text::formatSectionName($node->nodeValue);
            $recipe->addIngredientsSection($value);
        } elseif ($node->nodeName == 'ul') {
            $subs = $xpath->query('.//li[@itemprop="ingredients"]', $node);
            foreach ($subs as $sub) {
                $recipe->appendIngredient(trim($sub->nodeValue));
            }
        }
    }

    return $recipe;
}