/**
 * Parse a recipe page, refining the generic data-vocabulary microdata result.
 *
 * Starts from RecipeParser_Parser_MicrodataDataVocabulary::parse() and then
 * rewrites the photo URL, rebuilds the ingredient list from the
 * "recipeDetails" markup, and sets the credits from the first element
 * carrying itemprop="author".
 *
 * @param string $html Raw HTML of the recipe page.
 * @param string $url  Page URL; only forwarded to the base parser here.
 * @return object The recipe returned by the base parser, modified in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

    // Collect libxml parse errors internally rather than emitting them.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // Photo URL, use larger version found on MyRecipes
    $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

    // Ingredients
    $nodes = $xpath->query('//div[@class="recipeDetails"]/ul');
    $list = $nodes->length ? $nodes->item(0) : null;

    // Only discard the base parser's ingredients when the site-specific list
    // is actually present. Previously a missing list dereferenced null and
    // left the recipe with no ingredients at all.
    if ($list !== null) {
        $recipe->resetIngredients();

        foreach ($list->childNodes as $li) {
            if ($li->nodeName != 'li') {
                continue;
            }

            $text = RecipeParser_Text::FormatAsOneLine($li->nodeValue);

            if ($li->getAttribute('itemprop') == 'ingredient') {
                // Strip the promotional link text embedded in ingredient items.
                $text = trim(str_replace('$Click to see savings', '', $text));
                $recipe->appendIngredient($text);
            } else {
                // An <li> without the ingredient itemprop is a section heading.
                $text = RecipeParser_Text::formatSectionName($text);
                $recipe->addIngredientsSection($text);
            }
        }
    }

    // Credits
    $nodes = $xpath->query('//*[@itemprop="author"]');
    if ($nodes->length) {
        $line = trim($nodes->item(0)->nodeValue);
        $recipe->credits = $line;
    }

    return $recipe;
}
/**
 * Parse a recipe page, adding notes and a larger photo to the schema.org result.
 *
 * Starts from RecipeParser_Parser_MicrodataSchema::parse(). Notes are built
 * from the list items under the "recipeTips" element followed by the "type"
 * entries under "recipeInfo" (skipping any that contain "Makes:"), joined
 * with blank lines. The photo URL has its "/l_" path prefix swapped for "/550_".
 *
 * @param string $html Raw HTML of the recipe page.
 * @param string $url  Page URL; only forwarded to the base parser here.
 * @return object The recipe returned by the base parser, modified in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

    // Collect libxml parse errors internally rather than emitting them.
    libxml_use_internal_errors(true);

    $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $dom = new DOMDocument();
    $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
    $finder = new DOMXPath($dom);

    $collected = array();

    // Tips/notes listed after the instructions; drop a leading "Tip"/"Note" label.
    foreach ($finder->query('//*[@class="recipeTips"]//li') as $tip) {
        $text = RecipeParser_Text::FormatAsOneLine($tip->nodeValue);
        $collected[] = preg_replace("/^(Tip|Note)\\s*(.*)\$/", "\$2", $text);
    }

    // Non-standard cook times and baking temps; the "Makes:" entry is not a note.
    foreach ($finder->query('//*[@class="recipeInfo"]//*[@class="type"]') as $info) {
        $text = RecipeParser_Text::formatAsOneLine($info->nodeValue);
        if (strpos($text, "Makes:") === false) {
            $collected[] = $text;
        }
    }

    $recipe->notes = implode("\n\n", $collected);

    // Adjust Photo URL for larger dimensions
    $recipe->photo_url = preg_replace("/\\/l_([^\\/]+)/", "/550_\$1", $recipe->photo_url);

    return $recipe;
}
/**
 * Parse a recipe page, overriding photo, yield and ingredients on the schema.org result.
 *
 * Starts from RecipeParser_Parser_MicrodataSchema::parse(), then:
 *  - blanks the photo when it is the site logo, otherwise swaps "/thumbs/" for "/large/";
 *  - builds the yield from the "select-title" option and the "yieldUnits-txt" paragraph;
 *  - rebuilds the ingredients from the children of the "pod ingredients" div.
 *
 * @param string $html Raw HTML of the recipe page.
 * @param string $url  Page URL; only forwarded to the base parser here.
 * @return object The recipe returned by the base parser, modified in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

    // Turn off libxml errors to prevent mismatched tag warnings.
    libxml_use_internal_errors(true);

    $dom = new DOMDocument();
    $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
    $finder = new DOMXPath($dom);

    // Photo -- the logo is not a real photo; real photos get the large variant.
    if (strpos($recipe->photo_url, "FDC_Logo_vertical.png") !== false) {
        $recipe->photo_url = '';
    } elseif ($recipe->photo_url) {
        $recipe->photo_url = str_replace("/thumbs/", "/large/", $recipe->photo_url);
    }

    // Yield -- quantity from the <option>, unit from the <p> (default "servings").
    $yield_text = '';

    $quantity_nodes = $finder->query('//option[@class="select-title"]');
    if ($quantity_nodes->length) {
        $yield_text .= trim($quantity_nodes->item(0)->nodeValue);
    }

    $unit_nodes = $finder->query('//p[@class="yieldUnits-txt"]');
    if ($unit_nodes->length) {
        $unit = trim($unit_nodes->item(0)->nodeValue);
        if (!$unit) {
            $unit = 'servings';
        }
        $yield_text .= ' ' . $unit;
    }

    $recipe->yield = trim($yield_text);

    // Ingredients (custom because of duplicate class attributes for "ingredients")
    $recipe->resetIngredients();

    foreach ($finder->query('//div[@class = "pod ingredients"]/*') as $child) {
        if ($child->nodeName == 'h3') {
            // <h3> holds an ingredient section name; normalise its casing.
            $section = strtolower($child->nodeValue);
            $section = ucfirst(trim($section));
            $recipe->addIngredientsSection($section);
        } elseif ($child->nodeName == 'ul') {
            // Each direct <li> child of the list is one ingredient line.
            foreach ($child->childNodes as $item) {
                if ($item->nodeName != 'li') {
                    continue;
                }
                $recipe->appendIngredient(
                    RecipeParser_Text::FormatAsOneLine($item->nodeValue)
                );
            }
        }
    }

    return $recipe;
}
/**
 * Parse a recipe page, filling gaps left by the generic microformat parser.
 *
 * Starts from RecipeParser_Parser_Microformat::parse() and then supplies
 * fallbacks for title and photo when they are empty, and sets yield and
 * credits from site-specific markup.
 *
 * @param string $html Raw HTML of the recipe page.
 * @param string $url  Page URL; forwarded to the base parser and passed to
 *                     RecipeParser_Text::formatPhotoUrl() for the fallback photo.
 * @return object The recipe returned by the base parser, modified in place.
 */
public static function parse($html, $url)
{
    $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

    // Collect libxml parse errors internally rather than emitting them.
    libxml_use_internal_errors(true);
    $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
    $doc = new DOMDocument();
    $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
    $xpath = new DOMXPath($doc);

    // --- Items not properly defined in Recipe.com's microformat markup.

    // Title -- Fallback if "fn" is not defined.
    if (!$recipe->title) {
        $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " heading1 ")]');
        if ($nodes->length) {
            $recipe->title = trim($nodes->item(0)->nodeValue);
        }
    }

    // Photo -- Fallback if "photo" is not defined.
    if (!$recipe->photo_url) {
        $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " PB10 ")]/img');
        if ($nodes->length) {
            // Keep the image src in its own variable so the page URL in $url
            // is not overwritten. $this is unavailable in a static method, so
            // the page URL has to come from the $url parameter.
            // NOTE(review): assumes formatPhotoUrl() takes (photo URL, page URL) -- confirm.
            $photo_src = $nodes->item(0)->getAttribute('src');
            $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_src, $url);
        }
    }

    // Yield
    $nodes = $xpath->query('//*[@class="servingsize"]');
    if ($nodes->length) {
        $line = trim($nodes->item(0)->nodeValue);
        $recipe->yield = RecipeParser_Text::formatYield($line);
    }

    // Credits
    $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " partnerName ")]');
    if ($nodes->length) {
        $line = RecipeParser_Text::FormatAsOneLine($nodes->item(0)->nodeValue);
        // Drop the "Recipe from" lead-in, keeping only the source name.
        $line = preg_replace('/\\s*Recipe from\\s+(.*)$/', "\$1", $line);
        $recipe->credits = trim($line);
    }

    return $recipe;
}