Exemplo n.º 1
0
 /**
  * Parse a recipe page, starting from the generic data-vocabulary microdata
  * result and then overriding the photo URL, ingredients and credits with
  * values read from page-specific markup.
  *
  * @param string $html Raw page markup (treated as UTF-8).
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Keep libxml's complaints about the markup internal instead of emitting warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     // The XML prolog hints libxml that the document is UTF-8.
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Photo URL, use larger version found on MyRecipes
     $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

     // Ingredients: the microdata result is discarded and rebuilt from the
     // first <ul> inside the "recipeDetails" div.
     $recipe->resetIngredients();
     $nodes = $xpath->query('//div[@class="recipeDetails"]/ul');
     // DOMNodeList::item() returns null when nothing matched; guard so a page
     // without the list does not dereference null.
     $list = $nodes->length ? $nodes->item(0) : null;
     if ($list !== null) {
         foreach ($list->childNodes as $li) {
             if ($li->nodeName == 'li') {
                 $text = RecipeParser_Text::formatAsOneLine($li->nodeValue);
                 if ($li->getAttribute('itemprop') == 'ingredient') {
                     // Strip the promotional link text embedded in ingredient lines.
                     $text = trim(str_replace('$Click to see savings', '', $text));
                     $recipe->appendIngredient($text);
                 } else {
                     // List items without the ingredient itemprop are section headings.
                     $text = RecipeParser_Text::formatSectionName($text);
                     $recipe->addIngredientsSection($text);
                 }
             }
         }
     }

     // Credits: text of the first element carrying itemprop="author".
     $nodes = $xpath->query('//*[@itemprop="author"]');
     if ($nodes->length) {
         $line = trim($nodes->item(0)->nodeValue);
         $recipe->credits = $line;
     }

     return $recipe;
 }
Exemplo n.º 2
0
 /**
  * Parse a recipe page: take the generic data-vocabulary microdata result and
  * then fill in the title (when missing), photo URL, credits and times from
  * page-specific markup, and finally trim "$..." suffixes off ingredient lines.
  *
  * @param string $html Raw page markup (treated as UTF-8).
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Build an XPath handle over the page; libxml errors are kept internal.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
     $finder = new DOMXPath($dom);

     // Title missing? Fall back to the Open Graph title.
     if (!$recipe->title) {
         $metaTags = $finder->query('//meta[@property="og:title"]');
         if ($metaTags->length) {
             $ogTitle = $metaTags->item(0)->getAttribute("content");
             $recipe->title = RecipeParser_Text::formatTitle($ogTitle);
         }
     }

     // Photo URL, use larger version found on MyRecipes
     $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

     // Credits: taken from a "More from <source>" heading in the link list.
     $headings = $finder->query('//*[@class="link-list"]/h4');
     if ($headings->length) {
         $heading = trim($headings->item(0)->nodeValue);
         if (strpos($heading, "More from") === 0) {
             $recipe->credits = str_replace("More from ", "", $heading);
         }
     }

     // Times: each time-info node starts with a "prep: " / "cook: " / "total: " label.
     $labels = array('prep' => 'prep: ', 'cook' => 'cook: ', 'total' => 'total: ');
     foreach ($finder->query('//*[@class="recipe-time-info"]') as $timeNode) {
         $text = trim(strtolower($timeNode->nodeValue));
         foreach ($labels as $kind => $label) {
             if (strpos($text, $label) !== 0) {
                 continue;
             }
             $text = str_replace($label, "", $text);
             $recipe->time[$kind] = RecipeParser_Times::toMinutes($text);
         }
     }

     // Clean up each of the ingredients to remove "$Click to see savings"
     // These don't come through in the curl'ed test files
     $sectionCount = count($recipe->ingredients);
     for ($s = 0; $s < $sectionCount; $s++) {
         $lineCount = count($recipe->ingredients[$s]['list']);
         for ($k = 0; $k < $lineCount; $k++) {
             $entry = $recipe->ingredients[$s]['list'][$k];
             $dollarAt = strpos($entry, "\$");
             // Only truncate when the "$" is present and not the first character.
             if ($dollarAt > 0) {
                 $recipe->ingredients[$s]['list'][$k] = substr($entry, 0, $dollarAt);
             }
         }
     }

     return $recipe;
 }
Exemplo n.º 3
0
 /**
  * Parse a recipe page with the generic data-vocabulary microdata parser, then
  * tidy the title and description text.
  *
  * @param string $html Raw page markup; only forwarded to the microdata parser here.
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     // Start from everything the standard microdata parser can find.
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Drop a trailing " Recipe" from the title.
     $title = $recipe->title;
     if (preg_match("/ Recipe\$/", $title)) {
         $recipe->title = trim(preg_replace("/(.*) Recipe\$/", "\$1", $title));
     }

     // Nothing more to do without a description.
     if (!$recipe->description) {
         return $recipe;
     }

     // Remove whole lines of promotional boilerplate, then collapse the runs
     // of line breaks this leaves behind.
     $description = preg_replace("/^(Read our review of|This (dish|recipe) was featured as part|See more recipes) .*\$/m", "", $recipe->description);
     $description = preg_replace("/[\r\n]{3,}/", "\n\n", $description);
     $recipe->description = trim($description);

     return $recipe;
 }
Exemplo n.º 4
0
 /**
  * Parse a recipe page: take the generic data-vocabulary microdata result and
  * then read yield, ingredients and instructions out of the page's body field,
  * where they appear as sibling nodes separated by "INGREDIENTS" and
  * "INSTRUCTIONS" marker lines.
  *
  * @param string $html Raw page markup (treated as UTF-8).
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Build an XPath handle over the page; libxml errors are kept internal.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Yield, Ingredients, Instructions
     $containers = $xpath->query('//*[@class="field field-name-body field-type-text-with-summary field-label-hidden"]//*[@class="field-item even"]');
     if (!$containers->length) {
         return $recipe;
     }

     $seenIngredientsMarker = false;
     $seenInstructionsMarker = false;

     foreach ($containers->item(0)->childNodes as $child) {
         $text = trim($child->nodeValue);

         // Yield: first line that mentions servings and contains a digit,
         // taken only when the microdata parser found no yield.
         if (!$recipe->yield && preg_match("/(makes|yields|serves|servings)/i", $text) && preg_match("/\\d/", $text)) {
             $recipe->yield = RecipeParser_Text::formatYield($text);
             continue;
         }

         // Marker lines switch which list the following nodes go to.
         if ($text == "INGREDIENTS") {
             $seenIngredientsMarker = true;
             continue;
         }
         if ($text == "INSTRUCTIONS") {
             $seenInstructionsMarker = true;
             continue;
         }

         // Everything before the ingredients marker is ignored.
         if (!$seenIngredientsMarker) {
             continue;
         }

         $text = RecipeParser_Text::formatAsOneLine($text);
         if ($seenInstructionsMarker) {
             $recipe->appendInstruction(RecipeParser_Text::stripLeadingNumbers($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }

     return $recipe;
 }
Exemplo n.º 5
0
 /**
  * Parse a recipe page: take the generic data-vocabulary microdata result and
  * then add ingredient lines from a plain list and a yield from the Edamam
  * data table.
  *
  * @param string $html Raw page markup (treated as UTF-8).
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Build an XPath handle over the page; libxml errors are kept internal.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     //
     // Some of the ingredient lines on The Daily Meal do not adhere to
     // the usual microdata formatting.  Here we look for a regular list
     // within a higher-level ingredients div.
     //
     // NOTE(review): this was described as a fallback, yet the guard runs the
     // block when $recipe->ingredients is NOT empty. Confirm against the
     // recipe class whether the condition is inverted; behaviour is kept as is.
     //
     if (!empty($recipe->ingredients)) {
         $items = $xpath->query("//div[@class='content']/div[@class='ingredient']/ul/li");
         foreach ($items as $item) {
             $text = RecipeParser_Text::formatAsOneLine($item->nodeValue);
             if (empty($text)) {
                 continue;
             }
             if (!RecipeParser_Text::matchSectionName($text)) {
                 $recipe->appendIngredient($text);
                 continue;
             }
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
         }
     }

     //
     // The Daily Meal provides servings details via Edamam's plugin.
     // Only consulted when the microdata parser found no yield.
     //
     if (!$recipe->yield) {
         $cells = $xpath->query("//table[@class='edamam-data']/tr[2]/td[2]");
         if ($cells->length) {
             $servings = $cells->item(0)->nodeValue;
             $recipe->yield = RecipeParser_Text::formatYield($servings);
         }
     }

     return $recipe;
 }
Exemplo n.º 6
0
 /**
  * Parse a recipe page: take the generic data-vocabulary microdata result and
  * then rebuild the ingredient and instruction lists from the page's
  * "ingredients-box" and "method-box" elements.
  *
  * @param string $html Raw page markup (treated as UTF-8).
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Ingredients: discard the microdata result and rebuild from the list items.
     $recipe->resetIngredients();
     $nodes = $xpath->query('//div[@id="ingredients-box"]//ul/li');
     foreach ($nodes as $node) {
         $line = $node->nodeValue;
         if ($node->getAttribute("itemprop")) {
             // Items tagged with an itemprop are ingredient lines.
             $line = RecipeParser_Text::formatAsOneLine($line);
             $recipe->appendIngredient($line);
         } else {
             // Untagged items are section headings. The helper name is spelled
             // with its canonical casing, matching the other call sites.
             $line = RecipeParser_Text::formatSectionName($line);
             $recipe->addIngredientsSection($line);
         }
     }

     // Instructions: one step per non-empty paragraph in the method box.
     $recipe->resetInstructions();
     $nodes = $xpath->query('//*[@id="method-box"]//p');
     foreach ($nodes as $node) {
         $line = $node->nodeValue;
         $line = RecipeParser_Text::formatAsOneLine($line);
         if ($line) {
             $recipe->appendInstruction($line);
         }
     }

     return $recipe;
 }
 /**
  * Parse a recipe page: take the generic data-vocabulary microdata result and
  * then rebuild ingredients from the "IngredientSet" blocks and instructions
  * from the <br>-separated content of the itemprop="instructions" element.
  *
  * @param string $html Raw page markup (treated as UTF-8).
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  *
  * @return mixed The recipe object produced by
  *               RecipeParser_Parser_MicrodataDataVocabulary::parse(), refined in place.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Ingredients: each set contributes an optional heading followed by its lines.
     $recipe->resetIngredients();
     $ingredientSets = $xpath->query('//*[@id="v_ingredients"]//*[@id="IngredientSet"]');
     foreach ($ingredientSets as $set) {
         $headings = $xpath->query('.//*[@id="IngredientHeading"]', $set);
         if ($headings->length) {
             $sectionName = RecipeParser_Text::formatSectionName($headings->item(0)->nodeValue);
             $recipe->addIngredientsSection($sectionName);
         }

         foreach ($xpath->query('.//*[@id="IngredientLine"]', $set) as $ingredientNode) {
             $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($ingredientNode->nodeValue));
         }
     }

     // Instructions
     $recipe->resetInstructions();
     $instructionNodes = $xpath->query('//*[@itemprop="instructions"]');
     if (!$instructionNodes->length) {
         return $recipe;
     }

     // The instructions element separates steps with <br> tags, which nodeValue
     // alone would lose. Walk the direct children instead: re-emit each <br> as
     // a literal separator, tag <b> children as section headings, and pass any
     // other node (text, links, ...) through as its nodeValue unless it holds
     // only whitespace.
     $separator = "<br>";
     $sectionMarker = "SECTION:";
     $fragments = array();
     foreach ($instructionNodes->item(0)->childNodes as $child) {
         if ($child->nodeName == "br") {
             $fragments[] = $separator;
         } elseif ($child->nodeName == "b") {
             $fragments[] = $sectionMarker . $child->nodeValue;
         } elseif (preg_match("/\\S/", $child->nodeValue)) {
             $fragments[] = $child->nodeValue;
         }
     }

     foreach (explode($separator, implode("", $fragments)) as $step) {
         if (strpos($step, $sectionMarker) === 0) {
             $heading = substr($step, strlen($sectionMarker));
             $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($heading));
             continue;
         }

         $step = RecipeParser_Text::formatAsOneLine($step);
         $step = RecipeParser_Text::stripLeadingNumbers($step);
         // Lines starting with "yield:" are not instruction steps; skip them.
         if (stripos($step, "yield:") === 0) {
             continue;
         }
         $recipe->appendInstruction($step);
     }

     return $recipe;
 }