Example #1
0
 /**
  * Parse a recipe page whose ingredients are grouped in a <dl id="stages"> list.
  *
  * Starts from the generic microformat parse. When the stages list has child
  * elements, the ingredients are rebuilt from it: each <dt> opens a new
  * ingredients section and every class="ingredient" element found under a
  * <dd> is appended as an ingredient.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     // Baseline: whatever the generic hrecipe parser can extract.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Keep libxml errors internal so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $finder = new DOMXPath($dom);

     // Nothing to override unless the multi-stage list is present.
     $stageNodes = $finder->query('//dl[@id="stages"]/*');
     if (!$stageNodes->length) {
         return $recipe;
     }

     $recipe->resetIngredients();
     foreach ($stageNodes as $stageNode) {
         switch ($stageNode->nodeName) {
             case 'dt':
                 // Stage heading becomes an ingredients section.
                 $sectionName = RecipeParser_Text::formatSectionName($stageNode->nodeValue);
                 $recipe->addIngredientsSection($sectionName);
                 break;
             case 'dd':
                 // Stage body holds the individual ingredient elements.
                 foreach ($finder->query('.//*[@class="ingredient"]', $stageNode) as $item) {
                     $recipe->appendIngredient(trim($item->nodeValue));
                 }
                 break;
         }
     }
     return $recipe;
 }
 /**
  * Parse a recipe page laid out inside <section role="main">.
  *
  * Starts from the generic microformat parse, then overrides the title,
  * yield, prep time and total time from elements found inside the main
  * section, takes the photo from the first <img> directly under a
  * <section class="content-unit">, and finally strips a leading
  * "Intro: " prefix from the title.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL; also used to resolve the photo URL.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $finder = new DOMXPath($dom);

     $mainSections = $finder->query('//section[@role="main"]');
     if ($mainSections->length) {
         $main = $mainSections->item(0);

         // Title is not marked up with class="fn", so read the first <h1>.
         $headings = $finder->query('.//h1', $main);
         if ($headings->length) {
             $recipe->title = RecipeParser_Text::formatTitle($headings->item(0)->nodeValue);
         }

         // Yield -- class names are conflated. Both lookups run in this
         // order, so a recipeYield <span> overrides the "info yield" element.
         $yieldQueries = array(
             './/*[@class="info yield"]',
             './/span[@itemprop="recipeYield"]',
         );
         foreach ($yieldQueries as $yieldQuery) {
             $found = $finder->query($yieldQuery, $main);
             if ($found->length) {
                 $recipe->yield = RecipeParser_Text::formatYield($found->item(0)->nodeValue);
             }
         }

         // Prep time and total time (duration) -- class names are conflated.
         $timeQueries = array(
             'prep' => './/*[@class="info preptime"]',
             'total' => './/*[@class="info duration"]',
         );
         foreach ($timeQueries as $slot => $timeQuery) {
             $found = $finder->query($timeQuery, $main);
             if ($found->length) {
                 $recipe->time[$slot] = RecipeParser_Times::toMinutes($found->item(0)->nodeValue);
             }
         }
     }

     // Photo
     $images = $finder->query('//section[@class="content-unit"]/img');
     if ($images->length) {
         $src = $images->item(0)->getAttribute('src');
         if ($src) {
             $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($src, $url);
         }
     }

     // Remove recipe title intros -- e.g. "Sunday Dinner: Pork Ribs" becomes "Pork Ribs".
     $hasIntro = strpos($recipe->title, ": ") !== false;
     if ($hasIntro) {
         $recipe->title = preg_replace("/^[^:]+: (.+)/", "\$1", $recipe->title);
     }

     return $recipe;
 }
 /**
  * Parse a recipe page that carries its times in "title" attributes and
  * splits its ingredient list with "ingr-divider" elements.
  *
  * Starts from the generic microformat parse, then overrides:
  *  - yield, from the first element whose class list contains "yield";
  *  - prep / cook / total time, from the title attribute of the SECOND
  *    <span> child of the "prepTime", "rspec-cook-time" and "totaltime"
  *    elements (the value is handed to iso8601ToMinutes());
  *  - ingredients, rebuilt from the children of the "ingredients" element.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Yield
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " yield ")]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }

     // Times. The value lives on the second <span>, so at least two spans
     // must exist; item(1) returns null otherwise and dereferencing it
     // would be a fatal error.
     $time_classes = array(
         'prep' => 'prepTime',
         'cook' => 'rspec-cook-time',
         'total' => 'totaltime',
     );
     foreach ($time_classes as $key => $class) {
         $nodes = $xpath->query(
             '//*[contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")]/span'
         );
         if ($nodes->length > 1) {
             $line = $nodes->item(1)->getAttribute("title");
             $recipe->time[$key] = RecipeParser_Text::iso8601ToMinutes($line);
         }
     }

     // Ingredients
     $recipe->resetIngredients();
     $ing_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredients ")]/*');
     foreach ($ing_nodes as $ing_node) {
         // A divider element names the next ingredients section.
         if ($ing_node->getAttribute('class') == "ingr-divider") {
             $line = RecipeParser_Text::formatSectionName($ing_node->nodeValue);
             $recipe->addIngredientsSection($line);
             continue;
         }
         // Extract ingredients from inside of <ul class="ingredientsList">
         // Child nodes should all be <li>
         if ($ing_node->nodeName == 'ul') {
             foreach ($ing_node->childNodes as $node) {
                 $line = trim($node->nodeValue);
                 $recipe->appendIngredient($line);
             }
             continue;
         }
     }
     return $recipe;
 }
Example #4
0
 /**
  * Parse a recipe page that exposes its serving count in a name="resizeTo"
  * element and writes instruction section headers in upper case.
  *
  * Starts from the generic microformat parse, then:
  *  - builds the yield from the resizeTo element's "value" attribute with
  *    " servings" appended;
  *  - rebuilds the ingredients from elements whose class list contains
  *    "ingredient", joining the text of each child node with spaces;
  *  - rebuilds the instructions from <div class="display-field">/<p>,
  *    treating a paragraph that equals its own upper-cased form as a
  *    section name.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     // Baseline: whatever the generic hrecipe parser can extract.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Keep libxml errors internal so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $finder = new DOMXPath($dom);

     // Yield
     $resizeInputs = $finder->query('//*[@name="resizeTo"]');
     if ($resizeInputs->length) {
         $servings = trim($resizeInputs->item(0)->getAttribute("value"));
         $recipe->yield = RecipeParser_Text::formatYield($servings . " servings");
     }

     // Ingredients
     $recipe->resetIngredients();
     $ingredientNodes = $finder->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
     foreach ($ingredientNodes as $ingredientNode) {
         $pieces = array();
         foreach ($ingredientNode->childNodes as $child) {
             $pieces[] = $child->nodeValue;
         }
         // Joining with spaces leaves a stray space before semicolons; tidy it.
         $text = str_replace(" ; ", "; ", implode(' ', $pieces));
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($text));
     }

     // Instructions
     $recipe->resetInstructions();
     foreach ($finder->query('//div[@class="display-field"]/p') as $paragraph) {
         $text = trim($paragraph->nodeValue);
         if ($text != strtoupper($text)) {
             $recipe->appendInstruction($text);
             continue;
         }
         // A paragraph unchanged by upper-casing is treated as a section header.
         $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($text));
     }

     return $recipe;
 }
Example #5
0
 /**
  * Parse a Recipe.com page, filling in items that are not properly defined
  * in its microformat markup.
  *
  * Starts from the generic microformat parse, then:
  *  - title: falls back to the first "heading1" element when none was found;
  *  - photo: falls back to the first <img> under a "PB10" element when none
  *    was found;
  *  - yield: always taken from the class="servingsize" element if present;
  *  - credits: taken from the "partnerName" element, with a leading
  *    "Recipe from" removed.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL; also passed to formatPhotoUrl() with the image src.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // --- Items not properly defined in Recipe.com's microformat markup.

     // Title -- Fallback if "fn" is not defined.
     if (!$recipe->title) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " heading1 ")]');
         if ($nodes->length) {
             $recipe->title = trim($nodes->item(0)->nodeValue);
         }
     }

     // Photo -- Fallback if "photo" is not defined.
     if (!$recipe->photo_url) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " PB10 ")]/img');
         if ($nodes->length) {
             // Use a separate local so the page URL in $url is not clobbered,
             // and pass $url itself: this method is static, so there is no $this.
             $photo_url = $nodes->item(0)->getAttribute('src');
             $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_url, $url);
         }
     }

     // Yield
     $nodes = $xpath->query('//*[@class="servingsize"]');
     if ($nodes->length) {
         $line = trim($nodes->item(0)->nodeValue);
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }

     // Credits
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " partnerName ")]');
     if ($nodes->length) {
         $line = RecipeParser_Text::formatAsOneLine($nodes->item(0)->nodeValue);
         $line = preg_replace('/\\s*Recipe from\\s+(.*)$/', "\$1", $line);
         $recipe->credits = trim($line);
     }

     return $recipe;
 }
 /**
  * Parse a recipe page whose content lives in <div id="recipe">, with the
  * ingredients in a <blockquote> and the instructions in the <p> elements
  * that follow it.
  *
  * Starts from the generic microformat parse, then:
  *  - description: the italic paragraphs (p/i) that do not mention
  *    "Adapted from", separated by blank lines;
  *  - ingredients: rebuilt from the child nodes of blockquote/p, where
  *    <strong>/<b> text is given a trailing colon if it has none;
  *  - instructions: rebuilt from the <p> elements after the blockquote,
  *    stopping at an "Adapted from" note or a <p> with a class attribute;
  *    a paragraph starting with "Serves " sets the yield instead.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // Keep libxml errors internal so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $finder = new DOMXPath($dom);

     // Description
     $paragraphs = array();
     foreach ($finder->query('//div[@id="recipe"]/p/i') as $italic) {
         $text = trim($italic->nodeValue);
         if (strpos($text, "Adapted from") === false) {
             $paragraphs[] = $text;
         }
     }
     $recipe->description = trim(implode("\n\n", $paragraphs));

     // Ingredients: first gather every candidate line into a blob...
     $recipe->resetIngredients();
     $blob = array();
     foreach ($finder->query('//div[@id="recipe"]/blockquote/p') as $paragraph) {
         foreach ($paragraph->childNodes as $child) {
             $kind = $child->nodeName;
             $text = trim($child->nodeValue);
             if ($kind == "strong" || $kind == "b") {
                 // Bold text gets a trailing colon when it has none.
                 $blob[] = (strpos($text, ":") === false) ? $text . ":" : $text;
             } elseif ($kind == "#text" || $kind == "div" || $kind == "p") {
                 $blob[] = $text;
             }
         }
     }
     // ...then sort each line into a section header or an ingredient.
     foreach ($blob as $entry) {
         if (RecipeParser_Text::matchSectionName($entry)) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($entry));
             continue;
         }
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($entry));
     }

     // Instructions
     $recipe->resetInstructions();
     $afterIngredients = false;
     foreach ($finder->query('//div[@id="recipe"]/*') as $block) {
         if ($block->nodeName == "blockquote") {
             $afterIngredients = true;
             continue;
         }
         if ($block->nodeName != "p" || !$afterIngredients) {
             continue;
         }

         $text = trim($block->nodeValue);

         // Stop collecting once we hit "Adapted" notes or any <p> with a
         // class attribute.
         if (stripos($text, "Adapted from") !== false || $block->getAttribute("class")) {
             break;
         }

         // Servings?
         if (stripos($text, "Serves ") === 0) {
             $recipe->yield = RecipeParser_Text::formatYield($text);
             continue;
         }

         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($block->nodeValue));
     }

     return $recipe;
 }
Example #7
0
 /**
  * Parse an About.com recipe page.
  *
  * Runs the generic microformat parse (on the entity-encoded markup), then
  * overrides:
  *  - prep / cook time, from "Prep time:" / "Cook time:" <h3> headings
  *    inside <div id="articlebody">;
  *  - instructions, rebuilt from <div class="instructions"> after removing
  *    related-recipe link text and newsletter mentions; a line starting with
  *    "Makes " becomes the yield, and a "For the X: ..." prefix opens an
  *    instructions section.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     // Keep libxml errors internal so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $html);
     $finder = new DOMXPath($dom);

     // Note: the microformat parser receives the already-converted markup here.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     // OVERRIDES FOR ABOUT.COM

     // Cook times. (Total time is provided as part of microformat markup
     // for About.com.)
     foreach ($finder->query('//div[@id = "articlebody"]/h3') as $heading) {
         $label = trim(preg_replace('/[\\s\\"]+/', ' ', $heading->nodeValue));
         if (preg_match("/prep time\\:(.+)/i", $label, $m)) {
             $recipe->time['prep'] = RecipeParser_Times::toMinutes($m[1]);
         } elseif (preg_match("/cook time\\:(.+)/i", $label, $m)) {
             $recipe->time['cook'] = RecipeParser_Times::toMinutes($m[1]);
         }
     }

     // Patterns whose match is cut back to its leading capture group:
     //  - the word "recipes" squashed up against another word, which seems
     //    to happen with long lists of related recipe links;
     //  - "More Xxxxx Recipes", "Xxxxx Recipes and More", "Xxxxx Recipes | Xxxxx";
     //  - mentions of newsletters.
     $noisePatterns = array(
         "/(.*)recipes\\w/i",
         "/(.*)More .* Recipes.*/",
         "/(.*)Recipes and More.*/",
         "/(.*)Recipes \\| .*/",
         "/(.*)Recipe Newsletter.*/",
     );

     // Instructions
     $recipe->resetInstructions();
     foreach ($finder->query('//div[@class = "instructions"]') as $container) {
         $lines = preg_split("/[\n\r]+/", trim($container->nodeValue));

         // Clean the lines from last to first.
         $i = count($lines);
         while ($i-- > 0) {
             $cleaned = trim($lines[$i]);
             foreach ($noisePatterns as $pattern) {
                 $cleaned = preg_replace($pattern, "\$1", $cleaned);
             }
             // A line that looks like a yield is recorded and blanked out.
             if (strpos($cleaned, "Makes ") === 0) {
                 $recipe->yield = substr($cleaned, 6);
                 $cleaned = '';
             }
             $lines[$i] = $cleaned;
         }

         foreach ($lines as $raw) {
             $step = trim($raw);
             if (empty($step)) {
                 continue;
             }
             // Match section names that read something like
             // "---For the cake: Raise the oven temperature..."
             if (preg_match("/^(?:-{2,})?For the (.+)\\: (.*)\$/i", $step, $m)) {
                 $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($m[1]));
                 // Keep only the text after the section name.
                 $step = ucfirst($m[2]);
             }
             $recipe->appendInstruction($step);
         }
     }

     return $recipe;
 }
 /**
  * Parse a recipe page, using page-specific markup only as a fallback for
  * fields the generic microformat parse left empty.
  *
  * Fallbacks, each applied only when the field is still empty:
  *  - title: first <div itemprop="name">;
  *  - yield: first <div class="box">/<div> whose text starts with "makes";
  *  - ingredients: the <li> children of the first <ul class="ingredients">;
  *  - instructions: <div class="instructions">/<ol>/<li>;
  *  - photo: first og:image <meta> whose content contains "wp-content".
  *
  * @param string $html Page markup.
  * @param string $url  Page URL, forwarded to the microformat parser.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     libxml_use_internal_errors(true);
     $encoded = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . $encoded);
     $finder = new DOMXPath($dom);

     // Title
     if (!$recipe->title) {
         $names = $finder->query('//div[@itemprop="name"]');
         if ($names->length) {
             $recipe->title = RecipeParser_Text::formatTitle($names->item(0)->nodeValue);
         }
     }

     // Yield
     if (!$recipe->yield) {
         foreach ($finder->query('//div[@class="box"]/div') as $box) {
             $text = trim($box->nodeValue);
             if (stripos($text, "makes") !== 0) {
                 continue;
             }
             $recipe->yield = RecipeParser_Text::formatYield($text);
             break;
         }
     }

     // Ingredients
     if (!count($recipe->ingredients[0]["list"])) {
         $lists = $finder->query('//ul[@class="ingredients"]');
         if ($lists->length) {
             // Each <li> is terminated with "<br>", then the whole string is
             // split on "<br>" again.
             $joined = "";
             foreach ($lists->item(0)->childNodes as $child) {
                 if ($child->nodeName == "li") {
                     $joined .= $child->nodeValue . "<br>";
                 }
             }
             foreach (explode("<br>", $joined) as $piece) {
                 $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($piece));
             }
         }
     }

     // Instructions
     if (!count($recipe->instructions[0]["list"])) {
         foreach ($finder->query('//div[@class="instructions"]/ol/li') as $step) {
             $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
         }
     }

     // Photo
     if (!$recipe->photo_url) {
         foreach ($finder->query('//meta[@property="og:image"]') as $meta) {
             $content = $meta->getAttribute("content");
             if (strpos($content, "wp-content") === false) {
                 continue;
             }
             $recipe->photo_url = $content;
             break;
         }
     }

     return $recipe;
 }
Example #9
0
 /**
  * Parse a recipe page marked up with schema.org-style itemprop attributes.
  *
  * Starts from the generic microformat parse, then overrides:
  *  - title, from the first itemprop="name" element;
  *  - instructions, appended from the children of
  *    <li itemprop="recipeInstructions">, with leading numbers stripped;
  *  - photo, from the src of the first itemprop="image" element;
  *  - prep / cook / total time and yield, when a
  *    <div class="recipe-metadata-wrap"> with children exists;
  *  - ingredients, rebuilt from <div class="recipe-ingredients-wrapper">,
  *    where <h3> opens a section and <ul> holds the ingredient items.
  *
  * Every lookup is guarded: when an element is missing, the value from the
  * microformat parse is left as it was instead of dereferencing a null node.
  *
  * @param string $html Page markup.
  * @param string $url  Page URL; also used to resolve the photo URL.
  * @return mixed The recipe object produced by RecipeParser_Parser_Microformat::parse().
  */
 public static function parse($html, $url)
 {
     // Get all of the standard hrecipe stuff we can find.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     $recipe->resetIngredients();

     // Title
     $nodes = $xpath->query('.//*[@itemprop="name"]');
     if ($nodes->length) {
         $recipe->title = trim($nodes->item(0)->nodeValue);
     }

     // Instructions
     $nodes = $xpath->query('//li[@itemprop="recipeInstructions"]/*');
     foreach ($nodes as $sub) {
         $line = trim($sub->nodeValue);
         $line = RecipeParser_Text::stripLeadingNumbers($line);
         $recipe->appendInstruction($line);
     }

     // Photo
     $nodes = $xpath->query('.//*[@itemprop="image"]');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute('src');
         if ($photo_url) {
             $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
         }
     }

     // Meta data
     $nodes = $xpath->query('//div[@class="recipe-metadata-wrap"]/*');
     if ($nodes->length) {
         // Prep and cook times are carried in the "content" attribute.
         $time_props = array(
             'prep' => 'prepTime',
             'cook' => 'cookTime',
         );
         foreach ($time_props as $key => $itemprop) {
             $time_nodes = $xpath->query('.//*[@itemprop="' . $itemprop . '"]');
             if ($time_nodes->length && $time_nodes->item(0)->hasAttribute('content')) {
                 $value = trim($time_nodes->item(0)->getAttribute('content'));
                 $recipe->time[$key] = RecipeParser_Text::iso8601ToMinutes($value);
             }
         }
         $recipe->time['total'] = $recipe->time['cook'] + $recipe->time['prep'];

         $yield_nodes = $xpath->query('.//*[@itemprop="recipeYield"]');
         if ($yield_nodes->length) {
             $value = trim($yield_nodes->item(0)->nodeValue);
             $recipe->yield = RecipeParser_Text::formatYield($value);
         }
     }

     // Multi-stage ingredients
     $nodes = $xpath->query('//div[@class="recipe-ingredients-wrapper"]/*');
     foreach ($nodes as $node) {
         if ($node->nodeName == 'h3') {
             $value = RecipeParser_Text::formatSectionName($node->nodeValue);
             $recipe->addIngredientsSection($value);
         } elseif ($node->nodeName == 'ul') {
             $subs = $xpath->query('.//li[@itemprop="ingredients"]', $node);
             foreach ($subs as $sub) {
                 $recipe->appendIngredient(trim($sub->nodeValue));
             }
         }
     }

     return $recipe;
 }