コード例 #1
0
 /**
  * Parse a recipe page and rebuild its ingredient list from the element
  * with id "recipe-ingredients".
  *
  * The generic microdata parser produces the base recipe. Its ingredients
  * are then discarded and replaced: each <h3> starts a new ingredients
  * section and each element child of a <ul> becomes one ingredient line.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     // Ingredients
     $recipe->resetIngredients();
     // Primary layout first; fall back to the tabbed layout when it yields nothing.
     $nodes = $xpath->query('//*[@id="recipe-ingredients"]//div[@class="view-content"]/*');
     if (!$nodes || !$nodes->length) {
         $nodes = $xpath->query('//*[@id="recipe-ingredients"]//div[@class="ingredient-lists separator-serated tab-content"]/*');
     }
     foreach ($nodes as $node) {
         if ($node->nodeName == 'h3') {
             $line = $node->nodeValue;
             $line = RecipeParser_Text::formatSectionName($line);
             $recipe->addIngredientsSection($line);
         } elseif ($node->nodeName == 'ul') {
             foreach ($node->childNodes as $subnode) {
                 // childNodes also contains the whitespace text nodes that sit
                 // between list items; only real elements are ingredients.
                 if ($subnode->nodeType !== XML_ELEMENT_NODE) {
                     continue;
                 }
                 $line = $subnode->nodeValue;
                 $line = RecipeParser_Text::formatAsOneLine($line);
                 $recipe->appendIngredient($line);
             }
         }
     }
     return $recipe;
 }
コード例 #2
0
 /**
  * Parse a recipe page, then override ingredients, notes and photo URL
  * from the page markup.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL; also passed to RecipeParser_Text::formatPhotoUrl().
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Ingredients: walk the direct children of the ingredients container.
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id = "recipe-ingredients"]/*') as $element) {
         if ($element->nodeName == 'ul') {
             foreach ($xpath->query('./li[@class = "ingredient"]', $element) as $item) {
                 $recipe->appendIngredient(trim($item->nodeValue));
             }
             continue;
         }
         if ($element->nodeName != 'p') {
             continue;
         }

         $text = trim($element->nodeValue);
         $parts = explode("\n", $text);
         if (count($parts) <= 1) {
             // Single-line <p>: a plain section header; its ingredients
             // follow in later <ul> elements.
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
             continue;
         }

         // Multi-line <p> (older recipes): the first line is the section
         // name and every following line is an ingredient of that section.
         foreach ($parts as $index => $part) {
             $part = trim($part);
             if ($index == 0) {
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($part));
             } else {
                 $recipe->appendIngredient($part);
             }
         }
     }

     // Notes
     $note_nodes = $xpath->query('//*[@id="recipe-intronote"]');
     if ($note_nodes->length) {
         $recipe->notes = RecipeParser_Text::formatAsParagraphs($note_nodes->item(0)->nodeValue);
     }

     // Photo URL to replace og:image
     $image_nodes = $xpath->query('//img[@itemprop="image"]');
     if ($image_nodes->length) {
         $src = $image_nodes->item(0)->getAttribute("src");
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($src, $url);
     }

     return $recipe;
 }
コード例 #3
0
ファイル: Bhgcom.php プロジェクト: JoshRamynke/MealPlan-Web
 /**
  * Parse a recipe page, then collect notes from the tips list and the
  * recipe-info entries, and rewrite the photo URL to a larger variant.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     $collected = array();

     // Tips/notes listed with the recipe; a leading "Tip"/"Note" label is stripped.
     foreach ($xpath->query('//*[@class="recipeTips"]//li') as $tip_node) {
         $tip = RecipeParser_Text::FormatAsOneLine($tip_node->nodeValue);
         $collected[] = preg_replace("/^(Tip|Note)\\s*(.*)\$/", "\$2", $tip);
     }

     // Non-standard cook times and baking temps; the "Makes:" entry is skipped.
     foreach ($xpath->query('//*[@class="recipeInfo"]//*[@class="type"]') as $info_node) {
         $info = RecipeParser_Text::formatAsOneLine($info_node->nodeValue);
         if (strpos($info, "Makes:") === false) {
             $collected[] = $info;
         }
     }

     $recipe->notes = implode("\n\n", $collected);

     // Adjust Photo URL for larger dimensions
     $recipe->photo_url = preg_replace("/\\/l_([^\\/]+)/", "/550_\$1", $recipe->photo_url);

     return $recipe;
 }
コード例 #4
0
 /**
  * Parse a recipe page, then rebuild the ingredient list from the direct
  * children of the div with id "ingredients".
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // OVERRIDES for epicurious
     // Ingredients
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id = "ingredients"]/*') as $element) {
         if ($element->nodeName == 'strong') {
             // <strong> carries the name of an ingredient section.
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($element->nodeValue));
         } elseif ($element->nodeName == 'ul') {
             // Each <li> directly under the list is one ingredient.
             foreach ($element->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $recipe->appendIngredient(trim($item->nodeValue));
             }
         }
     }

     return $recipe;
 }
コード例 #5
0
ファイル: Foodcom.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse a recipe page, then clean up the photo URL and read the yield
  * from the page markup.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Photo -- discard it when a logo was used in place of a photo.
     foreach (array("FDC_Logo_vertical.png", "FDC_share-logo.png") as $logo_file) {
         if (strpos($recipe->photo_url, $logo_file) !== false) {
             $recipe->photo_url = '';
             break;
         }
     }
     if ($recipe->photo_url) {
         $recipe->photo_url = str_replace("/thumbs/", "/large/", $recipe->photo_url);
     }

     // Yield -- prefer an element classed 'yield', otherwise fall back to
     // the number of 'servings'.
     $yield_nodes = $xpath->query('//*[@class="yield"]');
     if (!$yield_nodes->length) {
         $yield_nodes = $xpath->query('//*[@class="servings"]//*[@class="value"]');
     }
     if ($yield_nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($yield_nodes->item(0)->nodeValue);
     }

     return $recipe;
 }
コード例 #6
0
 /**
  * Parse a recipe page and swap the "_75.jpg" photo suffix for "_300.jpg".
  *
  * @param string $html Page markup, passed through to the microdata parser.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Photo
     $resized_photo = str_replace("_75.jpg", "_300.jpg", $recipe->photo_url);
     $recipe->photo_url = $resized_photo;

     return $recipe;
 }
コード例 #7
0
ファイル: Cookingcom.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse a recipe page using only the generic microdata parser.
  *
  * Earlier revisions also built a DOMDocument and DOMXPath here, but nothing
  * read them, so every call paid for a second full parse of the page. That
  * dead work has been removed; reintroduce it when an override needs the DOM.
  *
  * @param string $html Page markup, passed through to the microdata parser.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     // Kept on purpose: this switches libxml to internal error handling for
     // the whole process, and code that runs afterwards may rely on it.
     libxml_use_internal_errors(true);
     return $recipe;
 }
コード例 #8
0
 /**
  * Parse a recipe page using only the generic microdata parser.
  *
  * The overrides section is still empty, so the DOMDocument/DOMXPath that
  * used to be built here was never read. That second full parse of the page
  * has been removed; reintroduce it together with the first real override.
  *
  * @param string $html Page markup, passed through to the microdata parser.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     // Kept on purpose: this switches libxml to internal error handling for
     // the whole process, and code that runs afterwards may rely on it.
     libxml_use_internal_errors(true);
     // --- OVERRIDES (none yet)
     return $recipe;
 }
コード例 #9
0
 /**
  * Parse a recipe page, then override the title, yield and photo URL and
  * append ingredients and instructions found inside the element marked
  * itemprop="articleBody".
  *
  * Ingredients and instructions are appended to whatever the microdata
  * parser already produced; they are not reset first.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     // Start from the standard microdata result.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // ---- OVERRIDES
     // Title: first <strong> nested in an <h3>.
     $title_nodes = $xpath->query('//h3//strong');
     if ($title_nodes->length) {
         $recipe->title = RecipeParser_Text::formatAsOneLine($title_nodes->item(0)->nodeValue);
     }

     // Yield: first paragraph whose text starts with "Yield" or "Serve".
     foreach ($xpath->query('//span[@itemprop="articleBody"]//p') as $paragraph) {
         $text = trim($paragraph->nodeValue);
         $starts_with_yield = strncmp($text, "Yield", 5) === 0;
         $starts_with_serve = strncmp($text, "Serve", 5) === 0;
         if ($starts_with_yield || $starts_with_serve) {
             $recipe->yield = RecipeParser_Text::formatYield($text);
             break;
         }
     }

     // Ingredients
     foreach ($xpath->query('//span[@itemprop="articleBody"]//ul/li') as $item) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
     }

     // Instructions
     foreach ($xpath->query('//span[@itemprop="articleBody"]//ol/li') as $step) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
     }

     // Image: content of the first og:image meta tag.
     $image_nodes = $xpath->query('//meta[@property="og:image"]');
     if ($image_nodes->length) {
         $recipe->photo_url = $image_nodes->item(0)->getAttribute("content");
     }

     return $recipe;
 }
コード例 #10
0
ファイル: Foodcom.php プロジェクト: JoshRamynke/MealPlan-Web
 /**
  * Parse a recipe page, then override the photo URL, yield and ingredient
  * list from the page markup.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Photo -- discard it when the logo was used in place of a photo.
     if (strpos($recipe->photo_url, "FDC_Logo_vertical.png") !== false) {
         $recipe->photo_url = '';
     }
     if ($recipe->photo_url) {
         $recipe->photo_url = str_replace("/thumbs/", "/large/", $recipe->photo_url);
     }

     // Yield: "<amount> <units>", where the units default to "servings"
     // when the units element is present but empty.
     $yield_text = '';
     $amount_nodes = $xpath->query('//option[@class="select-title"]');
     if ($amount_nodes->length) {
         $yield_text .= trim($amount_nodes->item(0)->nodeValue);
     }
     $unit_nodes = $xpath->query('//p[@class="yieldUnits-txt"]');
     if ($unit_nodes->length) {
         $units = trim($unit_nodes->item(0)->nodeValue);
         $yield_text .= ' ' . ($units ?: 'servings');
     }
     $recipe->yield = trim($yield_text);

     // Ingredients (custom because of duplicate class attributes for "ingredients")
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@class = "pod ingredients"]/*') as $element) {
         if ($element->nodeName == 'h3') {
             // <h3> holds an ingredient section name.
             $section_name = ucfirst(trim(strtolower($element->nodeValue)));
             $recipe->addIngredientsSection($section_name);
         } elseif ($element->nodeName == 'ul') {
             // Every <li> directly under the list is one ingredient.
             foreach ($element->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $recipe->appendIngredient(RecipeParser_Text::FormatAsOneLine($item->nodeValue));
             }
         }
     }

     return $recipe;
 }
コード例 #11
0
ファイル: Chowcom.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse a recipe page, then tidy the title, instructions and description
  * and fall back to the page markup for ingredients/instructions when the
  * microdata parser found none.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     // Get all of the standard bits we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     // Titles include "recipe". The dot in "CHOW.com" is escaped so that it
     // only matches a literal dot rather than any character.
     if (preg_match("/ Recipe( - CHOW\\.com)?\$/", $recipe->title)) {
         $recipe->title = trim(preg_replace("/(.*) Recipe( - CHOW\\.com)?\$/", "\$1", $recipe->title));
     }
     // Strip leading numbers from instructions. Iterating by key avoids
     // assuming the sections and steps are indexed 0..n-1.
     foreach ($recipe->instructions as $i => $section) {
         foreach ($section['list'] as $j => $step) {
             $recipe->instructions[$i]['list'][$j] = preg_replace("/^\\d+(\\w.*)\$/", "\$1", $step);
         }
     }
     // Ingredients (If none parsed). empty() also covers the case where no
     // first section exists at all, which count() cannot handle.
     if (empty($recipe->ingredients[0]['list'])) {
         $nodes = $xpath->query('//*[@id="ingredients_list"]//li');
         foreach ($nodes as $node) {
             $line = $node->nodeValue;
             $line = RecipeParser_Text::formatAsOneLine($line);
             $recipe->appendIngredient($line);
         }
     }
     // Instructions (If none parsed)
     if (empty($recipe->instructions[0]['list'])) {
         $nodes = $xpath->query('//*[@itemprop="recipeInstructions"]');
         foreach ($nodes as $node) {
             $line = $node->nodeValue;
             $line = RecipeParser_Text::formatAsOneLine($line);
             $recipe->appendInstruction($line);
         }
     }
     // Cleanup description: drop promotional lines, then collapse the blank
     // runs they leave behind.
     if ($recipe->description) {
         $recipe->description = preg_replace("/^(Read our review of|This (dish|recipe) was featured as part|See more recipes) .*\$/m", "", $recipe->description);
         $recipe->description = preg_replace("/[\r\n]{3,}/", "\n\n", $recipe->description);
         $recipe->description = trim($recipe->description);
     }
     return $recipe;
 }
コード例 #12
0
ファイル: Recipecom.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse a recipe page, then rebuild the instructions from every element
  * marked itemprop="recipeInstructions".
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Overrides for data that isn't captured by their implementation of Schema.org.
     // Instructions: one step per matching element.
     $recipe->resetInstructions();
     foreach ($xpath->query('//*[@itemprop="recipeInstructions"]') as $step_node) {
         $step = RecipeParser_Text::formatAsOneLine($step_node->nodeValue);
         $recipe->appendInstruction($step);
     }

     return $recipe;
 }
コード例 #13
0
ファイル: Food52com.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse a recipe page, then prefix the credits with "Food52" and rebuild
  * the notes from the first span classed "recipe-note".
  *
  * The notes are always overwritten, including when no note span exists.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     // Start from the standard microdata result.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // ---- OVERRIDES
     // Credits
     $recipe->credits = $recipe->credits ? "Food52 (" . $recipe->credits . ")" : "Food52";

     // Notes: rebuild the text from the note's direct children so that
     // <br> becomes a newline and inline/text nodes contribute their text.
     $inline_names = array("#text", "span", "strong", "b", "em", "i", "a");
     $note_text = "";
     $note_nodes = $xpath->query('.//span[@class="recipe-note"]');
     if ($note_nodes->length) {
         foreach ($note_nodes->item(0)->childNodes as $child) {
             if ($child->nodeName === "br") {
                 $note_text .= "\n";
             } elseif (in_array($child->nodeName, $inline_names, true)) {
                 $note_text .= $child->nodeValue . " ";
             }
         }
     }
     $note_text = preg_replace("/^Author Notes:\\s*/", "", $note_text);
     $recipe->notes = RecipeParser_Text::formatAsParagraphs($note_text);

     return $recipe;
 }
コード例 #14
0
 /**
  * Parse a recipe page, then rebuild the ingredient list from the "group"
  * elements inside the ingredients container and read the cook's note.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Ingredients: each group may carry an <h3> section name followed by
     // its <li> ingredient lines.
     $recipe->resetIngredients();
     foreach ($xpath->query('//*[@id="ingredients"]//*[@class="group"]') as $group) {
         $headings = $xpath->query('.//h3', $group);
         if ($headings->length) {
             $section_name = RecipeParser_Text::formatSectionName($headings->item(0)->nodeValue);
             if (!empty($section_name)) {
                 $recipe->addIngredientsSection($section_name);
             }
         }
         foreach ($xpath->query('.//li', $group) as $item) {
             $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($item->nodeValue));
         }
     }

     // Notes
     $note_nodes = $xpath->query('.//*[@class = "body-c note-text"]');
     if ($note_nodes->length) {
         $note = str_replace("Cook's Note", '', $note_nodes->item(0)->nodeValue);
         $recipe->notes = trim($note);
     }

     return $recipe;
 }
コード例 #15
0
 /**
  * Parse a recipe page, then override the notes with the editor's note and
  * the photo URL with the itemprop="image" meta tag.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Notes: the editor's note with its "Editor's Note:" label removed.
     $note_nodes = $xpath->query('//div[@class="rd_editornote margin_bottom"]');
     if ($note_nodes->length) {
         $note = RecipeParser_Text::formatAsOneLine($note_nodes->item(0)->nodeValue);
         $recipe->notes = preg_replace("/Editor's Note:\\s+/", "", $note);
     }

     // Override image
     $image_nodes = $xpath->query('//meta[@itemprop="image"]');
     if ($image_nodes->length) {
         $recipe->photo_url = $image_nodes->item(0)->getAttribute("content");
     }

     return $recipe;
 }
コード例 #16
0
 /**
  * Parse a recipe page, then rebuild ingredients and instructions from the
  * page markup and drop the photo when it is the linked profile image.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     // Start from the standard microdata result.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Ingredients: only <ul> children of the container are considered.
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@class="col6 ingredients"]/*') as $container) {
         if ($container->nodeName != 'ul') {
             continue;
         }
         foreach ($container->childNodes as $item) {
             if ($item->nodeName != 'li') {
                 continue;
             }

             if ($item->getAttribute("itemprop") == "ingredients") {
                 $text = trim($item->nodeValue);

                 // A line that is unchanged by upper-casing is treated as a
                 // section title rather than an ingredient.
                 if ($text == strtoupper($text)) {
                     $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
                     continue;
                 }

                 // Copyright notices and "recipe follows" pointers are not ingredients.
                 $is_noise = stripos($text, "copyright") !== false
                     || stripos($text, "recipe follows") !== false;
                 if (!$is_noise) {
                     $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($text));
                 }
                 continue;
             }

             // Explicitly marked section titles.
             if ($item->getAttribute("class") == "subtitle") {
                 $title = RecipeParser_Text::formatSectionName(trim($item->nodeValue));
                 $recipe->addIngredientsSection($title);
             }
         }
     }

     // Instructions: <span> children name a section, <p> children are steps.
     $recipe->resetInstructions();
     foreach ($xpath->query('//*[@itemprop="recipeInstructions"]/*') as $element) {
         if ($element->nodeName == "span") {
             $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($element->nodeValue));
             continue;
         }
         if ($element->nodeName != "p") {
             continue;
         }
         $step = RecipeParser_Text::formatAsOneLine($element->nodeValue);
         if (!preg_match("/^Photograph/i", $step)) {
             $recipe->appendInstruction($step);
         }
     }

     // See if we've captured a chef's photo, and delete it (if so).
     if ($recipe->photo_url) {
         $chef_images = $xpath->query('//a[@itemprop="url"]/img[@itemprop="image"]');
         if ($chef_images->length > 0) {
             $chef_photo = $chef_images->item(0)->getAttribute("src");
             if ($recipe->photo_url == $chef_photo) {
                 $recipe->photo_url = "";
             }
         }
     }

     return $recipe;
 }
コード例 #17
0
 /**
  * Parse a recipe page, then read the yield from the "prep_box" elements
  * and rebuild the instructions from the <br>-separated instruction block.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Yield
     foreach ($xpath->query('//*[@class="prep_box"]') as $box) {
         if (preg_match("/Number of Servings: (\\d+)/", $box->nodeValue, $m)) {
             $recipe->yield = RecipeParser_Text::formatYield($m[1]);
         }
     }

     // Instructions
     $recipe->resetInstructions();
     $instruction_nodes = $xpath->query('//*[@itemprop="recipeInstructions"]');
     if (!$instruction_nodes->length) {
         return $recipe;
     }

     // The block separates steps with <br> tags, which nodeValue would
     // swallow. Rebuild the text child by child instead: <br> nodes become a
     // literal "<br>" marker, every other node (text, links, ...) adds its
     // trimmed text.
     $joined = "";
     foreach ($instruction_nodes->item(0)->childNodes as $child) {
         if ($child->nodeName == "br") {
             $joined .= "<br>";
             continue;
         }
         $fragment = trim($child->nodeValue);
         if (!empty($fragment)) {
             $joined .= $fragment;
         }
     }

     foreach (explode("<br>", $joined) as $piece) {
         if (empty($piece)) {
             continue;
         }
         if (RecipeParser_Text::matchSectionName($piece)) {
             $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($piece));
             continue;
         }

         $step = RecipeParser_Text::formatAsOneLine($piece);
         $step = RecipeParser_Text::stripLeadingNumbers($step);

         // Site boilerplate lines are not instructions.
         $is_boilerplate = stripos($step, "Recipe submitted by SparkPeople") === 0
             || stripos($step, "Number of Servings:") === 0;
         if ($is_boilerplate) {
             continue;
         }
         $recipe->appendInstruction($step);
     }

     return $recipe;
 }
コード例 #18
0
 /**
  * Parse a recipe page, then rebuild the ingredients from the
  * "ingredient-set" container and the instructions from the "prep-steps"
  * container.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($dom);

     // Ingredients
     $recipe->resetIngredients();
     $ingredient_blocks = $xpath->query('//div[contains(concat(" ", normalize-space(@class), " "), " ingredient-set ")]/*');
     foreach ($ingredient_blocks as $element) {
         if ($element->nodeName == 'h3') {
             // <h3> names a section; the generic "Ingredients" heading is skipped.
             $section_name = RecipeParser_Text::formatSectionName($element->nodeValue);
             if ($section_name != "Ingredients") {
                 $recipe->addIngredientsSection($section_name);
             }
         } elseif ($element->nodeName == 'ul') {
             // Each <li> directly under the list is one ingredient.
             foreach ($element->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $ingredient = RecipeParser_Text::formatAsOneLine(trim($item->nodeValue));
                 if ($ingredient) {
                     $recipe->appendIngredient($ingredient);
                 }
             }
         }
     }

     // Instructions
     $recipe->resetInstructions();
     foreach ($xpath->query('//div[@class="prep-steps"]/*') as $element) {
         if ($element->nodeName == 'h3') {
             // <h3> names a section; the generic "Preparation" heading and
             // empty names are skipped.
             $section_name = RecipeParser_Text::formatSectionName($element->nodeValue);
             if ($section_name != "Preparation" && !empty($section_name)) {
                 $recipe->addInstructionsSection($section_name);
             }
         } elseif ($element->nodeName == 'ul') {
             // Each <li> directly under the list is one step.
             foreach ($element->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $step = trim($item->nodeValue);
                 // Drop the "Hungry/Thirsty for more?" promo line and blanks.
                 if (!preg_match("/(Hungry|Thirsty) for more\\?/i", $step) && !empty($step)) {
                     $recipe->appendInstruction($step);
                 }
             }
         }
     }

     return $recipe;
 }
コード例 #19
0
 /**
  * Parse a recipe page, then override times, yield, ingredients, notes and
  * description from the page markup.
  *
  * Attribute list items are read positionally: child node 1 is taken as the
  * label and child node 3 as the value. Items with fewer child nodes are
  * now read as empty strings instead of dereferencing a missing node.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before loading.
  * @param string $url  Page URL, passed through to the microdata parser.
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse().
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Trimmed text of the child at $index, or '' when that child is absent
     // (DOMNodeList::item() returns null for an out-of-range index).
     $child_text = function ($node, $index) {
         $child = $node->childNodes->item($index);
         return $child ? trim($child->nodeValue) : '';
     };

     // Times
     $nodes = $xpath->query('//*[@class="recipePartAttributes recipePartPrimaryAttributes"]//li');
     foreach ($nodes as $node) {
         $label = $child_text($node, 1);
         if ($label == "Prep Time") {
             $recipe->time['prep'] = RecipeParser_Times::toMinutes($child_text($node, 3));
         } elseif ($label == "Total Time") {
             $recipe->time['total'] = RecipeParser_Times::toMinutes($child_text($node, 3));
         }
     }

     // Yield
     $nodes = $xpath->query('//*[@class="recipePartAttributes recipePartSecondaryAttributes"]//li');
     foreach ($nodes as $node) {
         if ($child_text($node, 1) == "Servings") {
             $recipe->yield = RecipeParser_Text::formatYield($child_text($node, 3));
         }
     }

     // Ingredients: each group may have an <h2> section name followed by
     // its itemprop="ingredients" lines.
     $recipe->resetIngredients();
     $groups = $xpath->query('//*[@class="recipePartIngredientGroup"]');
     foreach ($groups as $group) {
         $nodes = $xpath->query('.//h2', $group);
         if ($nodes->length) {
             $line = $nodes->item(0)->nodeValue;
             $line = RecipeParser_Text::formatSectionName($line);
             $recipe->addIngredientsSection($line);
         }
         $nodes = $xpath->query('.//*[@itemprop="ingredients"]', $group);
         foreach ($nodes as $node) {
             $line = $node->nodeValue;
             $line = RecipeParser_Text::formatAsOneLine($line);
             $recipe->appendIngredient($line);
         }
     }

     // Notes / footnotes
     $notes = array();
     $nodes = $xpath->query('//div[@class="recipePartTipsInfo"]');
     foreach ($nodes as $node) {
         $notes[] = trim($node->nodeValue);
     }
     $recipe->notes = implode("\n\n", $notes);
     $recipe->notes = RecipeParser_Text::formatAsParagraphs($recipe->notes);

     // Fix description: remove the "Servings # N" fragment.
     $recipe->description = trim(preg_replace("/Servings \\# \\d+/", "", $recipe->description));
     return $recipe;
 }
コード例 #20
0
 /**
  * Parse a recipe page: start from the generic schema.org microdata result,
  * then apply the epicurious-specific overrides for times and ingredients.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before DOM parsing.
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  * @return object The recipe returned by RecipeParser_Parser_MicrodataSchema::parse(),
  *                with the overrides below applied.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Collect libxml errors internally so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Active / total time text taken from the "summary_data" elements.
     // NOTE(review): these land in prep_time / total_time, while the itemprop
     // lookup further down writes to time['total'] -- confirm which field
     // consumers actually read.
     foreach ($xpath->query('//*[@class="summary_data"]') as $summary) {
         if (preg_match('/ACTIVE/', $summary->nodeValue)) {
             $field = 'prep_time';
         } elseif (preg_match('/TOTAL/', $summary->nodeValue)) {
             $field = 'total_time';
         } else {
             continue;
         }
         // Every <span> child overwrites the field, so the last one wins.
         foreach ($summary->childNodes as $child) {
             if ($child->nodeName == "span") {
                 $recipe->{$field} = RecipeParser_Text::formatAsOneLine($child->nodeValue);
             }
         }
     }

     // Total time from the "content" attribute of the first itemprop="totalTime" element.
     $total_nodes = $xpath->query('//*[@itemprop="totalTime"]');
     if ($total_nodes->length) {
         $content = $total_nodes->item(0)->getAttribute("content");
         $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($content);
     }

     // Ingredients: rebuilt from the direct children of <div id="ingredients">.
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id = "ingredients"]/*') as $child) {
         if ($child->nodeName == 'strong') {
             // <strong> carries an ingredient section name.
             $section = RecipeParser_Text::formatSectionName($child->nodeValue);
             $recipe->addIngredientsSection($section);
         } elseif ($child->nodeName == 'ul') {
             // Only <li> children are ingredients; whitespace text nodes are skipped.
             foreach ($child->childNodes as $item) {
                 if ($item->nodeName != 'li') {
                     continue;
                 }
                 $recipe->appendIngredient(trim($item->nodeValue));
             }
         }
     }

     return $recipe;
 }
コード例 #21
0
 /**
  * Parse an Allrecipes page.
  *
  * The generic schema.org microdata parser runs first. Allrecipes also serves
  * custom recipes from a template that does not use schema.org/Recipe, so any
  * field the microdata pass left empty is filled in from that template's markup.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before DOM parsing.
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  * @return object The recipe returned by RecipeParser_Parser_MicrodataSchema::parse(),
  *                with missing fields filled in.
  */
 public static function parse($html, $url)
 {
     // Baseline: everything the standard microdata parser can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Collect libxml errors internally so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title -- only when the microdata pass found none.
     if (!$recipe->title) {
         $titles = $xpath->query('//h1[@id = "itemTitle"]');
         if ($titles->length) {
             $recipe->title = trim($titles->item(0)->nodeValue);
         }
     }

     // Yield -- only when the microdata pass found none; stored as-is.
     if (!$recipe->yield) {
         $yields = $xpath->query('//div[@class = "servings-form"]//span[@class = "yield yieldform"]');
         if ($yields->length) {
             $recipe->yield = $yields->item(0)->nodeValue;
         }
     }

     // Times: element id => key in $recipe->time.
     $time_ids = array('liPrep' => 'prep', 'liCook' => 'cook', 'liTotal' => 'total');
     foreach ($time_ids as $element_id => $slot) {
         $matches = $xpath->query('.//*[@id="' . $element_id . '"]');
         if (!$matches->length) {
             continue;
         }
         $text = RecipeParser_Text::formatAsOneLine($matches->item(0)->nodeValue);
         // Drop the label so only the duration text is handed to toMinutes().
         $text = trim(preg_replace("/(COOK|PREP|READY IN)/", "", $text));
         $minutes = RecipeParser_Times::toMinutes($text);
         if ($minutes) {
             $recipe->time[$slot] = $minutes;
         }
     }

     // Ingredients -- only when the first ingredient list is still empty.
     if (!count($recipe->ingredients[0]["list"])) {
         $items = $xpath->query('//li[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
         foreach ($items as $item) {
             $text = trim(strip_tags($item->nodeValue));
             // A line that ends in ":" is treated as a section heading.
             if (preg_match("/^(.+):\$/", $text, $m)) {
                 $recipe->addIngredientsSection(ucfirst(strtolower($m[1])));
                 continue;
             }
             if ($text) {
                 $recipe->appendIngredient($text);
             }
         }
     }

     // Instructions -- only when the first instruction list is still empty.
     if (!count($recipe->instructions[0]["list"])) {
         $steps = $xpath->query('//div[@class="directions"]//ol/li');
         foreach ($steps as $step) {
             $text = RecipeParser_Text::formatAsOneLine($step->nodeValue);
             // A line that ends in ":" is treated as a section heading.
             if (preg_match("/^(.+):\$/", $text, $m)) {
                 $recipe->addInstructionsSection(ucfirst(strtolower($m[1])));
                 continue;
             }
             if ($text) {
                 $recipe->appendInstruction($text);
             }
         }
     }

     // Photo URL: swap the small-image path segments for their larger variants.
     if ($recipe->photo_url) {
         $recipe->photo_url = str_replace(
             array('/userphoto/small/', '/userphotos/140x140/'),
             array('/userphoto/big/', '/userphotos/250x250/'),
             $recipe->photo_url
         );
     }

     return $recipe;
 }
コード例 #22
0
ファイル: Aboutcom.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse an About.com recipe page.
  *
  * Runs the generic schema.org microdata parser, then overrides the title,
  * credits, ingredients and instructions from About.com-specific markup.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before DOM parsing.
  * @param string $url  Page URL; only forwarded to the microdata parser here.
  * @return object The recipe returned by RecipeParser_Parser_MicrodataSchema::parse(),
  *                with the overrides below applied.
  */
 public static function parse($html, $url)
 {
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // The microdata parser is deliberately given the entity-encoded markup,
     // exactly as before (the conversion above happens first).
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // OVERRIDES FOR ABOUT.COM

     // Title
     $nodes = $xpath->query('//*[@itemprop="headline name"]');
     if ($nodes->length) {
         $value = trim($nodes->item(0)->nodeValue);
         $recipe->title = RecipeParser_Text::formatTitle($value);
     }

     // Credits
     $nodes = $xpath->query('//*[@itemprop="author"]//*[@itemprop="name"]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->credits = RecipeParser_Text::formatCredits($line . ", About.com");
     }

     // Ingredients
     $recipe->resetIngredients();
     $nodes = $xpath->query('//*[@itemprop="ingredients"]');
     foreach ($nodes as $node) {
         $value = RecipeParser_Text::formatAsOneLine($node->nodeValue);

         // An element with no children has a null firstChild; guard it so an
         // empty ingredient element no longer triggers a property read on null.
         $first_child = $node->firstChild;
         $first_tag = $first_child ? $first_child->nodeName : '';

         // A line is a section heading when the text looks like one, or when
         // the element's first child is a <strong> or <b>.
         if (RecipeParser_Text::matchSectionName($value) || $first_tag == "strong" || $first_tag == "b") {
             $value = RecipeParser_Text::formatSectionName($value);
             $recipe->addIngredientsSection($value);
         } else {
             $recipe->appendIngredient($value);
         }
     }

     // Instructions
     $recipe->resetInstructions();
     $nodes = $xpath->query('//div[@itemprop="recipeInstructions"]');
     foreach ($nodes as $node) {
         $text = trim($node->nodeValue);
         $lines = preg_split("/[\n\r]+/", $text);

         // First pass (bottom to top): clean each line and pull out the yield.
         // Walking upwards means the top-most "Makes ..." line is the one
         // whose value ends up in $recipe->yield.
         for ($i = count($lines) - 1; $i >= 0; $i--) {
             $lines[$i] = trim($lines[$i]);
             // Remove ends of lines that have the word "recipes" squashed up against
             // another word, which seems to happen with long lists of related
             // recipe links.
             // NOTE(review): unlike the patterns below, this one has no trailing
             // ".*", so it only drops "recipes" plus one following character and
             // keeps the rest of the line -- confirm whether that is intended.
             $lines[$i] = preg_replace("/(.*)recipes\\w/i", "\$1", $lines[$i]);
             // Remove lines that have the phrase "Xxxxx Recipes and More".
             // Remove lines that have the phrase "Xxxxx Recipes | Xxxxx".
             // Remove mentions of newsletters.
             $lines[$i] = preg_replace("/(.*)More .* Recipes.*/", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)Recipes and More.*/", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)Recipes \\| .*/", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)Recipe Newsletter.*/", "\$1", $lines[$i]);

             // Look for a line in the instructions that looks like a yield.
             if (strpos($lines[$i], "Makes ") === 0) {
                 $recipe->yield = substr($lines[$i], 6);
                 // Blank the line so the second pass does not emit it as a step.
                 $lines[$i] = '';
             }
         }

         // Second pass (top to bottom): emit sections and instruction steps.
         foreach ($lines as $line) {
             $line = trim($line);
             if (empty($line)) {
                 continue;
             }
             // Skip a bare "Preparation" heading line.
             if (strtolower($line) == "preparation") {
                 continue;
             }
             // Match section names that read something like "---For the cake: Raise the oven temperature..."
             if (preg_match("/^(?:-{2,})?For the (.+)\\: (.*)\$/i", $line, $m)) {
                 $section = RecipeParser_Text::formatSectionName($m[1]);
                 $recipe->addInstructionsSection($section);
                 // Continue with the remainder of the line, minus the section name.
                 $line = ucfirst($m[2]);
             }
             $recipe->appendInstruction($line);
         }
     }

     return $recipe;
 }
コード例 #23
0
 /**
  * Parse a recipe page: start from the generic schema.org microdata result,
  * then override yield, description, notes, ingredients, instructions and
  * photo URL from the page's own markup.
  *
  * @param string $html Page markup; converted from UTF-8 to HTML entities before DOM parsing.
  * @param string $url  Page URL; forwarded to the microdata parser and used as the
  *                     base when resolving the photo URL.
  * @return object The recipe returned by RecipeParser_Parser_MicrodataSchema::parse(),
  *                with the overrides below applied.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Collect libxml errors internally so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Yield: taken from any <li class="credit"> that mentions "servings".
     $nodes = $xpath->query('//li[@class="credit"]');
     foreach ($nodes as $node) {
         $line = $node->nodeValue;
         if (stripos($line, "servings") !== false) {
             // Skip non-digits (\D*) up to the first number. A greedy ".*" here
             // would swallow all but the last digit, turning "Servings: 12"
             // into "2".
             $line = preg_replace("/servings\\:?\\D*(\\d+)/i", "\$1", $line);
             $line = RecipeParser_Text::formatYield($line);
             $recipe->yield = $line;
         }
     }

     // Description
     $nodes = $xpath->query('//*[@itemprop="page-dek"]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $line = RecipeParser_Text::formatAsOneLine($line);
         $recipe->description = $line;
     }

     // Notes: all "note-text" elements, separated by blank lines. The field is
     // overwritten even when no notes are found (it then becomes "").
     $line = "";
     $nodes = $xpath->query('//*[@class="note-text"]');
     foreach ($nodes as $node) {
         $line .= trim($node->nodeValue) . "\n\n";
     }
     $recipe->notes = rtrim($line);

     // Ingredients: one "components-group" per section.
     $recipe->resetIngredients();
     $sections = $xpath->query('//*[@class="components-group"]');
     foreach ($sections as $section_node) {
         // Optional section header; empty names are not registered.
         $section_nodes = $xpath->query('.//*[@class="components-group-header"]', $section_node);
         if ($section_nodes->length) {
             $line = $section_nodes->item(0)->nodeValue;
             $line = RecipeParser_Text::formatSectionName($line);
             if (!empty($line)) {
                 $recipe->addIngredientsSection($line);
             }
         }
         $ing_nodes = $xpath->query('.//*[@class="components-item"]', $section_node);
         foreach ($ing_nodes as $node) {
             $line = RecipeParser_Text::formatAsOneLine($node->nodeValue);
             $recipe->appendIngredient($line);
         }
     }

     // Instructions
     $recipe->resetInstructions();
     $nodes = $xpath->query('//*[@class="directions-item"]');
     foreach ($nodes as $node) {
         $line = RecipeParser_Text::formatAsOneLine($node->nodeValue);
         $recipe->appendInstruction($line);
     }

     // Photo URL: read from the "data-original" attribute of the first
     // <img itemprop="image">.
     $nodes = $xpath->query('//img[@itemprop="image"]');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute("data-original");
         // getAttribute() returns '' when the attribute is absent; in that case
         // keep whatever photo URL the microdata pass already found instead of
         // replacing it with a URL built from an empty path.
         if ($photo_url !== '') {
             $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
         }
     }

     return $recipe;
 }