コード例 #1
0
 /**
  * Extracts a recipe from a page whose fields are tagged with fixed
  * id/class values ("page-title", "field-recipe-time", "field-yield", ...).
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; passed along when resolving the photo URL.
  * @return RecipeParser_Recipe Recipe holding whichever fields were found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Collect libxml errors internally so mismatched tags do not emit warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     // Title: first element carrying the page-title id.
     $title_nodes = $finder->query('//*[@id="page-title"]');
     if ($title_nodes->length) {
         $recipe->title = RecipeParser_Text::formatTitle($title_nodes->item(0)->nodeValue);
     }

     // Times: each node is labelled either "Hands-On Time" or "Total Time".
     foreach ($finder->query('//*[@class="field-recipe-time"]') as $time_node) {
         $text = RecipeParser_Text::formatAsOneLine($time_node->nodeValue);
         if (strpos($text, "Hands-On Time") !== false) {
             $text = str_replace("Hands-On Time ", "", $text);
             $recipe->time["prep"] = RecipeParser_Times::toMinutes($text);
         } elseif (strpos($text, "Total Time") !== false) {
             $text = str_replace("Total Time ", "", $text);
             $recipe->time["total"] = RecipeParser_Times::toMinutes($text);
         }
     }

     // Yield: only the first match is used.
     $yield_nodes = $finder->query('//*[@class="field-yield"]');
     if ($yield_nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($yield_nodes->item(0)->nodeValue);
     }

     // Ingredients: one entry per matching node.
     foreach ($finder->query('//*[@class="field-ingredients"]') as $ingredient_node) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue));
     }

     // Instructions: one entry per <li> below the instructions container.
     foreach ($finder->query('//*[@class="field-instructions"]//li') as $step_node) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step_node->nodeValue));
     }

     // Photo: taken from the Open Graph image element.
     $image_nodes = $finder->query('//*[@property="og:image"]');
     if ($image_nodes->length) {
         $image_src = $image_nodes->item(0)->getAttribute('content');
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($image_src, $url);
     }

     return $recipe;
 }
コード例 #2
0
 /**
  * Runs the generic microformat parser, then overrides title, yield, times and
  * photo from site-specific markup, and finally strips a "Prefix: " intro from
  * the title.
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; passed along when resolving the photo URL.
  * @return RecipeParser_Recipe Whatever RecipeParser_Parser_Microformat::parse() returned, with overrides applied.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     $main_sections = $finder->query('//section[@role="main"]');
     if ($main_sections->length) {
         $main = $main_sections->item(0);

         // The title is not marked up with class="fn", so take the first <h1>.
         $headings = $finder->query('.//h1', $main);
         if ($headings->length) {
             $recipe->title = RecipeParser_Text::formatTitle($headings->item(0)->nodeValue);
         }

         // Yield -- class names are conflated; the later query wins when both match.
         $yield_queries = array('.//*[@class="info yield"]', './/span[@itemprop="recipeYield"]');
         foreach ($yield_queries as $yield_query) {
             $matches = $finder->query($yield_query, $main);
             if ($matches->length) {
                 $recipe->yield = RecipeParser_Text::formatYield($matches->item(0)->nodeValue);
             }
         }

         // Prep and total times -- class names are conflated here as well.
         $time_queries = array(
             'prep' => './/*[@class="info preptime"]',
             'total' => './/*[@class="info duration"]',
         );
         foreach ($time_queries as $time_key => $time_query) {
             $matches = $finder->query($time_query, $main);
             if ($matches->length) {
                 $recipe->time[$time_key] = RecipeParser_Times::toMinutes($matches->item(0)->nodeValue);
             }
         }
     }

     // Photo: first <img> directly inside a "content-unit" section.
     $images = $finder->query('//section[@class="content-unit"]/img');
     if ($images->length) {
         $image_src = $images->item(0)->getAttribute('src');
         if ($image_src) {
             $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($image_src, $url);
         }
     }

     // Drop title intros -- e.g. "Sunday Dinner: Pork Ribs" becomes "Pork Ribs".
     if (strpos($recipe->title, ": ") !== false) {
         $recipe->title = preg_replace("/^[^:]+: (.+)/", "\$1", $recipe->title);
     }

     return $recipe;
 }
コード例 #3
0
ファイル: Bookmark.php プロジェクト: JoshRamynke/MealPlan-Web
 /**
  * Builds a minimal "bookmark" recipe (url, title, optional photo) from a page.
  *
  * The title comes from the og:title meta tag when it yields a non-empty
  * formatted string, otherwise from <title>, otherwise a "Recipe from <url>"
  * placeholder is used.
  *
  * @param string $html Markup of the bookmarked page.
  * @param string $url  Address of the bookmarked page.
  * @return RecipeStruct Struct with status "bookmark".
  */
 public static function getBookmarkAsRecipeStruct($html, $url)
 {
     // Collect libxml errors internally so mismatched tags do not emit warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     // The page is stored as a bookmark rather than a fully parsed recipe.
     $recipe = new RecipeStruct();
     $recipe->url = $url;
     $recipe->status = "bookmark";

     // Candidate 1: the document's <title> element.
     $from_title_tag = "";
     $title_nodes = $finder->query('//title');
     if ($title_nodes->length) {
         $from_title_tag = RecipeParser_Text::formatTitle($title_nodes->item(0)->nodeValue);
     }

     // Candidate 2: the Open Graph title.
     $from_og_meta = "";
     $og_title_nodes = $finder->query('//meta[@property="og:title"]');
     if ($og_title_nodes->length) {
         $from_og_meta = RecipeParser_Text::formatTitle($og_title_nodes->item(0)->getAttribute("content"));
     }

     // Pick the best available title; Open Graph is preferred over <title>.
     $chosen_title = "Recipe from {$url}";
     if ($from_og_meta) {
         $chosen_title = $from_og_meta;
     } elseif ($from_title_tag) {
         $chosen_title = $from_title_tag;
     }
     $recipe->title = $chosen_title;

     // Photo from the Open Graph image tag, when it has a non-empty content value.
     $og_image_nodes = $finder->query('//meta[@property="og:image"]');
     if ($og_image_nodes->length) {
         $image_src = $og_image_nodes->item(0)->getAttribute("content");
         if ($image_src) {
             $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($image_src, $url);
         }
     }

     return $recipe;
 }
コード例 #4
0
 /**
  * Runs the data-vocabulary microdata parser, then applies MyRecipes-specific
  * fixes: fallback title, larger photo, credits, times and ingredient cleanup.
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; forwarded to the base parser.
  * @return RecipeParser_Recipe Result of the base parser with overrides applied.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataDataVocabulary::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     // Fall back to the Open Graph title when the base parser found none.
     if (!$recipe->title) {
         $og_titles = $finder->query('//meta[@property="og:title"]');
         if ($og_titles->length) {
             $recipe->title = RecipeParser_Text::formatTitle($og_titles->item(0)->getAttribute("content"));
         }
     }

     // Photo URL: switch to the larger image variant found on MyRecipes.
     $recipe->photo_url = str_replace('-l.jpg', '-x.jpg', $recipe->photo_url);

     // Credits: taken from a "More from <source>" heading.
     $headings = $finder->query('//*[@class="link-list"]/h4');
     if ($headings->length) {
         $heading_text = trim($headings->item(0)->nodeValue);
         if (strpos($heading_text, "More from") === 0) {
             $recipe->credits = str_replace("More from ", "", $heading_text);
         }
     }

     // Times: each node's lower-cased text is checked for a "prep: "/"cook: "/"total: " prefix.
     $prefixes = array('prep' => 'prep: ', 'cook' => 'cook: ', 'total' => 'total: ');
     foreach ($finder->query('//*[@class="recipe-time-info"]') as $time_node) {
         $text = trim(strtolower($time_node->nodeValue));
         foreach ($prefixes as $time_key => $prefix) {
             if (strpos($text, $prefix) !== 0) {
                 continue;
             }
             // The stripped text is carried over to the remaining prefix checks.
             $text = str_replace($prefix, "", $text);
             $recipe->time[$time_key] = RecipeParser_Times::toMinutes($text);
         }
     }

     // Clean up each of the ingredients to remove "$Click to see savings".
     // These don't come through in the curl'ed test files.
     $section_total = count($recipe->ingredients);
     for ($s = 0; $s < $section_total; $s++) {
         $item_total = count($recipe->ingredients[$s]['list']);
         for ($k = 0; $k < $item_total; $k++) {
             $item = $recipe->ingredients[$s]['list'][$k];
             $dollar_at = strpos($item, "\$");
             // Only truncate when "$" occurs after the first character.
             if ($dollar_at > 0) {
                 $recipe->ingredients[$s]['list'][$k] = substr($item, 0, $dollar_at);
             }
         }
     }

     return $recipe;
 }
コード例 #5
0
 /**
  * Extracts a recipe by looking up individual itemprop attributes anywhere in
  * the document (no enclosing microdata item is required).
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page (not used by this parser's body).
  * @return RecipeParser_Recipe Recipe holding whichever fields were found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Collect libxml errors internally so mismatched tags do not emit warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     // Title
     $found = $finder->query('//h1[@itemprop="name"]');
     if ($found->length) {
         $recipe->title = RecipeParser_Text::formatTitle($found->item(0)->nodeValue);
     }

     // Description
     $found = $finder->query('//*[@itemprop="description"]');
     if ($found->length) {
         $recipe->description = RecipeParser_Text::formatAsOneLine($found->item(0)->nodeValue);
     }

     // Author
     $found = $finder->query('//span[@itemprop="author"]');
     if ($found->length) {
         $recipe->credits = RecipeParser_Text::formatCredits($found->item(0)->nodeValue);
     }

     // Prep and total times are read from the "content" attribute as ISO 8601 durations.
     $time_props = array('prepTime' => 'prep', 'totalTime' => 'total');
     foreach ($time_props as $itemprop => $time_key) {
         $found = $finder->query('//*[@itemprop="' . $itemprop . '"]');
         if ($found->length) {
             $duration = $found->item(0)->getAttribute("content");
             $recipe->time[$time_key] = RecipeParser_Text::iso8601ToMinutes($duration);
         }
     }

     // Yield (note the all-lowercase itemprop value).
     $found = $finder->query('//*[@itemprop="recipeyield"]');
     if ($found->length) {
         $recipe->yield = RecipeParser_Text::formatYield($found->item(0)->nodeValue);
     }

     // Ingredients
     foreach ($finder->query('//*[@itemprop="ingredients"]') as $ingredient_node) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue));
     }

     // Instructions: <li> elements directly below the instructions container.
     foreach ($finder->query('//*[@itemprop="recipeinstructions"]/li') as $step_node) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step_node->nodeValue));
     }

     // Photo: Open Graph image, stored as-is.
     $found = $finder->query('//meta[@property="og:image"]');
     if ($found->length) {
         $recipe->photo_url = $found->item(0)->getAttribute("content");
     }

     return $recipe;
 }
コード例 #6
0
 /**
  * formatTitle() is expected to trim the input and drop a trailing "Recipe".
  */
 public function test_title_ends_recipe()
 {
     $expected = "Bananas Foster";
     $actual = RecipeParser_Text::formatTitle(" Bananas Foster Recipe ");
     $this->assertEquals($expected, $actual);
 }
コード例 #7
0
 /**
  * Parses a recipe marked up with schema.org/Recipe microdata.
  *
  * Nothing is extracted unless an element whose itemtype contains
  * "//schema.org/Recipe" (or "//schema.org/recipe") exists. Title, description,
  * times, yield, itemprop image and credits are read relative to that element;
  * ingredients, instructions and the og:image lookup search the whole document.
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; passed along when formatting the photo URL.
  * @return RecipeParser_Recipe Populated recipe, or an empty one when no microdata item is found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Locate the top-level microdata item; bail out early when there is none.
     $roots = $xpath->query('//*[contains(@itemtype, "//schema.org/Recipe") or contains(@itemtype, "//schema.org/recipe")]');
     $microdata = $roots->length ? $roots->item(0) : null;
     if (!$microdata) {
         return $recipe;
     }

     // Title
     $nodes = $xpath->query('.//*[@itemprop="name"]', $microdata);
     if ($nodes->length) {
         $recipe->title = RecipeParser_Text::formatTitle(trim($nodes->item(0)->nodeValue));
     }

     // Summary
     $nodes = $xpath->query('.//*[@itemprop="description"]', $microdata);
     if ($nodes->length) {
         $recipe->description = RecipeParser_Text::formatAsParagraphs($nodes->item(0)->nodeValue);
     }

     // Times: prefer the "content" attribute, then "datetime", then the visible text.
     $time_props = array('prepTime' => 'prep', 'cookTime' => 'cook', 'totalTime' => 'total');
     foreach ($time_props as $itemprop => $time_key) {
         $nodes = $xpath->query('.//*[@itemprop="' . $itemprop . '"]', $microdata);
         if (!$nodes->length) {
             continue;
         }
         $time_node = $nodes->item(0);
         $duration = $time_node->getAttribute('content');
         if (!$duration) {
             $duration = $time_node->getAttribute('datetime');
         }
         if ($duration) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($duration);
         } else {
             $minutes = RecipeParser_Times::toMinutes(trim($time_node->nodeValue));
         }
         if ($minutes) {
             $recipe->time[$time_key] = $minutes;
         }
     }

     // Yield: try the camel-case itemprop first, then the all-lowercase variant.
     $nodes = $xpath->query('.//*[@itemprop="recipeYield"]', $microdata);
     if (!$nodes->length) {
         $nodes = $xpath->query('.//*[@itemprop="recipeyield"]', $microdata);
     }
     if ($nodes->length) {
         $yield_node = $nodes->item(0);
         $yield_text = $yield_node->hasAttribute('content')
             ? $yield_node->getAttribute('content')
             : $yield_node->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($yield_text);
     }

     // Ingredients (searched across the whole document).
     foreach ($xpath->query('//*[@itemprop="ingredients"]') as $ingredient_node) {
         $text = RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue);
         // Skip blanks, and anything over 150 bytes: probably a mistake,
         // like a run-on of existing ingredients.
         if (empty($text) || strlen($text) > 150) {
             continue;
         }
         if (RecipeParser_Text::matchSectionName($text)) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }

     // Instructions: the first lookup that yields nodes wins.
     //  1. <li> tags anywhere below recipeInstructions
     //  2. direct children of recipeInstructions
     //  3. elements carrying an "instruction" class
     $instruction_queries = array(
         '//*[@itemprop="recipeInstructions"]//li',
         '//*[@itemprop="recipeInstructions"]/*',
         './/*[@itemprop="recipeInstructions"]//*[contains(concat(" ", normalize-space(@class), " "), " instruction ")]',
     );
     $found = false;
     foreach ($instruction_queries as $instruction_query) {
         $nodes = $xpath->query($instruction_query);
         if ($nodes->length) {
             RecipeParser_Text::parseInstructionsFromNodes($nodes, $recipe);
             $found = true;
             break;
         }
     }
     // Otherwise: either several recipeInstructions nodes, or one node with a blob of text.
     if (!$found) {
         $nodes = $xpath->query('.//*[@itemprop="recipeInstructions"]');
         if ($nodes->length > 1) {
             RecipeParser_Text::parseInstructionsFromNodes($nodes, $recipe);
         } elseif ($nodes->length == 1) {
             RecipeParser_Text::parseInstructionsFromBlob($nodes->item(0)->nodeValue, $recipe);
         }
     }

     // Photo: Open Graph image first, then itemprop="image", then an <img> nested in it.
     $photo_url = "";
     $nodes = $xpath->query('//meta[@property="og:image"]');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute('content');
     }
     if (!$photo_url) {
         $nodes = $xpath->query('.//*[@itemprop="image"]', $microdata);
         if ($nodes->length) {
             $photo_url = $nodes->item(0)->getAttribute('src');
         }
     }
     if (!$photo_url) {
         $nodes = $xpath->query('.//*[@itemprop="image"]//img', $microdata);
         if ($nodes->length) {
             $photo_url = $nodes->item(0)->getAttribute('src');
         }
     }
     if ($photo_url) {
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_url, $url);
     }

     // Credits: a publisher, when present, replaces the author.
     $credit_text = "";
     foreach (array('author', 'publisher') as $credit_prop) {
         $nodes = $xpath->query('.//*[@itemprop="' . $credit_prop . '"]', $microdata);
         if ($nodes->length) {
             $credit_text = $nodes->item(0)->nodeValue;
         }
     }
     $recipe->credits = RecipeParser_Text::formatCredits($credit_text);

     return $recipe;
 }
コード例 #8
0
 /**
  * Runs the schema.org microdata parser, then fills in or overrides fields for
  * Allrecipes pages. Allrecipes allows custom recipes that use a template
  * which does not carry schema.org/Recipe markup, so missing fields are looked
  * up with site-specific selectors.
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; forwarded to the base parser.
  * @return RecipeParser_Recipe Result of the base parser with overrides applied.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard microdata stuff we can find.
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Collect libxml errors internally so mismatched tags do not emit warnings.
     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     // Title
     if (!$recipe->title) {
         $headings = $finder->query('//h1[@itemprop="name"]');
         if ($headings->length) {
             $recipe->title = RecipeParser_Text::formatTitle($headings->item(0)->nodeValue);
         }
     }

     // Yield (stored unformatted)
     if (!$recipe->yield) {
         $yield_spans = $finder->query('//div[@class = "servings-form"]//span[@class = "yield yieldform"]');
         if ($yield_spans->length) {
             $recipe->yield = $yield_spans->item(0)->nodeValue;
         }
     }

     // Times: looked up by element id; only non-empty results overwrite.
     $time_ids = array('liPrep' => 'prep', 'liCook' => 'cook', 'liTotal' => 'total');
     foreach ($time_ids as $element_id => $time_key) {
         $matches = $finder->query('.//*[@id="' . $element_id . '"]');
         if (!$matches->length) {
             continue;
         }
         $text = RecipeParser_Text::formatAsOneLine($matches->item(0)->nodeValue);
         $text = trim(preg_replace("/(COOK|PREP|READY IN)/", "", $text));
         $minutes = RecipeParser_Times::toMinutes($text);
         if ($minutes) {
             $recipe->time[$time_key] = $minutes;
         }
     }

     // Ingredients: only when the first section is still empty.
     if (!count($recipe->ingredients[0]["list"])) {
         $items = $finder->query('//li[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
         foreach ($items as $item) {
             $text = trim(strip_tags($item->nodeValue));
             // A line ending in ":" starts a new section.
             if (preg_match("/^(.+):\$/", $text, $parts)) {
                 $recipe->addIngredientsSection(ucfirst(strtolower($parts[1])));
             } elseif ($text) {
                 $recipe->appendIngredient($text);
             }
         }
     }

     // Instructions: only when the first section is still empty.
     if (!count($recipe->instructions[0]["list"])) {
         foreach ($finder->query('//div[@class="directions"]//ol/li') as $step) {
             $text = RecipeParser_Text::formatAsOneLine($step->nodeValue);
             if (preg_match("/^(.+):\$/", $text, $parts)) {
                 $recipe->addInstructionsSection(ucfirst(strtolower($parts[1])));
             } elseif ($text) {
                 $recipe->appendInstruction($text);
             }
         }
     }

     // Drop a useless "All done!" line from the end of the last instruction section.
     $last_section = count($recipe->instructions) - 1;
     $last_step = count($recipe->instructions[$last_section]['list']) - 1;
     if ($last_step >= 0
         && strpos($recipe->instructions[$last_section]['list'][$last_step], "All done!") === 0
     ) {
         unset($recipe->instructions[$last_section]['list'][$last_step]);
     }

     // Photo URL: swap in the larger image variants.
     if ($recipe->photo_url) {
         $recipe->photo_url = str_replace(
             array('/userphoto/small/', '/userphotos/140x140/'),
             array('/userphoto/big/', '/userphotos/250x250/'),
             $recipe->photo_url
         );
     }

     return $recipe;
 }
コード例 #9
0
 /**
  * Parses a recipe marked up with data-vocabulary.org/Recipe microdata.
  *
  * Nothing is extracted unless an element with
  * itemtype="http://data-vocabulary.org/Recipe" exists; apart from the
  * og:image lookup, all fields are read relative to that element.
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; passed along when resolving the photo URL.
  * @return RecipeParser_Recipe Populated recipe, or an empty one when no microdata item is found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $xpath = new DOMXPath($doc);

     // Find the top-level node for Recipe microdata; bail out early when absent.
     $roots = $xpath->query('//*[@itemtype="http://data-vocabulary.org/Recipe"]');
     $microdata = $roots->length ? $roots->item(0) : null;
     if (!$microdata) {
         return $recipe;
     }

     // Title
     $nodes = $xpath->query('.//*[@itemprop="name"]', $microdata);
     if ($nodes->length) {
         $recipe->title = RecipeParser_Text::formatTitle($nodes->item(0)->nodeValue);
     }

     // Summary
     $nodes = $xpath->query('.//*[@itemprop="summary"]', $microdata);
     if ($nodes->length) {
         $recipe->description = trim($nodes->item(0)->nodeValue);
     }

     // Times: prefer the "datetime" attribute, then "content", then the visible text.
     $time_props = array('prepTime' => 'prep', 'cookTime' => 'cook', 'totalTime' => 'total');
     foreach ($time_props as $itemprop => $time_key) {
         $nodes = $xpath->query('.//*[@itemprop="' . $itemprop . '"]', $microdata);
         if (!$nodes->length) {
             continue;
         }
         $time_node = $nodes->item(0);
         $duration = $time_node->getAttribute('datetime');
         if (!$duration) {
             $duration = $time_node->getAttribute('content');
         }
         if ($duration) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($duration);
         } else {
             $minutes = RecipeParser_Times::toMinutes(trim($time_node->nodeValue));
         }
         if ($minutes) {
             $recipe->time[$time_key] = $minutes;
         }
     }

     // Yield: "servingSize" is consulted only when no "yield" element exists at all.
     $yield_text = "";
     $nodes = $xpath->query('.//*[@itemprop="yield"]', $microdata);
     if (!$nodes->length) {
         $nodes = $xpath->query('.//*[@itemprop="servingSize"]', $microdata);
     }
     if ($nodes->length) {
         $yield_text = trim($nodes->item(0)->nodeValue);
     }
     if ($yield_text) {
         $yield_text = preg_replace('/\\s+/', ' ', $yield_text);
         $recipe->yield = RecipeParser_Text::formatYield($yield_text);
     }

     // Ingredients: the data-vocabulary itemprop first, then two non-standard
     // fallbacks; the first query that returns nodes is used.
     $ingredient_queries = array(
         './/*[@itemprop="ingredient"]',
         './/*[@id="ingredients"]//li',
         './/*[@class="ingredients"]//li',
     );
     $nodes = null;
     foreach ($ingredient_queries as $ingredient_query) {
         if ($nodes && $nodes->length) {
             break;
         }
         $nodes = $xpath->query($ingredient_query, $microdata);
     }
     foreach ($nodes as $ingredient_node) {
         $text = RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue);
         if (empty($text)) {
             continue;
         }
         if (RecipeParser_Text::matchSectionName($text)) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }

     // Instructions: the first lookup that yields nodes wins.
     //  1. <li> tags anywhere below "instructions"
     //  2. elements carrying an "instruction" class
     // NOTE(review): the second query uses the singular itemprop "instruction",
     // unlike the other lookups here -- confirm whether that is intended.
     $instruction_queries = array(
         './/*[@itemprop="instructions"]//li',
         './/*[@itemprop="instruction"]//*[contains(concat(" ", normalize-space(@class), " "), " instruction ")]',
     );
     $found = false;
     foreach ($instruction_queries as $instruction_query) {
         $nodes = $xpath->query($instruction_query, $microdata);
         if ($nodes->length) {
             RecipeParser_Text::parseInstructionsFromNodes($nodes, $recipe);
             $found = true;
             break;
         }
     }
     // Otherwise: either several instructions nodes, or one node with a blob of text.
     if (!$found) {
         $nodes = $xpath->query('.//*[@itemprop="instructions"]', $microdata);
         if ($nodes->length > 1) {
             RecipeParser_Text::parseInstructionsFromNodes($nodes, $recipe);
         } elseif ($nodes->length == 1) {
             RecipeParser_Text::parseInstructionsFromBlob($nodes->item(0)->nodeValue, $recipe);
         }
     }

     // Photo: Open Graph image first, then itemprop="photo" (src, else content),
     // then an <img> nested inside the photo element.
     $photo_url = "";
     $nodes = $xpath->query('//meta[@property="og:image"]');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute('content');
     }
     if (!$photo_url) {
         $nodes = $xpath->query('.//*[@itemprop="photo"]', $microdata);
         if ($nodes->length) {
             $photo_node = $nodes->item(0);
             if ($photo_node->hasAttribute('src')) {
                 $photo_url = $photo_node->getAttribute('src');
             } elseif ($photo_node->hasAttribute('content')) {
                 $photo_url = $photo_node->getAttribute('content');
             }
         }
     }
     if (!$photo_url) {
         $nodes = $xpath->query('.//*[@itemprop="photo"]//img', $microdata);
         if ($nodes->length) {
             $photo_url = $nodes->item(0)->getAttribute('src');
         }
     }
     if ($photo_url) {
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
     }

     // Credits
     $nodes = $xpath->query('.//*[@itemprop="author"]', $microdata);
     if ($nodes->length) {
         $recipe->credits = RecipeParser_Text::formatCredits($nodes->item(0)->nodeValue);
     }

     return $recipe;
 }
コード例 #10
0
 /**
  * Runs the generic microformat parser, then fills in any of title, yield,
  * ingredients, instructions and photo that it left empty, using
  * site-specific selectors.
  *
  * @param string $html Markup of the recipe page.
  * @param string $url  Address of the page; forwarded to the base parser.
  * @return RecipeParser_Recipe Result of the base parser with gaps filled in.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);

     libxml_use_internal_errors(true);
     $dom = new DOMDocument();
     $dom->loadHTML('<?xml encoding="UTF-8">' . mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"));
     $finder = new DOMXPath($dom);

     // Title
     if (!$recipe->title) {
         $name_nodes = $finder->query('//div[@itemprop="name"]');
         if ($name_nodes->length) {
             $recipe->title = RecipeParser_Text::formatTitle($name_nodes->item(0)->nodeValue);
         }
     }

     // Yield: first box entry whose text starts with "makes" (case-insensitive).
     if (!$recipe->yield) {
         foreach ($finder->query('//div[@class="box"]/div') as $box_entry) {
             $text = trim($box_entry->nodeValue);
             if (stripos($text, "makes") === 0) {
                 $recipe->yield = RecipeParser_Text::formatYield($text);
                 break;
             }
         }
     }

     // Ingredients: <li> children of the first ingredients list are joined with
     // "<br>" and split again on that marker before being appended.
     if (!count($recipe->ingredients[0]["list"])) {
         $lists = $finder->query('//ul[@class="ingredients"]');
         if ($lists->length) {
             $joined = "";
             foreach ($lists->item(0)->childNodes as $child) {
                 if ($child->nodeName == "li") {
                     $joined .= $child->nodeValue . "<br>";
                 }
             }
             foreach (explode("<br>", $joined) as $piece) {
                 $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($piece));
             }
         }
     }

     // Instructions
     if (!count($recipe->instructions[0]["list"])) {
         foreach ($finder->query('//div[@class="instructions"]/ol/li') as $step) {
             $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
         }
     }

     // Photo: first Open Graph image whose URL contains "wp-content".
     if (!$recipe->photo_url) {
         foreach ($finder->query('//meta[@property="og:image"]') as $meta) {
             $candidate = $meta->getAttribute("content");
             if (strpos($candidate, "wp-content") !== false) {
                 $recipe->photo_url = $candidate;
                 break;
             }
         }
     }

     return $recipe;
 }
コード例 #11
0
 /**
  * Parse a recipe page whose fields are marked up with CSS classes.
  *
  * Extracts title, yield, prep/cook/total times, ingredients, instructions
  * and the photo URL with XPath queries against the loaded document.
  *
  * @param string $html Page HTML; converted to HTML entities before loading.
  * @param string $url  Page URL, used to turn the photo link into an absolute URL.
  * @return RecipeParser_Recipe Recipe populated with whichever fields were found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     // Title
     $nodes = $xpath->query('//*[@class="rTitle fn"]');
     if ($nodes->length) {
         $line = RecipeParser_Text::formatTitle($nodes->item(0)->nodeValue);
         $recipe->title = $line;
     }
     // Yield
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " yield ")]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }
     // Times
     // The duration is read from the "title" attribute of the SECOND <span>
     // child of each time container. Containers with fewer than two spans are
     // skipped: item(1) would be null there and calling getAttribute() on it
     // is a fatal error.
     $time_classes = array(
         "prep" => "prepTime",
         "cook" => "rspec-cook-time",
         "total" => "totaltime",
     );
     foreach ($time_classes as $key => $class) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ' . $class . ' ")]/span');
         if ($nodes->length > 1) {
             $line = $nodes->item(1)->getAttribute("title");
             $recipe->time[$key] = RecipeParser_Text::iso8601ToMinutes($line);
         }
     }
     // Ingredients
     $nodes = $xpath->query('//*[@class="ingredient"]');
     foreach ($nodes as $node) {
         $line = RecipeParser_Text::formatAsOneLine($node->nodeValue);
         $recipe->appendIngredient($line);
     }
     // Instructions
     $nodes = $xpath->query('//*[@class="instructions"]');
     if ($nodes->length) {
         // Flatten the children of the first instructions node into one blob,
         // inserting a blank-line separator after every <p> so the blob can be
         // split back into individual steps below.
         $blob = "";
         foreach ($nodes->item(0)->childNodes as $node) {
             $blob .= RecipeParser_Text::formatAsOneLine($node->nodeValue) . " ";
             if ($node->nodeName == "p") {
                 $blob .= "\n\n";
             }
         }
         // Minor cleanup
         $blob = str_replace(" , ", ", ", $blob);
         $blob = str_replace(" . ", ". ", $blob);
         $blob = str_replace("  ", " ", $blob);
         foreach (explode("\n\n", $blob) as $line) {
             $line = RecipeParser_Text::formatAsOneLine($line);
             $recipe->appendInstruction($line);
         }
     }
     // Photo
     $nodes = $xpath->query('//a[@class="img-enlarge"]');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute("href");
         $photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
         $recipe->photo_url = $photo_url;
     }
     return $recipe;
 }
コード例 #12
0
ファイル: Aboutcom.php プロジェクト: johndunne/RecipeParser
 /**
  * Parse an About.com recipe page.
  *
  * The base recipe comes from RecipeParser_Parser_MicrodataSchema::parse();
  * title, credits, ingredients and instructions are then overridden with
  * About.com-specific handling.
  *
  * @param string $html Page HTML; converted to HTML entities before use.
  * @param string $url  Page URL, forwarded to the microdata parser.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);
     // OVERRIDES FOR ABOUT.COM
     // Title
     $nodes = $xpath->query('//*[@itemprop="headline name"]');
     if ($nodes->length) {
         $value = trim($nodes->item(0)->nodeValue);
         $recipe->title = RecipeParser_Text::formatTitle($value);
     }
     // Credits
     $nodes = $xpath->query('//*[@itemprop="author"]//*[@itemprop="name"]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->credits = RecipeParser_Text::formatCredits($line . ", About.com");
     }
     // Ingredients
     $recipe->resetIngredients();
     $nodes = $xpath->query('//*[@itemprop="ingredients"]');
     foreach ($nodes as $node) {
         $value = $node->nodeValue;
         $value = RecipeParser_Text::formatAsOneLine($value);
         // An ingredient node with no children has a null firstChild; guard it
         // so reading nodeName cannot fail on empty elements.
         $first_child = $node->firstChild;
         $first_name = $first_child ? $first_child->nodeName : "";
         // A line is a section heading when it matches a section name or when
         // its first child is bold markup.
         if (RecipeParser_Text::matchSectionName($value) || $first_name == "strong" || $first_name == "b") {
             $value = RecipeParser_Text::formatSectionName($value);
             $recipe->addIngredientsSection($value);
         } else {
             $recipe->appendIngredient($value);
         }
     }
     // Instructions
     $recipe->resetInstructions();
     $nodes = $xpath->query('//div[@itemprop="recipeInstructions"]');
     foreach ($nodes as $node) {
         $text = trim($node->nodeValue);
         $lines = preg_split("/[\n\r]+/", $text);
         // Walk the lines from last to first, so when several lines start with
         // "Makes " the earliest one is processed last and wins as the yield.
         for ($i = count($lines) - 1; $i >= 0; $i--) {
             $lines[$i] = trim($lines[$i]);
             // Strip "recipes" where it is squashed up against another word,
             // which seems to happen with long lists of related recipe links.
             // Cut lines at the phrases "More Xxxxx Recipes",
             // "Recipes and More", "Recipes | Xxxxx" and "Recipe Newsletter".
             $lines[$i] = preg_replace("/(.*)recipes\\w/i", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)More .* Recipes.*/", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)Recipes and More.*/", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)Recipes \\| .*/", "\$1", $lines[$i]);
             $lines[$i] = preg_replace("/(.*)Recipe Newsletter.*/", "\$1", $lines[$i]);
             // Look for a line in the instructions that looks like a yield.
             if (strpos($lines[$i], "Makes ") === 0) {
                 // 6 == strlen("Makes "); the remainder is stored as the yield
                 // and the line is blanked so it is not emitted as a step.
                 $recipe->yield = substr($lines[$i], 6);
                 $lines[$i] = '';
             }
         }
         foreach ($lines as $line) {
             $line = trim($line);
             if (empty($line)) {
                 continue;
             }
             if (strtolower($line) == "preparation") {
                 continue;
             }
             // Match section names that read something like "---For the cake: Raise the oven temperature..."
             if (preg_match("/^(?:-{2,})?For the (.+)\\: (.*)\$/i", $line, $m)) {
                 $section = $m[1];
                 $section = RecipeParser_Text::formatSectionName($section);
                 $recipe->addInstructionsSection($section);
                 // Reset the value of $line, without the section name.
                 $line = ucfirst($m[2]);
             }
             $recipe->appendInstruction($line);
         }
     }
     return $recipe;
 }
コード例 #13
0
 /**
  * Parse a New York Times recipe page.
  *
  * URLs containing "www.nytimes.com/recipes/" are read using the "Recipes"
  * section markup; every other URL is read as a Dining section article.
  *
  * @param string $html Page HTML; converted to HTML entities before loading.
  * @param string $url  Page URL; selects the layout and is passed on when
  *                     formatting the photo URL.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $is_recipes_section = strpos($url, "www.nytimes.com/recipes/") !== false;

     // Both layouts share the same recipe object and DOM/XPath setup.
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     if ($is_recipes_section) {
         //
         // "RECIPES" SECTION
         //
         // Title
         $found = $xpath->query('//h1[@class="recipe-title recipeName"]');
         if ($found->length) {
             $recipe->title = RecipeParser_Text::formatTitle($found->item(0)->nodeValue);
         }
         // Yield
         $found = $xpath->query('//*[@itemprop="recipeYield"]');
         if ($found->length) {
             $recipe->yield = RecipeParser_Text::formatYield($found->item(0)->nodeValue);
         }
         // Ingredients: <h3> children are headings, anything else holds items.
         foreach ($xpath->query('//div[@class="ingredientsGroup"]/*') as $group_item) {
             if ($group_item->nodeName != "h3") {
                 foreach ($group_item->childNodes as $entry) {
                     $recipe->appendIngredient(trim($entry->nodeValue));
                 }
                 continue;
             }
             $heading = trim($group_item->nodeValue);
             // A bare "Ingredients" heading is not treated as a section name.
             if (!preg_match('/^Ingredients:?$/i', $heading)) {
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($heading));
             }
         }
         // Instructions
         foreach ($xpath->query('//*[@itemprop="recipeInstructions"]/dd') as $step) {
             $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
         }
         // Notes
         if (!$recipe->notes) {
             $found = $xpath->query('//div[@class="yieldNotesGroup"]//*[@class="note"]');
             if ($found->length) {
                 $note_text = trim($found->item(0)->nodeValue);
                 // Drop a leading "Note"/"Notes" label.
                 $note_text = preg_replace("/^Notes?:?\\s*/i", '', $note_text);
                 $recipe->notes = trim($note_text);
             }
         }
         return $recipe;
     }

     //
     // DINING SECTION RECIPES
     //
     // Title
     $found = $xpath->query('//div[@id = "article"]//h1');
     if ($found->length) {
         $recipe->title = trim($found->item(0)->nodeValue);
     }
     // Time and Yield
     foreach ($xpath->query('//div[@id = "article"]//p') as $paragraph) {
         $text = trim($paragraph->nodeValue);
         if (preg_match('/^Yield:? (.+)/', $text, $m)) {
             $recipe->yield = RecipeParser_Text::formatYield($m[1]);
             continue;
         }
         if (preg_match('/^Time:? (.+)/', $text, $m)) {
             $duration = trim($m[1]);
             // Strip a leading "About " and anything from " plus" onwards.
             $duration = preg_replace('/About (.+)/', '$1', $duration);
             $duration = preg_replace('/(.+) plus.*/', '$1', $duration);
             $recipe->time['total'] = RecipeParser_Times::toMinutes($duration);
         }
     }
     // Ingredients
     foreach ($xpath->query('//div[@class="recipeIngredientsList"]/p') as $paragraph) {
         $text = trim($paragraph->nodeValue);
         // Non-empty lines that equal their own upper-casing are section names.
         if ($text && $text == strtoupper($text)) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }
     // Instructions and notes
     $paragraphs = $xpath->query('//div[@class="articleBody"]//p');
     if (!$paragraphs->length) {
         $paragraphs = $xpath->query('//div[@id="articleBody"]//p');
     }
     $collected_notes = '';
     $inside_notes = false;
     foreach ($paragraphs as $paragraph) {
         $text = trim($paragraph->nodeValue);
         // Skip some of the useless lines
         if (preg_match('/^(Adapted from|Time|Yield)/i', $text)) {
             continue;
         }
         // Numbered lines are instruction steps until the notes section starts.
         if (!$inside_notes && preg_match('/^\\d+\\./', $text)) {
             $recipe->appendInstruction(RecipeParser_Text::stripLeadingNumbers($text));
             continue;
         }
         // A "Note(s)" line opens the notes section; later lines belong to it.
         if (preg_match('/^Notes?:?(.*)/i', $text, $m)) {
             $inside_notes = true;
             $current_note = trim($m[1]);
         } elseif ($inside_notes) {
             $current_note = $text;
         } else {
             $current_note = '';
         }
         if ($current_note) {
             $collected_notes .= $current_note . "\n\n";
         }
     }
     if ($collected_notes) {
         // Some unnecessary spaces
         $collected_notes = str_replace("  ", " ", $collected_notes);
         $recipe->notes = trim($collected_notes);
     }
     // Photo
     $found = $xpath->query('//div[@class="image"]//img');
     if ($found->length) {
         $image_src = $found->item(0)->getAttribute('src');
         // Swap the inline-article image name for the popup variant.
         $image_src = str_replace('-articleInline.jpg', '-popup.jpg', $image_src);
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($image_src, $url);
     }
     return $recipe;
 }
コード例 #14
0
 /**
  * Parse a page marked up with "hrecipe"/"hRecipe" class names.
  *
  * The first element carrying the class "hrecipe" (or, failing that,
  * "hRecipe") is used as the recipe root. If neither exists, an empty recipe
  * is returned. Title, summary, credits, photo, yield and times are looked up
  * inside that root; ingredient and instruction queries run against the whole
  * document.
  *
  * @param string $html Page HTML; converted to HTML entities before loading.
  * @param string $url  Page URL, used to make the photo URL absolute.
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Locate the recipe root: lower-case class first, camel-case as fallback.
     $roots = $xpath->query('.//*[contains(concat(" ", normalize-space(@class), " "), " hrecipe ")]');
     if (!$roots->length) {
         $roots = $xpath->query('.//*[contains(concat(" ", normalize-space(@class), " "), " hRecipe ")]');
     }
     if (!$roots->length) {
         // Nothing to parse.
         return $recipe;
     }
     $hrecipe = $roots->item(0);

     // Title
     $hits = $xpath->query('.//*[contains(concat(" ", normalize-space(@class), " "), " fn ")]', $hrecipe);
     if ($hits->length) {
         $recipe->title = RecipeParser_Text::formatTitle($hits->item(0)->nodeValue);
     }
     // Summary
     $hits = $xpath->query('.//*[@class="summary"]', $hrecipe);
     if ($hits->length) {
         $recipe->description = RecipeParser_Text::formatAsParagraphs($hits->item(0)->nodeValue);
     }
     // Credits
     $hits = $xpath->query('.//*[@class="author"]', $hrecipe);
     if ($hits->length) {
         $recipe->credits = RecipeParser_Text::formatCredits($hits->item(0)->nodeValue);
     }
     // Photo: try the "photo" element's own src, then an <img> nested in it.
     $image_src = "";
     $hits = $xpath->query('.//*[contains(concat(" ", normalize-space(@class), " "), " photo ")]', $hrecipe);
     if ($hits->length) {
         $image_src = $hits->item(0)->getAttribute('src');
     }
     if (!$image_src) {
         $hits = $xpath->query('.//*[contains(concat(" ", normalize-space(@class), " "), " photo ")]//img', $hrecipe);
         if ($hits->length) {
             $image_src = $hits->item(0)->getAttribute('src');
         }
     }
     if ($image_src) {
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($image_src, $url);
     }
     // Yield
     $hits = $xpath->query('.//*[@class="yield"]', $hrecipe);
     if ($hits->length) {
         $recipe->yield = RecipeParser_Text::formatYield($hits->item(0)->nodeValue);
     }
     // Prep time: "value-title" title attribute first, free text otherwise.
     $hits = $xpath->query('.//*[@class="prepTime"]//*[@class="value-title"]', $hrecipe);
     if ($hits->length) {
         $recipe->time['prep'] = RecipeParser_Text::iso8601ToMinutes($hits->item(0)->getAttribute('title'));
     } else {
         $hits = $xpath->query('.//*[@class="preptime"]', $hrecipe);
         if ($hits->length) {
             $recipe->time['prep'] = RecipeParser_Times::toMinutes($hits->item(0)->nodeValue);
         }
     }
     // Cook time: same two-step lookup.
     $hits = $xpath->query('.//*[@class="cookTime"]//*[@class="value-title"]', $hrecipe);
     if ($hits->length) {
         $recipe->time['cook'] = RecipeParser_Text::iso8601ToMinutes($hits->item(0)->getAttribute('title'));
     } else {
         $hits = $xpath->query('.//*[@class="cooktime"]', $hrecipe);
         if ($hits->length) {
             $recipe->time['cook'] = RecipeParser_Times::toMinutes($hits->item(0)->nodeValue);
         }
     }
     // Total time: "totalTime" value-title, then "duration" value-title,
     // then the text of "duration".
     $hits = $xpath->query('.//*[@class="totalTime"]//*[@class="value-title"]', $hrecipe);
     if ($hits->length) {
         $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($hits->item(0)->getAttribute('title'));
     } else {
         $hits = $xpath->query('.//*[@class="duration"]//*[@class="value-title"]', $hrecipe);
         if ($hits->length) {
             $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($hits->item(0)->getAttribute('title'));
         } else {
             $hits = $xpath->query('.//*[@class="duration"]', $hrecipe);
             if ($hits->length) {
                 $recipe->time['total'] = RecipeParser_Times::toMinutes($hits->item(0)->nodeValue);
             }
         }
     }
     // Ingredients (document-wide query)
     $items = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
     foreach ($items as $item) {
         $text = RecipeParser_Text::formatAsOneLine(trim($item->nodeValue));
         // Skip lines that contain no word-like characters (sometimes used as section dividers).
         if (!preg_match("/\\w/", $text)) {
             continue;
         }
         if (preg_match('/^\\-+([^\\-]{1}.*[^\\-]{1})\\-+$/', $text, $m)) {
             // Section name delineated with dashes. E.g. "---Cake---"
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($m[1]));
         } elseif (preg_match('/^(.+)\\:$/', $text, $m)) {
             // Section name with colon.
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($m[1]));
         } else {
             $recipe->appendIngredient($text);
         }
     }
     // Instructions: the first strategy that yields nodes wins.
     // 1) <li> elements inside an "instructions" container.
     $steps = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " instructions ")]//li');
     if ($steps->length) {
         RecipeParser_Text::parseInstructionsFromNodes($steps, $recipe);
         return $recipe;
     }
     // 2) Elements with class "instruction" inside an "instructions" container.
     $step_query = '//*[contains(concat(" ", normalize-space(@class), " "), " instructions ")]' . '//*[contains(concat(" ", normalize-space(@class), " "), " instruction ")]';
     $steps = $xpath->query($step_query);
     if ($steps->length) {
         RecipeParser_Text::parseInstructionsFromNodes($steps, $recipe);
         return $recipe;
     }
     // 3) Default. Multiple instructions nodes, or one with a blob of text.
     $containers = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " instructions ")]');
     if ($containers->length > 1) {
         RecipeParser_Text::parseInstructionsFromNodes($containers, $recipe);
     } elseif ($containers->length == 1) {
         RecipeParser_Text::parseInstructionsFromBlob($containers->item(0)->nodeValue, $recipe);
     }
     return $recipe;
 }
コード例 #15
0
 /**
  * Parse a page whose recipe lives inside the element with id="recipe".
  *
  * The title is taken from the <h3> child. The text of the remaining children
  * is gathered into one blob and handed to
  * RecipeParser_Text::parseIngredientsAndInstructionsFromBlob(). A child whose
  * text contains a "Serves ..." line sets the yield instead. The photo comes
  * from the og:image meta tag.
  *
  * @param string $html Page HTML; converted to HTML entities before loading.
  * @param string $url  Page URL (not used by this parser).
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     // Title
     $nodes = $xpath->query('//*[@id="recipe"]/h3');
     if ($nodes->length) {
         $line = RecipeParser_Text::formatTitle($nodes->item(0)->nodeValue);
         $recipe->title = $line;
     }
     // Instructions and Ingredients
     $nodes = $xpath->query('//*[@id="recipe"]/*');
     $blob = "";
     foreach ($nodes as $node) {
         // Skip title
         if ($node->nodeName == "h3") {
             continue;
         }
         // Get servings
         $line = $node->nodeValue;
         // Compare against false explicitly: strpos() returns 0 when the text
         // starts with "Serves", and 0 is falsy.
         if (strpos($line, "Serves") !== false) {
             if (preg_match("/.*(Serves.+)\$/m", $line, $m)) {
                 $line = $m[1];
                 $recipe->yield = RecipeParser_Text::formatYield($line);
                 continue;
             }
         }
         // Add child nodes to blob
         foreach ($node->childNodes as $child) {
             $line = trim($child->nodeValue);
             switch ($child->nodeName) {
                 case "strong":
                     // Bold text is kept on the same line as what follows it.
                     $blob .= $line . " ";
                     break;
                 case "em":
                     // Emphasised text gets a trailing colon if it has none
                     // and is emitted as its own paragraph.
                     if (strpos($line, ":") === false) {
                         $line .= ":";
                     }
                     $blob .= $line . "\n\n";
                     break;
                 case "#text":
                 case "div":
                 case "span":
                 case "p":
                     if ($line === "•") {
                         // Skip stand-alone bullets. "continue 2" targets the
                         // enclosing foreach; a bare "continue" inside switch
                         // acts like "break" and warns on PHP 7.3+.
                         continue 2;
                     }
                     $blob .= $line . "\n\n";
                     break;
             }
         }
     }
     RecipeParser_Text::parseIngredientsAndInstructionsFromBlob($blob, $recipe);
     // Photo
     $nodes = $xpath->query('//meta[@property="og:image"]');
     if ($nodes->length) {
         $line = $nodes->item(0)->getAttribute("content");
         $recipe->photo_url = $line;
     }
     return $recipe;
 }