/**
 * Parse a recipe page, starting from the generic microformat parser and then
 * overriding yield, times and ingredients with class-based XPath lookups.
 *
 * @param string $html Raw HTML of the recipe page (converted from UTF-8 to HTML entities before loading).
 * @param string $url  URL of the page; forwarded to RecipeParser_Parser_Microformat::parse().
 * @return mixed The recipe object returned by RecipeParser_Parser_Microformat::parse(), with overrides applied.
 */
public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     // Yield
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " yield ")]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }
     // Times: CSS class on the wrapper element => key in $recipe->time.
     $time_classes = array('prepTime' => 'prep', 'rspec-cook-time' => 'cook', 'totaltime' => 'total');
     foreach ($time_classes as $class_name => $time_key) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ' . $class_name . ' ")]/span');
         // The duration is read from the title attribute of the SECOND <span>.
         // item() returns null when fewer than two spans matched, so test the
         // node itself rather than just the list length.
         $span = $nodes->item(1);
         if ($span !== null) {
             $line = $span->getAttribute("title");
             $recipe->time[$time_key] = RecipeParser_Text::iso8601ToMinutes($line);
         }
     }
     // Ingredients
     $recipe->resetIngredients();
     $ing_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredients ")]/*');
     foreach ($ing_nodes as $ing_node) {
         if ($ing_node->getAttribute('class') == "ingr-divider") {
             $line = RecipeParser_Text::formatSectionName($ing_node->nodeValue);
             $recipe->addIngredientsSection($line);
             continue;
         }
         // Extract ingredients from inside of <ul class="ingredientsList">
         // Child nodes should all be <li>
         if ($ing_node->nodeName == 'ul') {
             foreach ($ing_node->childNodes as $node) {
                 $line = trim($node->nodeValue);
                 // childNodes also yields the whitespace text nodes between
                 // <li> elements; those must not become blank ingredients.
                 if ($line === '') {
                     continue;
                 }
                 $recipe->appendIngredient($line);
             }
         }
     }
     return $recipe;
 }
 /**
  * Parse a recipe page, starting from the schema.org microdata parser and
  * then applying site-specific overrides for times and ingredients.
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page; forwarded to RecipeParser_Parser_MicrodataSchema::parse().
  * @return mixed The recipe object returned by RecipeParser_Parser_MicrodataSchema::parse(), with overrides applied.
  */
 public static function parse($html, $url)
 {
     $recipe = RecipeParser_Parser_MicrodataSchema::parse($html, $url);

     // Silence libxml so sloppy markup does not raise warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // OVERRIDES for epicurious
     // Active / total times shown in the "summary_data" blocks.
     foreach ($xpath->query('//*[@class="summary_data"]') as $summary) {
         // ACTIVE is tested first; a block is only treated as TOTAL when it
         // does not mention ACTIVE.
         if (preg_match('/ACTIVE/', $summary->nodeValue)) {
             $property = 'prep_time';
         } elseif (preg_match('/TOTAL/', $summary->nodeValue)) {
             $property = 'total_time';
         } else {
             continue;
         }
         // Every direct <span> child overwrites the value, so the last one wins.
         foreach ($summary->childNodes as $child) {
             if ($child->nodeName == "span") {
                 $recipe->{$property} = RecipeParser_Text::formatAsOneLine($child->nodeValue);
             }
         }
     }

     // Total time from the itemprop "content" attribute (ISO 8601 duration).
     $total_nodes = $xpath->query('//*[@itemprop="totalTime"]');
     if ($total_nodes->length > 0) {
         $duration = $total_nodes->item(0)->getAttribute("content");
         $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($duration);
     }

     // Ingredients: discard what the base parser found and rebuild from
     // the direct children of <div id="ingredients">.
     $recipe->resetIngredients();
     foreach ($xpath->query('//div[@id = "ingredients"]/*') as $element) {
         switch ($element->nodeName) {
             case 'strong':
                 // <strong> holds the name of an ingredient section.
                 $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($element->nodeValue));
                 break;
             case 'ul':
                 // Only <li> children are ingredients; anything else is skipped.
                 foreach ($element->childNodes as $item) {
                     if ($item->nodeName != 'li') {
                         continue;
                     }
                     $recipe->appendIngredient(trim($item->nodeValue));
                 }
                 break;
         }
     }

     return $recipe;
 }
 /**
  * Parse a recipe marked up with RDFa data-vocabulary attributes
  * (property="v:..." and rel="v:...").
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page; passed to RecipeParser_Text::formatPhotoUrl() along with the photo src.
  * @return RecipeParser_Recipe Recipe populated with whichever fields were found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title
     $matches = $xpath->query('//*[@property="v:name"]');
     if ($matches->length > 0) {
         $recipe->title = trim($matches->item(0)->nodeValue);
     }

     // Summary
     $matches = $xpath->query('//*[@property="v:summary"]');
     if ($matches->length > 0) {
         $recipe->description = trim($matches->item(0)->nodeValue);
     }

     // Times: RDFa property => key in $recipe->time.
     $time_properties = array('v:prepTime' => 'prep', 'v:cookTime' => 'cook', 'v:totalTime' => 'total');
     foreach ($time_properties as $property => $time_key) {
         $matches = $xpath->query('//*[@property="' . $property . '"]');
         if ($matches->length == 0) {
             continue;
         }
         $time_node = $matches->item(0);
         // Prefer the "content" attribute (ISO 8601); otherwise fall back to
         // the element's text.
         $content = $time_node->getAttribute('content');
         if ($content) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($content);
         } else {
             $minutes = RecipeParser_Times::toMinutes(trim($time_node->nodeValue));
         }
         if ($minutes) {
             $recipe->time[$time_key] = $minutes;
         }
     }

     // Yield
     $matches = $xpath->query('//*[@property="v:yield"]');
     if ($matches->length > 0) {
         $yield_text = preg_replace('/\\s+/', ' ', trim($matches->item(0)->nodeValue));
         $recipe->yield = RecipeParser_Text::formatYield($yield_text);
     }

     // Ingredients (data-vocabulary)
     foreach ($xpath->query('//*[@rel="v:ingredient"]') as $ingredient_node) {
         $text = RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue);
         if (empty($text)) {
             continue;
         }
         if (!RecipeParser_Text::matchSectionName($text)) {
             $recipe->appendIngredient($text);
             continue;
         }
         $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
     }

     // Instructions: try progressively looser queries and stop at the first
     // one that matches. Order matters: explicit v:instruction nodes first,
     // then <li>, then <p>.
     $per_line_queries = array(
         '//*[@property="v:instructions"]//*[@property="v:instruction"]',
         '//*[@property="v:instructions"]//' . 'li',
         '//*[@property="v:instructions"]//' . 'p',
     );
     $handled = false;
     foreach ($per_line_queries as $expression) {
         $matches = $xpath->query($expression);
         if ($matches->length) {
             RecipeParser_Text::parseInstructionsFromNodes($matches, $recipe);
             $handled = true;
             break;
         }
     }
     if (!$handled) {
         // Either several v:instructions nodes, or a single node holding a
         // blob of text.
         $matches = $xpath->query('//*[@property="v:instructions"]');
         if ($matches->length > 1) {
             RecipeParser_Text::parseInstructionsFromNodes($matches, $recipe);
         } elseif ($matches->length == 1) {
             RecipeParser_Text::parseInstructionsFromBlob($matches->item(0)->nodeValue, $recipe);
         }
     }

     // Photo: src on the rel="v:photo" element itself, else on an <img> below it.
     $src = "";
     $matches = $xpath->query('//*[@rel="v:photo"]');
     if ($matches->length > 0) {
         $src = $matches->item(0)->getAttribute('src');
     }
     if (!$src) {
         $matches = $xpath->query('//*[@rel="v:photo"]//img');
         if ($matches->length > 0) {
             $src = $matches->item(0)->getAttribute('src');
         }
     }
     if ($src) {
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($src, $url);
     }

     // Credits
     $matches = $xpath->query('//*[@property="v:author"]');
     if ($matches->length > 0) {
         $recipe->credits = RecipeParser_Text::formatCredits($matches->item(0)->nodeValue);
     }

     return $recipe;
 }
 /**
  * Parse a recipe page using document-wide itemprop lookups plus the Open
  * Graph image meta tag.
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page (not used by this parser).
  * @return RecipeParser_Recipe Recipe populated with whichever fields were found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Silence libxml so mismatched tags do not raise warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title
     $title_nodes = $xpath->query('//h1[@itemprop="name"]');
     if ($title_nodes->length > 0) {
         $recipe->title = RecipeParser_Text::formatTitle($title_nodes->item(0)->nodeValue);
     }

     // Description
     $description_nodes = $xpath->query('//*[@itemprop="description"]');
     if ($description_nodes->length > 0) {
         $recipe->description = RecipeParser_Text::formatAsOneLine($description_nodes->item(0)->nodeValue);
     }

     // Author
     $author_nodes = $xpath->query('//span[@itemprop="author"]');
     if ($author_nodes->length > 0) {
         $recipe->credits = RecipeParser_Text::formatCredits($author_nodes->item(0)->nodeValue);
     }

     // Prep and total times, both read from the "content" attribute.
     $time_props = array('prepTime' => 'prep', 'totalTime' => 'total');
     foreach ($time_props as $itemprop => $time_key) {
         $time_nodes = $xpath->query('//*[@itemprop="' . $itemprop . '"]');
         if ($time_nodes->length > 0) {
             $duration = $time_nodes->item(0)->getAttribute("content");
             $recipe->time[$time_key] = RecipeParser_Text::iso8601ToMinutes($duration);
         }
     }

     // Yield (note the all-lowercase itemprop value)
     $yield_nodes = $xpath->query('//*[@itemprop="recipeyield"]');
     if ($yield_nodes->length > 0) {
         $recipe->yield = RecipeParser_Text::formatYield($yield_nodes->item(0)->nodeValue);
     }

     // Ingredients
     foreach ($xpath->query('//*[@itemprop="ingredients"]') as $ingredient) {
         $recipe->appendIngredient(RecipeParser_Text::formatAsOneLine($ingredient->nodeValue));
     }

     // Instructions: one per <li> directly under the instructions element.
     foreach ($xpath->query('//*[@itemprop="recipeinstructions"]/li') as $step) {
         $recipe->appendInstruction(RecipeParser_Text::formatAsOneLine($step->nodeValue));
     }

     // Photo: taken verbatim from the Open Graph meta tag.
     $image_nodes = $xpath->query('//meta[@property="og:image"]');
     if ($image_nodes->length > 0) {
         $recipe->photo_url = $image_nodes->item(0)->getAttribute("content");
     }

     return $recipe;
 }
 /**
  * Checks RecipeParser_Text::iso8601ToMinutes() against a table of duration
  * strings and the minute counts expected for each.
  */
 public function test_iso8601_minutes()
 {
     // Duration string => expected minutes. The table includes a comma
     // decimal ('PT0,5H'), a fractional year, a value carrying seconds, and a
     // string without the leading "P" ('T30M').
     $expectations = array(
         'PT2H0M' => 120,
         'PT0,5H' => 30,
         'P23DT23H' => 34500,
         'P0.5Y' => 262974,
         'PT12H30M44S' => 751,
         'T30M' => 30,
     );
     foreach ($expectations as $duration => $minutes) {
         $this->assertEquals($minutes, RecipeParser_Text::iso8601ToMinutes($duration));
     }
 }
 /**
  * Parse a recipe marked up with schema.org Recipe microdata.
  *
  * Nothing is extracted unless an element whose itemtype contains
  * "//schema.org/Recipe" (or the lowercase "recipe" variant) is present.
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page; passed to RecipeParser_Text::formatPhotoUrl() along with the photo URL.
  * @return RecipeParser_Recipe Recipe populated with whichever fields were found (left as constructed when no microdata root exists).
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Locate the microdata root; bail out early when there is none.
     $roots = $xpath->query('//*[contains(@itemtype, "//schema.org/Recipe") or contains(@itemtype, "//schema.org/recipe")]');
     $microdata = $roots->length ? $roots->item(0) : null;
     if (!$microdata) {
         return $recipe;
     }

     // Title
     $matches = $xpath->query('.//*[@itemprop="name"]', $microdata);
     if ($matches->length > 0) {
         $recipe->title = RecipeParser_Text::formatTitle(trim($matches->item(0)->nodeValue));
     }

     // Summary
     $matches = $xpath->query('.//*[@itemprop="description"]', $microdata);
     if ($matches->length > 0) {
         $recipe->description = RecipeParser_Text::formatAsParagraphs($matches->item(0)->nodeValue);
     }

     // Times: itemprop => key in $recipe->time.
     $time_props = array('prepTime' => 'prep', 'cookTime' => 'cook', 'totalTime' => 'total');
     foreach ($time_props as $itemprop => $time_key) {
         $matches = $xpath->query('.//*[@itemprop="' . $itemprop . '"]', $microdata);
         if ($matches->length == 0) {
             continue;
         }
         $time_node = $matches->item(0);
         // Preference order: "content" attribute, then "datetime" attribute
         // (both ISO 8601), then the element's text.
         if ($attr = $time_node->getAttribute('content')) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($attr);
         } elseif ($attr = $time_node->getAttribute('datetime')) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($attr);
         } else {
             $minutes = RecipeParser_Times::toMinutes(trim($time_node->nodeValue));
         }
         if ($minutes) {
             $recipe->time[$time_key] = $minutes;
         }
     }

     // Yield: try the camelCase itemprop first, then the lowercase variant.
     $matches = $xpath->query('.//*[@itemprop="recipeYield"]', $microdata);
     if ($matches->length == 0) {
         $matches = $xpath->query('.//*[@itemprop="recipeyield"]', $microdata);
     }
     if ($matches->length > 0) {
         $yield_node = $matches->item(0);
         $yield_text = $yield_node->hasAttribute('content')
             ? $yield_node->getAttribute('content')
             : $yield_node->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($yield_text);
     }

     // Ingredients. This query is document-wide, not limited to $microdata.
     foreach ($xpath->query('//*[@itemprop="ingredients"]') as $ingredient_node) {
         $text = RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue);
         // Skip blanks, and anything over 150 characters (probably a mistake,
         // like a run-on of existing ingredients).
         if (empty($text) || strlen($text) > 150) {
             continue;
         }
         if (RecipeParser_Text::matchSectionName($text)) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }

     // Instructions: try each per-line layout in order and stop at the first
     // that matches: <li> items, direct children, then elements carrying an
     // "instruction" class. None of these queries is given a context node.
     $per_line_queries = array(
         '//*[@itemprop="recipeInstructions"]//li',
         '//*[@itemprop="recipeInstructions"]/*',
         './/*[@itemprop="recipeInstructions"]//*[contains(concat(" ", normalize-space(@class), " "), " instruction ")]',
     );
     $handled = false;
     foreach ($per_line_queries as $expression) {
         $matches = $xpath->query($expression);
         if ($matches->length) {
             RecipeParser_Text::parseInstructionsFromNodes($matches, $recipe);
             $handled = true;
             break;
         }
     }
     if (!$handled) {
         // Either multiple recipeInstructions nodes, or one node with a blob
         // of text.
         $matches = $xpath->query('.//*[@itemprop="recipeInstructions"]');
         if ($matches->length > 1) {
             RecipeParser_Text::parseInstructionsFromNodes($matches, $recipe);
         } elseif ($matches->length == 1) {
             RecipeParser_Text::parseInstructionsFromBlob($matches->item(0)->nodeValue, $recipe);
         }
     }

     // Photo: Open Graph image first, then itemprop="image" src, then an
     // <img> nested inside itemprop="image".
     $src = "";
     $matches = $xpath->query('//meta[@property="og:image"]');
     if ($matches->length > 0) {
         $src = $matches->item(0)->getAttribute('content');
     }
     if (!$src) {
         $matches = $xpath->query('.//*[@itemprop="image"]', $microdata);
         if ($matches->length > 0) {
             $src = $matches->item(0)->getAttribute('src');
         }
     }
     if (!$src) {
         $matches = $xpath->query('.//*[@itemprop="image"]//img', $microdata);
         if ($matches->length > 0) {
             $src = $matches->item(0)->getAttribute('src');
         }
     }
     if ($src) {
         $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($src, $url);
     }

     // Credits: a publisher, when present, replaces the author. The credits
     // field is always assigned, even when neither was found.
     $credit_text = "";
     foreach (array('author', 'publisher') as $itemprop) {
         $matches = $xpath->query('.//*[@itemprop="' . $itemprop . '"]', $microdata);
         if ($matches->length > 0) {
             $credit_text = $matches->item(0)->nodeValue;
         }
     }
     $recipe->credits = RecipeParser_Text::formatCredits($credit_text);

     return $recipe;
 }
 /**
  * Parse a recipe marked up with data-vocabulary.org Recipe microdata.
  *
  * Nothing is extracted unless an element with
  * itemtype="http://data-vocabulary.org/Recipe" is present.
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page; passed to RecipeParser_Text::relativeToAbsolute() along with the photo URL.
  * @return RecipeParser_Recipe Recipe populated with whichever fields were found (left as constructed when no microdata root exists).
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     libxml_use_internal_errors(true);
     $doc = new DOMDocument();
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Find the top-level node for Recipe microdata; bail out when absent.
     $roots = $xpath->query('//*[@itemtype="http://data-vocabulary.org/Recipe"]');
     $microdata = $roots->length ? $roots->item(0) : null;
     if (!$microdata) {
         return $recipe;
     }

     // Title
     $matches = $xpath->query('.//*[@itemprop="name"]', $microdata);
     if ($matches->length > 0) {
         $recipe->title = RecipeParser_Text::formatTitle($matches->item(0)->nodeValue);
     }

     // Summary
     $matches = $xpath->query('.//*[@itemprop="summary"]', $microdata);
     if ($matches->length > 0) {
         $recipe->description = trim($matches->item(0)->nodeValue);
     }

     // Times: itemprop => key in $recipe->time.
     $time_props = array('prepTime' => 'prep', 'cookTime' => 'cook', 'totalTime' => 'total');
     foreach ($time_props as $itemprop => $time_key) {
         $matches = $xpath->query('.//*[@itemprop="' . $itemprop . '"]', $microdata);
         if ($matches->length == 0) {
             continue;
         }
         $time_node = $matches->item(0);
         // Preference order: "datetime" attribute, then "content" attribute
         // (both ISO 8601), then the element's text.
         if ($attr = $time_node->getAttribute('datetime')) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($attr);
         } elseif ($attr = $time_node->getAttribute('content')) {
             $minutes = RecipeParser_Text::iso8601ToMinutes($attr);
         } else {
             $minutes = RecipeParser_Times::toMinutes(trim($time_node->nodeValue));
         }
         if ($minutes) {
             $recipe->time[$time_key] = $minutes;
         }
     }

     // Yield: "yield" if such an element exists, otherwise "servingSize".
     $yield_text = "";
     $matches = $xpath->query('.//*[@itemprop="yield"]', $microdata);
     if ($matches->length == 0) {
         $matches = $xpath->query('.//*[@itemprop="servingSize"]', $microdata);
     }
     if ($matches->length > 0) {
         $yield_text = trim($matches->item(0)->nodeValue);
     }
     if ($yield_text) {
         $recipe->yield = RecipeParser_Text::formatYield(preg_replace('/\\s+/', ' ', $yield_text));
     }

     // Ingredients: the standard itemprop first, then two non-standard
     // fallbacks; the first query that matches anything wins.
     $ingredient_queries = array(
         './/*[@itemprop="ingredient"]',
         './/*[@id="ingredients"]//li',
         './/*[@class="ingredients"]//li',
     );
     $ingredient_nodes = null;
     foreach ($ingredient_queries as $expression) {
         $ingredient_nodes = $xpath->query($expression, $microdata);
         if ($ingredient_nodes && $ingredient_nodes->length) {
             break;
         }
     }
     foreach ($ingredient_nodes as $ingredient_node) {
         $text = RecipeParser_Text::formatAsOneLine($ingredient_node->nodeValue);
         if (empty($text)) {
             continue;
         }
         if (RecipeParser_Text::matchSectionName($text)) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($text));
         } else {
             $recipe->appendIngredient($text);
         }
     }

     // Instructions: <li> items first, then elements carrying an
     // "instruction" class; stop at the first layout that matches.
     $per_line_queries = array(
         './/*[@itemprop="instructions"]//li',
         './/*[@itemprop="instruction"]//*[contains(concat(" ", normalize-space(@class), " "), " instruction ")]',
     );
     $handled = false;
     foreach ($per_line_queries as $expression) {
         $matches = $xpath->query($expression, $microdata);
         if ($matches->length) {
             RecipeParser_Text::parseInstructionsFromNodes($matches, $recipe);
             $handled = true;
             break;
         }
     }
     if (!$handled) {
         // Either multiple instructions nodes, or one node with a blob of text.
         $matches = $xpath->query('.//*[@itemprop="instructions"]', $microdata);
         if ($matches->length > 1) {
             RecipeParser_Text::parseInstructionsFromNodes($matches, $recipe);
         } elseif ($matches->length == 1) {
             RecipeParser_Text::parseInstructionsFromBlob($matches->item(0)->nodeValue, $recipe);
         }
     }

     // Photo: Open Graph image first, then itemprop="photo" (src, else
     // content), then an <img> nested inside itemprop="photo".
     $src = "";
     $matches = $xpath->query('//meta[@property="og:image"]');
     if ($matches->length > 0) {
         $src = $matches->item(0)->getAttribute('content');
     }
     if (!$src) {
         $matches = $xpath->query('.//*[@itemprop="photo"]', $microdata);
         if ($matches->length > 0) {
             $photo_node = $matches->item(0);
             if ($photo_node->hasAttribute('src')) {
                 $src = $photo_node->getAttribute('src');
             } elseif ($photo_node->hasAttribute('content')) {
                 $src = $photo_node->getAttribute('content');
             }
         }
     }
     if (!$src) {
         $matches = $xpath->query('.//*[@itemprop="photo"]//img', $microdata);
         if ($matches->length > 0) {
             $src = $matches->item(0)->getAttribute('src');
         }
     }
     if ($src) {
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($src, $url);
     }

     // Credits
     $matches = $xpath->query('.//*[@itemprop="author"]', $microdata);
     if ($matches->length > 0) {
         $recipe->credits = RecipeParser_Text::formatCredits($matches->item(0)->nodeValue);
     }

     return $recipe;
 }
Exemple #8
0
 /**
  * Parse a recipe page, starting from the generic microformat parser and then
  * overriding title, instructions, photo, times, yield and ingredients from
  * itemprop-annotated markup.
  *
  * Each override is applied only when its element is actually present, so a
  * page missing one of them keeps the base parser's value instead of
  * triggering a null dereference.
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page; forwarded to the base parser and used to absolutize the photo URL.
  * @return mixed The recipe object returned by RecipeParser_Parser_Microformat::parse(), with overrides applied.
  */
 public static function parse($html, $url)
 {
     // Get all of the standard hrecipe stuff we can find.
     $recipe = RecipeParser_Parser_Microformat::parse($html, $url);
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     $recipe->resetIngredients();
     // Title
     $name_node = $xpath->query('.//*[@itemprop="name"]')->item(0);
     if ($name_node !== null) {
         $recipe->title = trim($name_node->nodeValue);
     }
     // Instructions: one per child element of the recipeInstructions <li>.
     $nodes = $xpath->query('//li[@itemprop="recipeInstructions"]/*');
     foreach ($nodes as $sub) {
         $line = trim($sub->nodeValue);
         $line = RecipeParser_Text::stripLeadingNumbers($line);
         $recipe->appendInstruction($line);
     }
     // Photo
     $image_node = $xpath->query('.//*[@itemprop="image"]')->item(0);
     if ($image_node !== null) {
         $photo_url = $image_node->getAttribute('src');
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
     }
     // Meta data: only read when the metadata wrapper has child elements.
     $nodes = $xpath->query('//div[@class="recipe-metadata-wrap"]/*');
     if ($nodes->length) {
         // itemprop => key in $recipe->time; durations are read from the
         // "content" attribute and skipped when the node or attribute is absent.
         $time_props = array('prepTime' => 'prep', 'cookTime' => 'cook');
         foreach ($time_props as $itemprop => $time_key) {
             $time_node = $xpath->query('.//*[@itemprop="' . $itemprop . '"]')->item(0);
             if ($time_node !== null && $time_node->hasAttribute('content')) {
                 $value = trim($time_node->getAttribute('content'));
                 $recipe->time[$time_key] = RecipeParser_Text::iso8601ToMinutes($value);
             }
         }
         // NOTE(review): assumes $recipe->time already has 'cook' and 'prep'
         // entries when the page did not supply them — confirm against
         // the recipe class defaults.
         $recipe->time['total'] = $recipe->time['cook'] + $recipe->time['prep'];
         $yield_node = $xpath->query('.//*[@itemprop="recipeYield"]')->item(0);
         if ($yield_node !== null) {
             $value = trim($yield_node->nodeValue);
             $recipe->yield = RecipeParser_Text::formatYield($value);
         }
     }
     // Multi-stage ingredients: <h3> starts a section, <ul> holds its items.
     $nodes = $xpath->query('//div[@class="recipe-ingredients-wrapper"]/*');
     foreach ($nodes as $node) {
         if ($node->nodeName == 'h3') {
             $value = RecipeParser_Text::formatSectionName($node->nodeValue);
             $recipe->addIngredientsSection($value);
         } elseif ($node->nodeName == 'ul') {
             $subs = $xpath->query('.//li[@itemprop="ingredients"]', $node);
             foreach ($subs as $sub) {
                 $value = trim($sub->nodeValue);
                 $recipe->appendIngredient($value);
             }
         }
     }
     return $recipe;
 }
 /**
  * Parse a recipe page using class-based XPath lookups for title, yield,
  * times, ingredients, instructions and photo.
  *
  * @param string $html Raw HTML of the recipe page.
  * @param string $url  URL of the page; used to absolutize the photo link.
  * @return RecipeParser_Recipe Recipe populated with whichever fields were found.
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();
     // Turn off libxml errors to prevent mismatched tag warnings.
     libxml_use_internal_errors(true);
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);
     // Title
     $nodes = $xpath->query('//*[@class="rTitle fn"]');
     if ($nodes->length) {
         $line = RecipeParser_Text::formatTitle($nodes->item(0)->nodeValue);
         $recipe->title = $line;
     }
     // Yield
     $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " yield ")]');
     if ($nodes->length) {
         $line = $nodes->item(0)->nodeValue;
         $recipe->yield = RecipeParser_Text::formatYield($line);
     }
     // Times: CSS class on the wrapper element => key in $recipe->time.
     $time_classes = array('prepTime' => 'prep', 'rspec-cook-time' => 'cook', 'totaltime' => 'total');
     foreach ($time_classes as $class_name => $time_key) {
         $nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ' . $class_name . ' ")]/span');
         // The duration is read from the title attribute of the SECOND <span>.
         // item() returns null when fewer than two spans matched, so test the
         // node itself rather than just the list length.
         $span = $nodes->item(1);
         if ($span !== null) {
             $line = $span->getAttribute("title");
             $recipe->time[$time_key] = RecipeParser_Text::iso8601ToMinutes($line);
         }
     }
     // Ingredients
     $nodes = $xpath->query('//*[@class="ingredient"]');
     foreach ($nodes as $node) {
         $line = RecipeParser_Text::formatAsOneLine($node->nodeValue);
         $recipe->appendIngredient($line);
     }
     // Instructions
     $nodes = $xpath->query('//*[@class="instructions"]');
     if ($nodes->length) {
         // Flatten the children of the first instructions element into one
         // blob, marking the end of each <p> with a blank line so the blob
         // can be split back into steps below.
         $blob = "";
         foreach ($nodes->item(0)->childNodes as $node) {
             $blob .= RecipeParser_Text::formatAsOneLine($node->nodeValue) . " ";
             if ($node->nodeName == "p") {
                 $blob .= "\n\n";
             }
         }
         // Minor cleanup
         $blob = str_replace(" , ", ", ", $blob);
         $blob = str_replace(" . ", ". ", $blob);
         $blob = str_replace("  ", " ", $blob);
         foreach (explode("\n\n", $blob) as $line) {
             $line = RecipeParser_Text::formatAsOneLine($line);
             $recipe->appendInstruction($line);
         }
     }
     // Photo
     $nodes = $xpath->query('//a[@class="img-enlarge"]');
     if ($nodes->length) {
         $photo_url = $nodes->item(0)->getAttribute("href");
         $photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
         $recipe->photo_url = $photo_url;
     }
     return $recipe;
 }
 /**
  * Build a recipe from hRecipe microformat markup found in an HTML page.
  *
  * The first element carrying the class "hrecipe" (or, failing that,
  * "hRecipe") is used as the container for title, summary, credits, photo,
  * yield and times. Ingredients and instructions are looked up across the
  * whole document. When no container is found, an empty recipe is returned.
  *
  * @param string $html Raw HTML of the page.
  * @param string $url  Page URL, used to turn a relative photo URL into an absolute one.
  *
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Collect libxml parse errors internally instead of emitting warnings.
     libxml_use_internal_errors(true);
     // NOTE(review): the 'HTML-ENTITIES' target encoding is reported as deprecated
     // in recent PHP releases -- confirm before upgrading the runtime.
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Locate the container element; the lowercase class name wins over the camel-case one.
     $hrecipe = null;
     $container_queries = array(
         './/*[contains(concat(" ", normalize-space(@class), " "), " hrecipe ")]',
         './/*[contains(concat(" ", normalize-space(@class), " "), " hRecipe ")]',
     );
     foreach ($container_queries as $container_query) {
         $candidates = $xpath->query($container_query);
         if ($candidates->length) {
             $hrecipe = $candidates->item(0);
             break;
         }
     }
     if (!$hrecipe) {
         return $recipe;
     }

     // Title
     $title_nodes = $xpath->query('.//*[contains(concat(" ", normalize-space(@class), " "), " fn ")]', $hrecipe);
     if ($title_nodes->length) {
         $recipe->title = RecipeParser_Text::formatTitle($title_nodes->item(0)->nodeValue);
     }

     // Summary
     $summary_nodes = $xpath->query('.//*[@class="summary"]', $hrecipe);
     if ($summary_nodes->length) {
         $recipe->description = RecipeParser_Text::formatAsParagraphs($summary_nodes->item(0)->nodeValue);
     }

     // Credits
     $author_nodes = $xpath->query('.//*[@class="author"]', $hrecipe);
     if ($author_nodes->length) {
         $recipe->credits = RecipeParser_Text::formatCredits($author_nodes->item(0)->nodeValue);
     }

     // Photo: try the "photo" element's own src first, then an <img> nested inside it.
     $photo_url = "";
     $photo_queries = array(
         './/*[contains(concat(" ", normalize-space(@class), " "), " photo ")]',
         './/*[contains(concat(" ", normalize-space(@class), " "), " photo ")]//img',
     );
     foreach ($photo_queries as $photo_query) {
         $photo_nodes = $xpath->query($photo_query, $hrecipe);
         if ($photo_nodes->length) {
             $photo_url = $photo_nodes->item(0)->getAttribute('src');
         }
         if ($photo_url) {
             break;
         }
     }
     if ($photo_url) {
         $recipe->photo_url = RecipeParser_Text::relativeToAbsolute($photo_url, $url);
     }

     // Yield
     $yield_nodes = $xpath->query('.//*[@class="yield"]', $hrecipe);
     if ($yield_nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($yield_nodes->item(0)->nodeValue);
     }

     // Prep time: value-title attribute first, otherwise the text of class="preptime".
     $prep_titled = $xpath->query('.//*[@class="prepTime"]//*[@class="value-title"]', $hrecipe);
     if ($prep_titled->length) {
         $recipe->time['prep'] = RecipeParser_Text::iso8601ToMinutes($prep_titled->item(0)->getAttribute('title'));
     } else {
         $prep_plain = $xpath->query('.//*[@class="preptime"]', $hrecipe);
         if ($prep_plain->length) {
             $recipe->time['prep'] = RecipeParser_Times::toMinutes($prep_plain->item(0)->nodeValue);
         }
     }

     // Cook time: same two-step lookup as prep time.
     $cook_titled = $xpath->query('.//*[@class="cookTime"]//*[@class="value-title"]', $hrecipe);
     if ($cook_titled->length) {
         $recipe->time['cook'] = RecipeParser_Text::iso8601ToMinutes($cook_titled->item(0)->getAttribute('title'));
     } else {
         $cook_plain = $xpath->query('.//*[@class="cooktime"]', $hrecipe);
         if ($cook_plain->length) {
             $recipe->time['cook'] = RecipeParser_Times::toMinutes($cook_plain->item(0)->nodeValue);
         }
     }

     // Total time / duration: a value-title under "totalTime", then one under
     // "duration", and finally the plain text of "duration".
     $total_titled = $xpath->query('.//*[@class="totalTime"]//*[@class="value-title"]', $hrecipe);
     if (!$total_titled->length) {
         $total_titled = $xpath->query('.//*[@class="duration"]//*[@class="value-title"]', $hrecipe);
     }
     if ($total_titled->length) {
         $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($total_titled->item(0)->getAttribute('title'));
     } else {
         $total_plain = $xpath->query('.//*[@class="duration"]', $hrecipe);
         if ($total_plain->length) {
             $recipe->time['total'] = RecipeParser_Times::toMinutes($total_plain->item(0)->nodeValue);
         }
     }

     // Ingredients (searched document-wide, not only inside the container).
     $ingredient_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " ingredient ")]');
     foreach ($ingredient_nodes as $ingredient_node) {
         $text = RecipeParser_Text::formatAsOneLine(trim($ingredient_node->nodeValue));

         // Lines without any word-like character are sometimes used as dividers; drop them.
         if (!preg_match("/\\w/", $text)) {
             continue;
         }

         // Section headings: either wrapped in dashes ("---Cake---") or ending with a colon.
         if (preg_match('/^\\-+([^\\-]{1}.*[^\\-]{1})\\-+$/', $text, $m)
             || preg_match('/^(.+)\\:$/', $text, $m)
         ) {
             $recipe->addIngredientsSection(RecipeParser_Text::formatSectionName($m[1]));
             continue;
         }

         $recipe->appendIngredient($text);
     }

     // Instructions: prefer one <li> per step, then elements classed "instruction".
     $step_nodes = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " instructions ")]//li');
     if (!$step_nodes->length) {
         $query = '//*[contains(concat(" ", normalize-space(@class), " "), " instructions ")]' . '//*[contains(concat(" ", normalize-space(@class), " "), " instruction ")]';
         $step_nodes = $xpath->query($query);
     }
     if ($step_nodes->length) {
         RecipeParser_Text::parseInstructionsFromNodes($step_nodes, $recipe);
         return $recipe;
     }

     // Fallback: several "instructions" nodes are handled node by node,
     // a single one is handed over as one blob of text.
     $blocks = $xpath->query('//*[contains(concat(" ", normalize-space(@class), " "), " instructions ")]');
     if ($blocks->length > 1) {
         RecipeParser_Text::parseInstructionsFromNodes($blocks, $recipe);
     } elseif ($blocks->length === 1) {
         RecipeParser_Text::parseInstructionsFromBlob($blocks->item(0)->nodeValue, $recipe);
     }

     return $recipe;
 }
 /**
  * Build a recipe from a page that marks up its data with itemprop
  * attributes and the "ingredients" / "directions" element ids.
  *
  * Reads the title, total time, yield, ingredients (with optional section
  * headings), numbered directions, end notes and the photo.
  *
  * @param string $html Raw HTML of the page.
  * @param string $url  Page URL, passed along when formatting the photo URL.
  *
  * @return RecipeParser_Recipe
  */
 public static function parse($html, $url)
 {
     $recipe = new RecipeParser_Recipe();

     // Collect libxml parse errors internally instead of emitting warnings.
     libxml_use_internal_errors(true);
     // NOTE(review): the 'HTML-ENTITIES' target encoding is reported as deprecated
     // in recent PHP releases -- confirm before upgrading the runtime.
     $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
     $doc = new DOMDocument();
     $doc->loadHTML('<?xml encoding="UTF-8">' . $html);
     $xpath = new DOMXPath($doc);

     // Title
     $headings = $xpath->query('//h1[@itemprop="name"]');
     if ($headings->length) {
         $recipe->title = trim($headings->item(0)->nodeValue);
     }

     // Total time, e.g. <meta content="PT3H30M" itemprop="totalTime">
     $time_metas = $xpath->query('//meta[@itemprop="totalTime"]');
     $duration = $time_metas->length ? $time_metas->item(0)->getAttribute('content') : "";
     if ($duration) {
         $recipe->time['total'] = RecipeParser_Text::iso8601ToMinutes($duration);
     }

     // Yield
     $yield_nodes = $xpath->query('//*[@itemprop="recipeYield"]');
     if ($yield_nodes->length) {
         $recipe->yield = RecipeParser_Text::formatYield($yield_nodes->item(0)->nodeValue);
     }

     // Ingredients: <h2> children open a section, <ol> children hold the items.
     $ingredient_children = $xpath->query('//div[@id = "ingredients"]/*');
     foreach ($ingredient_children as $child) {
         if ($child->nodeName == 'h2') {
             $heading = RecipeParser_Text::formatSectionName(trim($child->nodeValue));
             // The generic "Ingredients" heading is not treated as a section.
             if ($heading != "Ingredients") {
                 $recipe->addIngredientsSection($heading);
             }
             continue;
         }
         if ($child->nodeName != 'ol') {
             continue;
         }
         foreach ($xpath->query('./li/span', $child) as $item) {
             $recipe->appendIngredient(trim($item->nodeValue));
         }
     }

     // Instructions: one <li> per step; a step may also carry a section name.
     $steps = $xpath->query('//div[@id = "directions"]/ol/li');
     foreach ($steps as $step) {
         $text = RecipeParser_Text::stripLeadingNumbers(trim($step->nodeValue));
         $parts = self::splitDirections($text);
         if ($parts['section']) {
             $recipe->addInstructionsSection(RecipeParser_Text::formatSectionName($parts['section']));
         }
         $recipe->appendInstruction($parts['direction']);
     }

     // Notes
     $endnotes = $xpath->query('//div[@id = "directions"]/div[@id = "endnotes"]');
     if ($endnotes->length) {
         $recipe->notes = trim($endnotes->item(0)->nodeValue);
     }

     // Photo: skip the site's default/placeholder images.
     $images = $xpath->query('//img[@itemprop="image"]');
     $image = $images ? $images->item(0) : null;
     if ($image) {
         $photo_url = $image->getAttribute('src');
         $is_stock_image = strpos($photo_url, 'default-recipe-image.gif') !== false
             || strpos($photo_url, 'placeholder.gif') !== false;
         if (!$is_stock_image) {
             $recipe->photo_url = RecipeParser_Text::formatPhotoUrl($photo_url, $url);
         }
     }

     return $recipe;
 }