/**
 * Locate the representative h-card of a page.
 *
 * Three strategies are tried in order and the first one that succeeds wins:
 *  1. an h-card whose uid matches the page URL and which has at least one url matching the page URL;
 *  2. an h-card having a url that matches one of the page's rel=me URLs;
 *  3. an h-card having a url that matches the page URL, provided exactly one such h-card exists.
 *
 * @see http://microformats.org/wiki/representative-h-card-parsing
 *
 * @param array $mfs The parsed microformats of the page to search
 * @param string $url The URL the microformats were fetched from
 * @return array|null The representative h-card structure, or null when no h-card qualifies
 */
public static function getRepresentativeHCard(array $mfs, $url)
{
    // Number of url values on the given h-card that match the page URL.
    $countUrlsMatchingPage = function ($hCard) use ($url) {
        $matching = array_filter($hCard['properties']['url'], function ($candidate) use ($url) {
            return urlsMatch($candidate, $url);
        });

        return count($matching);
    };

    // Strategy 1: both uid and url point at the page itself.
    $uidAndUrlMatches = findMicroformatsByCallable($mfs, function ($hCard) use ($url, $countUrlsMatchingPage) {
        if (!hasProp($hCard, 'uid') || !hasProp($hCard, 'url')) {
            return false;
        }
        if (!urlsMatch(getPlaintext($hCard, 'uid'), $url)) {
            return false;
        }

        return $countUrlsMatchingPage($hCard) > 0;
    });
    if (!empty($uidAndUrlMatches)) {
        return $uidAndUrlMatches[0];
    }

    // Strategy 2: a url on the h-card is also listed as rel=me on the page.
    if (!empty($mfs['rels']['me'])) {
        // NOTE(review): this lookup goes through self:: while the other two call the plain
        // function - confirm both resolve to the same implementation.
        $relMeMatches = self::findMicroformatsByCallable($mfs, function ($hCard) use ($mfs) {
            if (!hasProp($hCard, 'url')) {
                return false;
            }
            foreach ($mfs['rels']['me'] as $relUrl) {
                foreach ($hCard['properties']['url'] as $cardUrl) {
                    if (urlsMatch($cardUrl, $relUrl)) {
                        return true;
                    }
                }
            }

            return false;
        });
        if (!empty($relMeMatches)) {
            return $relMeMatches[0];
        }
    }

    // Strategy 3: accept a url-only match, but only when it is unambiguous.
    $urlOnlyMatches = findMicroformatsByCallable($mfs, function ($hCard) use ($countUrlsMatchingPage) {
        if (!hasProp($hCard, 'url')) {
            return false;
        }

        return $countUrlsMatchingPage($hCard) > 0;
    });
    if (count($urlOnlyMatches) === 1) {
        return $urlOnlyMatches[0];
    }

    // Nothing qualified as the representative h-card.
    return null;
}
/**
 * Scrape a di.se news article and extract the analyst recommendation it reports.
 *
 * The headline must have the form "<company>: <analyst house ...>". The analyst house is
 * looked up in the global $analyshus map (matched case-insensitively as a substring of the
 * headline part after the colon) and the company in the global $companys map. The article
 * intro and body are concatenated and scanned for a recommendation keyword.
 *
 * @param string $newsurl Absolute http(s) article URL, or a path that is appended to http://di.se
 * @param bool $debug When true, the resulting record is dumped with print_r()
 * @return array A list holding a single record with the keys stock, rek, to, by, rub, url,
 *               text, date and currency. "rek" is one of "", "0", "+", "++", "-", "--".
 * @throws Exception When the headline element is missing, the headline does not match the
 *                   expected pattern, the analyst house or the company is unknown, or no
 *                   date is found
 */
function getRec($newsurl, $debug = false)
{
    global $analyshus;
    global $companys;

    // Anything that is not already an absolute http/https URL is treated as a path on di.se.
    if (!preg_match('/^https?:/i', $newsurl)) {
        $newsurl = "http://di.se" . $newsurl;
    }

    $html = scraperWiki::scrape($newsurl);
    $dom = new simple_html_dom();
    $dom->load($html);

    $arr = array();

    // Company and analyst house are both read from the headline.
    $rub_el = $dom->find("h1", 0);
    if (is_null($rub_el)) {
        throw new Exception("Hittade inte rubrikelementet " . $newsurl);
    }
    $rub = preg_replace("/[\n\r]/", "", getPlaintext($rub_el));

    if (preg_match("/([\\wåäöÅÄÖ& ]+)\\s*:\\s*(.+)/i", $rub, $matches)) {
        $stock = strtolower_utf8(trim($matches[1]));
        $hus = trim($matches[2]);
        array_push($arr, array(
            "stock" => $stock,
            "rek" => "",
            "to" => -1,
            "by" => "",
            "rub" => $rub,
            "url" => $newsurl,
            "text" => "",
            "date" => "",
            "currency" => "",
        ));

        // The first known analyst house mentioned after the colon wins.
        foreach ($analyshus as $key => $value) {
            if (stripos($hus, $key) !== false) {
                $arr[0]["by"] = $value;
                break;
            }
        }
        if (strlen($arr[0]["by"]) == 0) {
            throw new Exception("'{$hus}' är inte en mäklare, rub: " . $rub . " " . $newsurl);
        }
    } else {
        throw new Exception("Ingen matchande rubrik hittades: " . $newsurl . " " . getPlaintext($dom->find("#articleBody", 0)));
    }
    if ($arr[0]["by"] == "") {
        throw new Exception("Hittade inte mäklare: " . $rub);
    }

    // Resolve the company: first an exact key lookup, then a search through the map with the
    // name as written, and finally with all spaces removed from the name.
    $stockKey = strtolower($stock);
    $stockname = null;
    if (array_key_exists($stockKey, $companys)) {
        $stockname = $companys[$stockKey];
    }
    if (is_null($stockname)) {
        $stockname = getitem_array_in_string($companys, $stockKey);
    }
    if (is_null($stockname)) {
        $stockname = getitem_array_in_string($companys, str_replace(" ", "", $stockKey));
    }
    if (is_null($stockname)) {
        throw new Exception("{$stock} är inte en aktie " . $newsurl);
    }
    $arr[0]["stock"] = $stockname;

    // Publication date.
    $datebox = $dom->find("#phArticle .date", 0);
    if (!is_null($datebox)) {
        $arr[0]["date"] = trim(str_replace("Uppdaterad ", "", getPlaintext($datebox)));
    }
    if ($arr[0]["date"] == "") {
        throw new Exception("Inget datum " . $newsurl);
    }

    // Article text: intro (when non-empty) followed by the body.
    $intro = $dom->find("#articleIntro", 0);
    $arttext = $dom->find("#articleBody", 0);
    $text = "";
    if (!is_null($intro)) {
        $introText = getPlaintext($intro);
        if (strlen($introText) > 0) {
            $text = $introText . ". ";
        }
    }
    if (!is_null($arttext)) {
        $text = $text . getPlaintext($arttext);
    }

    // Target price and currency. array_merge() returns a new array, so the result has to be
    // assigned back; previously it was discarded and "to"/"currency" kept their defaults.
    // NOTE(review): assumes getRiktkursAndCurrency() returns an associative array keyed by
    // "to" and "currency" - confirm against its definition.
    $arr[0] = array_merge($arr[0], getRiktkursAndCurrency($text));

    $arr[0]["text"] = preg_replace("/[\n\r]/", "", $text);

    // Recommendation: scan the words that follow the first trigger word.
    $rek = "";
    $rekindex = iarray_in_string(array("rekommendation", "höjer", "sänker"), $text);
    if ($rekindex > -1) {
        $words = explode(" ", strtolower(preg_replace("/[!\\.,]/", "", substr($text, $rekindex))));
        $wordCount = count($words);
        $starkt = false;
        for ($i = 0; $i < $wordCount; $i++) {
            $previous = $i > 0 ? $words[$i - 1] : "";
            switch ($words[$i]) {
                case "starkt":
                    // Only a modifier: remember it and let a later word decide the direction.
                    $starkt = true;
                    break;
                case "köp":
                case "övervikt":
                case "öka":
                case "köprekommendation":
                case "outperform":
                    $rek = "+";
                    break;
                case "buy":
                    $rek = ($previous == "conviction") ? "++" : "+";
                    break;
                case "perform":
                    // Only "sector perform" counts; a bare "perform" is not a rating.
                    if ($previous == "sector") {
                        $rek = "-";
                    }
                    break;
                case "minska":
                case "undervikt":
                case "sälj":
                case "säljrekommendation":
                case "underperform":
                    $rek = "-";
                    break;
                case "neutral":
                case "jämvikt":
                case "behåll":
                case "behållrekommendation":
                    $rek = "0";
                    break;
            }
            if (strlen($rek) > 0) {
                // "starkt" strengthens a plain buy/sell; neutral and already-strong ratings stay as they are.
                if ($starkt && ($rek == "+" || $rek == "-")) {
                    $rek = $rek . $rek;
                }
                break;
            }
        }
    }

    // Fallback: look for an explicit recommendation noun anywhere in the text.
    // Later checks deliberately override earlier ones.
    if ($rek == "") {
        if (stripos($text, "köprekommendation") !== false) {
            $rek = "+";
        }
        if (iarray_in_string(array("säljrekommendation", "säljlista"), $text) > -1) {
            $rek = "-";
        }
        if (stripos($text, "behållrekommendation") !== false) {
            $rek = "0";
        }
    }
    if (strlen($rek) > 0) {
        $arr[0]["rek"] = $rek;
    }

    if ($debug) {
        print_r($arr[0]);
    }
    return $arr;
}