/**
 * Turn every URL found in the given string into an HTML hyperlink.
 *
 * The string is split on single spaces. A word that starts with "www." or an
 * http(s) scheme, and for which urltools::get_elements() does not return
 * false, is replaced by an anchor pointing at the 'url' element of the result.
 * Every other word is HTML-escaped and passed through. Trailing whitespace is
 * stripped from the result.
 */
private function find_urls($string)
{
    $pieces = [];

    foreach (explode(' ', $string) as $token) {
        $urldata = false;

        /**
         * Only ask urltools about words that look like a clickable URL.
         */
        if (preg_match('/^(www\\.|https?:\\/\\/)/i', $token)) {
            $urldata = urltools::get_elements($token);
        }

        if ($urldata === false) {
            $pieces[] = htmlspecialchars($token);
            continue;
        }

        $safe_url = htmlspecialchars($urldata['url']);
        $pieces[] = '<a href="' . $safe_url . '">' . $safe_url . '</a>';
    }

    return rtrim(implode(' ', $pieces));
}
/**
 * Process a "normal" line typed by a nick and update the statistics derived
 * from it: characters, last talked, monologues, line counts per day/part of
 * day/hour, words, smileys, URLs, quotes, and uppercased/exclamation/question
 * lines.
 *
 * $time is the time of the line; its first two characters are read as the
 * hour. $csnick is the nick as seen on the line and is validated before use.
 * $line is the text that was typed.
 *
 * Returns null right after logging a debug message when the nick is invalid.
 * There is no explicit return value otherwise.
 */
protected function set_normal($time, $csnick, $line)
{
    if (!$this->validate_nick($csnick)) {
        output::output('debug', __METHOD__ . '(): invalid nick: \'' . $csnick . '\' on line ' . $this->linenum);
        return null;
    }

    $nick = $this->add_nick($csnick, $time);
    $length = mb_strlen($line, 'UTF-8');
    $nickobj = $this->nick_objs[$nick];
    $nickobj->add_value('characters', $length);
    $nickobj->set_value('lasttalked', $this->date . ' ' . $time);

    /**
     * Monologue tracking. A streak continues for as long as the same nick keeps
     * typing lines.
     */
    if ($nick !== $this->prevnick) {
        /**
         * A different nick typed this line, so the previous streak has ended. Store
         * it as a monologue if it was long enough.
         */
        if ($this->streak >= 5) {
            /**
             * With a current line count of 0, $prevnick has only been seen in a previous
             * parse run and has no object yet. It was set by set_normal() and can
             * therefore be assumed valid. Create its object so the monologue data has a
             * place to go. A lowercase $prevnick is not a problem since it won't be
             * updated before it is actually seen (i.e. on any other activity).
             */
            if ($this->l_total === 0) {
                $this->add_nick($this->prevnick, null);
            }

            $prevobj = $this->nick_objs[$this->prevnick];
            $prevobj->add_value('monologues', 1);

            if ($this->streak > $prevobj->get_value('topmonologue')) {
                $prevobj->set_value('topmonologue', $this->streak);
            }
        }

        /**
         * The current line is the first one of a new streak.
         */
        $this->prevnick = $nick;
        $this->streak = 1;
    } else {
        $this->streak++;
    }

    /**
     * Line counts for the relevant day, part of day, and hour. Each part of day
     * covers six hours: 0-5 night, 6-11 morning, 12-17 afternoon, 18-23 evening.
     * Hours outside of 0-23 do not count towards any part of day.
     */
    $weekday = strtolower(date('D', strtotime($this->date)));
    $hour = (int) substr($time, 0, 2);

    if ($hour >= 0 && $hour <= 23) {
        $dayparts = ['night', 'morning', 'afternoon', 'evening'];
        $daypart = $dayparts[(int) ($hour / 6)];
        $this->{'l_' . $daypart}++;
        $nickobj->add_value('l_' . $weekday . '_' . $daypart, 1);
        $nickobj->add_value('l_' . $daypart, 1);
    }

    $hourkey = 'l_' . ($hour < 10 ? '0' . $hour : $hour);
    $nickobj->add_value($hourkey, 1);
    $nickobj->add_value('l_total', 1);
    $this->{$hourkey}++;
    $this->l_total++;

    /**
     * Words are simply the character groups that are separated by a space.
     */
    $skipquote = false;
    $tokens = explode(' ', $line);
    $nickobj->add_value('words', count($tokens));

    foreach ($tokens as $token) {
        /**
         * A token is tracked as a word when it is composed of letters from the Basic
         * Latin and Latin-1 Supplement character sets, the Hyphen (used properly),
         * and any multibyte characters beyond those two sets (found in UTF-8)
         * regardless of their meaning. The regular expression looks for characters
         * from the aforementioned Latin sets that are not wanted in a word. Note that
         * normalize_line() already took all the dirt out. This way of finding words
         * is not 100% accurate, but it serves its purpose.
         */
        if ($this->wordtracking && !preg_match('/^-|-$|--|[\\x21-\\x2C\\x2E-\\x40\\x5B-\\x60\\x7B-\\x7E]|\\xC2[\\xA1-\\xBF]|\\xC3\\x97|\\xC3\\xB7|\\xEF\\xBF\\xBD/', $token)) {
            $token_length = mb_strlen($token, 'UTF-8');

            /**
             * Anything over 30 characters is most likely not a real word.
             */
            if ($token_length <= 30) {
                $this->add_word($token, $token_length);
            }

            continue;
        }

        /**
         * The smileys regular expression. A smiley can never evaluate as a word (see
         * above).
         */
        if (preg_match('/^(:([][)(pd\\/ox\\\\|3<>s]|-[)d\\/p(]|\'\\()|;([])(pxd\\/o]|-\\)|_;)|[:;](\\)\\)|\\(\\()|\\\\o\\/|<3|=[])p\\/\\\\d(x]|d:|8\\)|-[_.]-|>:\\()$/i', $token)) {
            $nickobj->add_value($this->smileys[strtolower($token)], 1);
            continue;
        }

        /**
         * Only catch URLs that were intended to be clicked on. Most clients can
         * handle URLs that begin with "www." or a scheme like "http://".
         */
        if (!preg_match('/^(www\\.|https?:\\/\\/)/i', $token)) {
            continue;
        }

        /**
         * Valid URL or not, a line that contains one is never used as a quote. Quotes
         * with URLs in them often look messy/confusing on the stats page.
         */
        $skipquote = true;
        $urldata = urltools::get_elements($token);

        if ($urldata === false) {
            output::output('debug', __METHOD__ . '(): invalid url: \'' . $token . '\' on line ' . $this->linenum);
            continue;
        }

        /**
         * Track URLs of up to a sensible limit of 1024 characters in length.
         */
        if (strlen($urldata['url']) <= 1024) {
            $this->add_url($urldata, $time, $nick);
            $nickobj->add_value('urls', 1);
        }
    }

    /**
     * Quotes/example lines are tracked up to a sensible limit of 255 characters
     * in length, and never when the line contains a URL. This applies to all of
     * the types seen below.
     */
    $quotable = !$skipquote && $length <= 255;

    if ($quotable) {
        $nickobj->add_quote('quote', $line, $length);
    }

    /**
     * An uppercased line consists of 2 or more characters, is completely
     * uppercased, and has less than 50% non-letter characters from the Basic
     * Latin and Latin-1 Supplement character sets in it.
     */
    $uppercased = false;

    if ($length >= 2 && mb_strtoupper($line, 'UTF-8') === $line) {
        $remainder = preg_replace('/[\\x21-\\x40\\x5B-\\x60\\x7B-\\x7E]|\\xC2[\\xA1-\\xBF]|\\xC3\\x97|\\xC3\\xB7|\\xEF\\xBF\\xBD/S', '', $line);
        $uppercased = mb_strlen($remainder, 'UTF-8') * 2 > $length;
    }

    if ($uppercased) {
        $nickobj->add_value('uppercased', 1);

        if ($quotable) {
            $nickobj->add_quote('ex_uppercased', $line, $length);
        }
    }

    /**
     * A line counts as either an exclamation or a question depending on its last
     * character, with the exclamation mark being checked first.
     */
    if (preg_match('/!$/', $line)) {
        $ending = 'exclamations';
    } elseif (preg_match('/\\?$/', $line)) {
        $ending = 'questions';
    } else {
        $ending = null;
    }

    if ($ending !== null) {
        $nickobj->add_value($ending, 1);

        if ($quotable) {
            $nickobj->add_quote('ex_' . $ending, $line, $length);
        }
    }
}