/**
 * Persist a daemon log event and keep the daemon's stored status in sync.
 *
 * Reads the `id`, `message`, `context` and `type` values from the event,
 * saves a new PhabricatorDaemonLogEvent for the daemon, and then saves the
 * daemon itself if the status implied by the event type differs from the
 * status it currently reports.
 *
 * @param PhutilEvent Event carrying the log data.
 * @return void
 */
private function handleLogEvent(PhutilEvent $event) {
  $daemon = $this->getDaemon($event->getValue('id'));

  // TODO: This is a bit awkward for historical reasons, clean it up after
  // removing Conduit.
  $text = $event->getValue('message');
  $context = $event->getValue('context');

  // Only prefix the context when it adds information beyond the message.
  $has_distinct_context = strlen($context) && $context !== $text;
  if ($has_distinct_context) {
    $text = "({$context}) {$text}";
  }

  $type = $event->getValue('type');
  $text = phutil_utf8ize($text);

  id(new PhabricatorDaemonLogEvent())
    ->setLogID($daemon->getID())
    ->setLogType($type)
    ->setMessage((string) $text)
    ->setEpoch(time())
    ->save();

  // "WAIT" events mark the daemon as waiting; every other event type means
  // it is running. The comparison is deliberately loose, like a `switch`.
  $current_status = ($type == 'WAIT')
    ? PhabricatorDaemonLog::STATUS_WAIT
    : PhabricatorDaemonLog::STATUS_RUNNING;

  // Avoid a write when nothing changed.
  if ($current_status !== $daemon->getStatus()) {
    $daemon->setStatus($current_status)->save();
  }
}
/**
 * Truncate a value so that its string content roughly fits a byte budget.
 *
 * - A string of at most `$length` bytes is returned unchanged. A longer
 *   string is cut to `$length` bytes, suffixed with "\n<...>" and passed
 *   through phutil_utf8ize() (the byte cut may split a character).
 * - An array is processed element by element. Each scalar element consumes
 *   budget equal to the byte length of its (truncated) string form. Once
 *   the budget is exhausted, remaining elements are dropped and a single
 *   "<...>" marker entry is added.
 * - Any other value is returned unchanged.
 *
 * Nested arrays are truncated recursively against the remaining budget but
 * do not consume budget themselves.
 *
 * @param wild Value to truncate.
 * @param int Byte budget.
 * @return wild Truncated value.
 */
private static function truncateValue($value, $length) {
  if (is_string($value)) {
    if (strlen($value) <= $length) {
      return $value;
    }

    // NOTE: PhutilUTF8StringTruncator has huge runtime for giant strings.
    return phutil_utf8ize(substr($value, 0, $length) . "\n<...>");
  }

  if (!is_array($value)) {
    return $value;
  }

  foreach ($value as $key => $v) {
    if ($length <= 0) {
      // Drop the element first, then add the marker, so the marker survives
      // even when the element being dropped is itself keyed "<...>".
      unset($value[$key]);
      $value['<...>'] = '<...>';
      continue;
    }

    $v = self::truncateValue($v, $length);

    // Only scalars have a meaningful byte length. Calling strlen() on a
    // nested array (or an object) raises a TypeError on PHP 8 and a warning
    // on older versions, so those values leave the budget untouched.
    if (is_scalar($v)) {
      $length -= strlen((string) $v);
    }

    $value[$key] = $v;
  }

  return $value;
}
/**
 * Check that phutil_utf8ize() turns a string containing three invalid bytes
 * into the same string with three U+FFFD replacement characters.
 */
public function testUTF8ize_owl_isCuteAndFerocious() {
  // This was once a ferocious owl when we used to use "?" as the replacement
  // character instead of U+FFFD, but now he is sort of not as cute or
  // ferocious.

  // The invalid bytes are written as escapes so the fixture cannot be
  // silently re-encoded into already-valid U+FFFD characters, which would
  // reduce this test to an identity check.
  $input = "M(o\xEE\xFF\xFFo)M";
  $expect = "M(o\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBDo)M";

  $result = phutil_utf8ize($input);
  $this->assertEqual($expect, $result);
}
/**
 * Run `log --xml` for one revision of a URI through the repository's remote
 * command runner and parse the output.
 *
 * This method is kind of awkward here but both the SVN message and
 * change parsers use it.
 *
 * @param wild URI to query; formatted into the command with `%s`.
 * @param wild Revision to query; formatted into the command with `%d`.
 * @param bool When truthy, `--verbose` is added to the command.
 * @return SimpleXMLElement Parsed log XML.
 */
protected function getSVNLogXMLObject($uri, $revision, $verbose = false) {
  // A truthy flag becomes the literal switch; a falsy flag is interpolated
  // as given.
  $verbose_flag = $verbose ? '--verbose' : $verbose;
  $pattern = "log --xml {$verbose_flag} --limit 1 %s@%d";

  list($raw_xml) = $this->repository->execxRemoteCommand(
    $pattern,
    $uri,
    $revision);

  // Subversion may send us back commit messages which won't parse because
  // they have non UTF-8 garbage in them. Slam them into valid UTF-8.
  return new SimpleXMLElement(phutil_utf8ize($raw_xml));
}
/**
 * Build the object from a status code and a response body.
 *
 * Only an excerpt of the body is retained: bodies longer than 512 bytes are
 * cut to 512 bytes and suffixed with "...". The excerpt is passed through
 * phutil_utf8ize() before it is stored.
 *
 * @param wild Status code, forwarded to the parent constructor.
 * @param string Response body.
 */
public function __construct($status_code, $body) {
  // NOTE: Avoiding phutil_utf8_shorten() here because this isn't lazy
  // and responses may be large.
  $excerpt = $body;
  if (strlen($body) > 512) {
    $excerpt = substr($body, 0, 512) . '...';
  }

  $this->excerpt = phutil_utf8ize($excerpt);

  parent::__construct($status_code);
}
/**
 * Turn `svn log --xml` style output into a map of revision to epoch.
 *
 * Each `<logentry>` contributes one entry: its `revision` attribute cast to
 * an integer is the key, and its first `<date>` parsed with strtotime() and
 * cast to an integer is the value.
 *
 * @param string Raw XML; sanitized with phutil_utf8ize() before parsing.
 * @return map<int, int> Map from revision to epoch.
 */
private function parseSVNLogXML($xml) {
  $log = new SimpleXMLElement(phutil_utf8ize($xml));

  $epochs = array();
  foreach ($log->logentry as $entry) {
    $revision = (int) $entry['revision'];
    $epochs[$revision] = (int) strtotime((string) $entry->date[0]);
  }

  return $epochs;
}
/**
 * Read author, committer and message for a Git commit and record them.
 *
 * Runs `git log` with a NUL-separated format, normalizes every field to
 * valid UTF-8, stores the result through updateCommitData() and, when
 * shouldQueueFollowupTasks() says so, queues the change parser task.
 *
 * @param PhabricatorRepository Repository the commit belongs to.
 * @param PhabricatorRepositoryCommit Commit to parse.
 * @return void
 */
public function parseCommit(
  PhabricatorRepository $repository,
  PhabricatorRepositoryCommit $commit) {

  // NOTE: %B was introduced somewhat recently in git's history, so pull
  // commit message information with %s and %b instead.

  // Even though we pass --encoding here, git doesn't always succeed, so
  // we try a little harder, since git *does* tell us what the actual encoding
  // is correctly (unless it doesn't; encoding is sometimes empty).
  list($info) = $repository->execxLocalCommand(
    'log -n 1 --encoding=%s --format=%s %s --',
    'UTF-8',
    implode('%x00', array('%e', '%cn', '%ce', '%an', '%ae', '%s%n%n%b')),
    $commit->getCommitIdentifier());

  // The format joins its six fields with "%x00", so split on NUL bytes. The
  // limit keeps the final field (the message) in one piece.
  $parts = explode("\0", $info, 6);
  $encoding = array_shift($parts);

  // See note above - git doesn't always convert the encoding correctly.
  $do_convert = false;
  if (strlen($encoding) && strtoupper($encoding) != 'UTF-8') {
    if (function_exists('mb_convert_encoding')) {
      $do_convert = true;
    }
  }

  foreach ($parts as $key => $part) {
    if ($do_convert) {
      $part = mb_convert_encoding($part, 'UTF-8', $encoding);
    }

    // Sanitize the (possibly converted) text. Sanitizing the original
    // bytes here would throw the conversion result away.
    $parts[$key] = phutil_utf8ize($part);
  }

  $committer_name = $parts[0];
  $committer_email = $parts[1];
  $author_name = $parts[2];
  $author_email = $parts[3];
  $message = $parts[4];

  if (strlen($author_email)) {
    $author = "{$author_name} <{$author_email}>";
  } else {
    $author = "{$author_name}";
  }

  if (strlen($committer_email)) {
    $committer = "{$committer_name} <{$committer_email}>";
  } else {
    $committer = "{$committer_name}";
  }

  // Only record a committer when it differs from the author.
  if ($committer == $author) {
    $committer = null;
  }

  $this->updateCommitData($author, $message, $committer);

  if ($this->shouldQueueFollowupTasks()) {
    $task = new PhabricatorWorkerTask();
    $task->setTaskClass('PhabricatorRepositoryGitCommitChangeParserWorker');
    $task->setData(array('commitID' => $commit->getID()));
    $task->save();
  }
}
/**
 * Read author and description for a Mercurial commit and record them.
 *
 * Runs `log --template` for the commit, treats the first output line as the
 * author and the rest as the message, normalizes both to valid UTF-8, and
 * stores them through updateCommitData(). When shouldQueueFollowupTasks()
 * says so, the change parser task is queued.
 *
 * @param PhabricatorRepository Repository the commit belongs to.
 * @param PhabricatorRepositoryCommit Commit to parse.
 * @return void
 */
public function parseCommit(
  PhabricatorRepository $repository,
  PhabricatorRepositoryCommit $commit) {

  list($stdout) = $repository->execxLocalCommand(
    'log --template %s --rev %s',
    '{author}\\n{desc}',
    $commit->getCommitIdentifier());

  // First line is the author; everything after it is the description.
  list($raw_author, $raw_message) = explode("\n", $stdout, 2);

  $author = phutil_utf8ize($raw_author);
  $message = trim(phutil_utf8ize($raw_message));

  $this->updateCommitData($author, $message);

  if (!$this->shouldQueueFollowupTasks()) {
    return;
  }

  $task = new PhabricatorWorkerTask();
  $task->setTaskClass(
    'PhabricatorRepositoryMercurialCommitChangeParserWorker');
  $task->setData(array('commitID' => $commit->getID()));
  $task->save();
}
/**
 * Build a DiffusionCommitRef for `$this->identifier` from `svn log --xml`.
 *
 * The author string of the first log entry is split with
 * splitUserIdentifier() into a name and an email; the entry's message is
 * used verbatim. The hash list is always empty.
 *
 * @return DiffusionCommitRef Reference describing the commit.
 */
private function loadSubversionCommitRef() {
  $repository = $this->getRepository();

  $path_uri = $repository->getSubversionPathURI(null, $this->identifier);
  list($raw_xml) = $repository->execxRemoteCommand(
    'log --xml --limit 1 %s',
    $path_uri);

  // Subversion may send us back commit messages which won't parse because
  // they have non UTF-8 garbage in them. Slam them into valid UTF-8.
  $log = new SimpleXMLElement(phutil_utf8ize($raw_xml));
  $entry = $log->logentry[0];

  $author = (string) $entry->author;
  $message = (string) $entry->msg;

  list($author_name, $author_email) = $this->splitUserIdentifier($author);

  // No hashes in Subversion.
  $hashes = array();

  return id(new DiffusionCommitRef())
    ->setAuthorName($author_name)
    ->setAuthorEmail($author_email)
    ->setMessage($message)
    ->setHashes($hashes);
}
/**
 * Read author and message for a Git commit and record them.
 *
 * Runs `git log` with an "author NUL message" format, normalizes both
 * fields to valid UTF-8, stores them through updateCommitData() and, when
 * shouldQueueFollowupTasks() says so, queues the change parser task.
 *
 * @param PhabricatorRepository Repository the commit belongs to.
 * @param PhabricatorRepositoryCommit Commit to parse.
 * @return void
 */
public function parseCommit(
  PhabricatorRepository $repository,
  PhabricatorRepositoryCommit $commit) {

  // NOTE: %B was introduced somewhat recently in git's history, so pull
  // commit message information with %s and %b instead.
  list($info) = $repository->execxLocalCommand(
    "log -n 1 --encoding='UTF-8' --pretty=format:%%an%%x00%%s%%n%%n%%b %s",
    $commit->getCommitIdentifier());

  // The format separates author and message with "%x00", so split on the
  // NUL byte. The limit keeps the message in one piece.
  list($author, $message) = explode("\0", $info, 2);

  // Make sure these are valid UTF-8.
  $author = phutil_utf8ize($author);
  $message = phutil_utf8ize($message);
  $message = trim($message);

  $this->updateCommitData($author, $message);

  if ($this->shouldQueueFollowupTasks()) {
    $task = new PhabricatorWorkerTask();
    $task->setTaskClass('PhabricatorRepositoryGitCommitChangeParserWorker');
    $task->setData(array('commitID' => $commit->getID()));
    $task->save();
  }
}
/**
 * Build the object from a status code, response body and headers.
 *
 * Only an excerpt of the body is retained: bodies longer than 512 bytes are
 * cut to 512 bytes and suffixed with "...". If the `Content-Type` header
 * names a charset, the excerpt is converted from that charset to UTF-8 on a
 * best-effort basis. It is then passed through phutil_utf8ize() and stored.
 *
 * @param wild Status code, forwarded to the parent constructor.
 * @param string Response body.
 * @param list Response headers, read through BaseHTTPFuture::getHeader().
 * @param wild Optional value stored as `$this->expect`.
 */
public function __construct(
  $status_code,
  $body,
  array $headers,
  $expect = null) {

  // NOTE: Avoiding PhutilUTF8StringTruncator here because this isn't lazy
  // and responses may be large.
  $excerpt = $body;
  if (strlen($body) > 512) {
    $excerpt = substr($body, 0, 512) . '...';
  }

  $content_type = BaseHTTPFuture::getHeader($headers, 'Content-Type');

  $charset_match = null;
  $has_charset = preg_match(
    '/;\\s*charset=([^;]+)/',
    $content_type,
    $charset_match);

  if ($has_charset) {
    $encoding = trim($charset_match[1], "\"'");
    try {
      $excerpt = phutil_utf8_convert($excerpt, 'UTF-8', $encoding);
    } catch (Exception $ex) {
      // Conversion is best-effort: on failure the unconverted excerpt is
      // kept and sanitized below.
    }
  }

  $this->excerpt = phutil_utf8ize($excerpt);
  $this->expect = $expect;

  parent::__construct($status_code);
}
/**
 * Run `svn log --xml` for one revision of a URI and parse the output.
 *
 * If the command fails and the URI uses the `https` protocol, the command
 * is retried once with the protocol switched to `svn+ssh`. Failures for any
 * other protocol are rethrown.
 *
 * This method is kind of awkward here but both the SVN message and
 * change parsers use it.
 *
 * @param wild URI to query; formatted into the command with `%s`.
 * @param wild Revision to query; formatted into the command with `%d`.
 * @param bool When truthy, `--verbose` is added to the command.
 * @return SimpleXMLElement Parsed log XML.
 */
protected function getSVNLogXMLObject($uri, $revision, $verbose = false) {
  // A truthy flag becomes the literal switch; a falsy flag is interpolated
  // as given.
  $verbose_flag = $verbose ? '--verbose' : $verbose;
  $pattern =
    "svn log --xml {$verbose_flag} --limit 1 --non-interactive %s@%d";

  try {
    list($raw_xml) = execx($pattern, $uri, $revision);
  } catch (CommandException $ex) {
    // HTTPS is generally faster and more reliable than svn+ssh, but some
    // commit messages with non-UTF8 text can't be retrieved over HTTPS, see
    // Facebook rE197184 for one example. Make an attempt to fall back to
    // svn+ssh if we've failed outright to retrieve the message.
    $fallback_uri = new PhutilURI($uri);
    if ($fallback_uri->getProtocol() != 'https') {
      throw $ex;
    }
    $fallback_uri->setProtocol('svn+ssh');

    list($raw_xml) = execx($pattern, $fallback_uri, $revision);
  }

  // Subversion may send us back commit messages which won't parse because
  // they have non UTF-8 garbage in them. Slam them into valid UTF-8.
  return new SimpleXMLElement(phutil_utf8ize($raw_xml));
}
/**
 * Serve a Git HTTP request by handing it to the `git-http-backend` binary.
 *
 * Builds an environment from the current request, pipes the raw request
 * body to the binary, and wraps its output in a response object.
 *
 * @param PhabricatorRepository Repository being served.
 * @param PhabricatorUser Viewer; their username is passed as REMOTE_USER.
 * @return PhabricatorVCSResponse|DiffusionGitResponse A 500 response
 *   carrying stderr when the command fails, the git data otherwise.
 *
 * @phutil-external-symbol class PhabricatorStartup
 */
private function serveGitRequest(
  PhabricatorRepository $repository,
  PhabricatorUser $viewer) {

  $request = $this->getRequest();

  $request_path = $this->getRequestDirectoryPath($repository);
  $repository_root = $repository->getLocalPath();

  // Rebuild the query string to strip `__magic__` parameters and prevent
  // issues where we might interpret inputs like "service=read&service=write"
  // differently than the server does and pass it an unsafe command.

  // NOTE: This does not use getPassthroughRequestParameters() because
  // that code is HTTP-method agnostic and will encode POST data.
  $query_data = array();
  foreach ($_GET as $key => $value) {
    if (strncmp($key, '__', 2) === 0) {
      continue;
    }
    $query_data[$key] = $value;
  }
  $query_string = http_build_query($query_data, '', '&');

  // We're about to wipe out PATH with the rest of the environment, so
  // resolve the binary first.
  $bin = Filesystem::resolveBinary('git-http-backend');
  if (!$bin) {
    throw new Exception(
      pht(
        'Unable to find `%s` in %s!',
        'git-http-backend',
        '$PATH'));
  }

  $request_env = array(
    'REQUEST_METHOD' => $_SERVER['REQUEST_METHOD'],
    'QUERY_STRING' => $query_string,
    'CONTENT_TYPE' => $request->getHTTPHeader('Content-Type'),
    'HTTP_CONTENT_ENCODING' => $request->getHTTPHeader('Content-Encoding'),
    'REMOTE_ADDR' => $_SERVER['REMOTE_ADDR'],
    'GIT_PROJECT_ROOT' => $repository_root,
    'GIT_HTTP_EXPORT_ALL' => '1',
    'PATH_INFO' => $request_path,
    'REMOTE_USER' => $viewer->getUsername(),
  );

  // With `+`, the request-specific values above win over common ones.
  $env = $request_env + $this->getCommonEnvironment($viewer);

  $input = PhabricatorStartup::getRawInput();

  $command = csprintf('%s', $bin);
  $command = PhabricatorDaemon::sudoCommandAsDaemonUser($command);

  $future = id(new ExecFuture('%C', $command))
    ->setEnv($env, true)
    ->write($input);
  list($err, $stdout, $stderr) = $future->resolve();

  // Ignore the error if the response passes the special shallow clone
  // check for validity.
  if ($err && !$this->isValidGitShallowCloneResponse($stdout, $stderr)) {
    return new PhabricatorVCSResponse(
      500,
      pht('Error %d: %s', $err, phutil_utf8ize($stderr)));
  }

  return id(new DiffusionGitResponse())->setGitData($stdout);
}
/**
 * Render the speech (or thought) balloon followed by the cow template.
 *
 * The template in `$this->template` is stripped of comment lines, the
 * `$the_cow` line and the `EOC` terminator, optionally unescaped, and has
 * its `$variable` placeholders substituted through
 * replaceTemplateVariable(). The text in `$this->text` is word-wrapped and
 * framed in a balloon whose border characters depend on `$this->action`
 * and on the number of lines.
 *
 * @return string Balloon and cow, with trailing whitespace removed.
 * @throws Exception If template variable substitution fails.
 */
public function renderCow() {
  $width = 40;
  $template = $this->template;

  // Real ".cow" files are Perl scripts which define a variable called
  // "$the_cow". We aren't going to interpret Perl, so strip all this stuff
  // (and any comments in the file) away.
  $template = phutil_split_lines($template, true);
  $keep = array();
  $is_perl_cowfile = false;
  foreach ($template as $line) {
    if (preg_match('/^#/', $line)) {
      continue;
    }
    if (preg_match('/^\\s*\\$the_cow/', $line)) {
      $is_perl_cowfile = true;
      continue;
    }
    if (preg_match('/^\\s*EOC\\s*$/', $line)) {
      continue;
    }
    $keep[] = $line;
  }
  $template = implode('', $keep);

  // Original .cow files are perl scripts which contain escaped sequences.
  // We attempt to unescape here by replacing any character preceded by a
  // backslash/escape with just that character.
  if ($is_perl_cowfile) {
    $template = preg_replace('/\\\\(.)/', '$1', $template);
  }

  $template = preg_replace_callback(
    '/\\$([a-z]+)/',
    array($this, 'replaceTemplateVariable'),
    $template);

  // preg_replace_callback() reports failure by returning null, not false,
  // so test for "not a string" to catch it.
  if (!is_string($template)) {
    throw new Exception(
      pht('Failed to replace template variables while rendering cow!'));
  }

  $lines = $this->text;

  // TODO: It would be nice to use a utf8 soft wrap here instead, but we
  // do not currently have one. Soft wrap first, then force to utf8.
  $lines = wordwrap($lines, $width - 4, "\n", true);
  $lines = phutil_split_lines($lines, false);
  foreach ($lines as $key => $line) {
    $lines[$key] = phutil_utf8ize($line);
  }

  // Border characters are indexed as: 0/3 = first line left/right,
  // 1/4 = middle lines left/right, 2/5 = last line left/right.
  if ($this->action == 'think') {
    $borders = '((()))';
  } else if (count($lines) == 1) {
    $borders = '<<<>>>';
  } else {
    $borders = '/|\\\\|/';
  }

  // The balloon is as wide as the longest line, measured in bytes.
  $size = 0;
  foreach ($lines as $line) {
    $size = max(strlen($line), $size);
  }

  $balloon = array();
  $balloon[] = ' ' . str_repeat('_', $size + 2);

  $lines = array_values($lines);
  $last = count($lines) - 1;
  foreach ($lines as $idx => $line) {
    if ($idx == 0) {
      $l = $borders[0];
      $r = $borders[3];
    } else if ($idx == $last) {
      $l = $borders[2];
      $r = $borders[5];
    } else {
      $l = $borders[1];
      $r = $borders[4];
    }
    $balloon[] = $l . ' ' . str_pad($line, $size) . ' ' . $r;
  }

  $balloon[] = ' ' . str_repeat('-', $size + 2);
  $balloon = implode("\n", $balloon);

  return rtrim($balloon . "\n" . $template);
}
/**
 * Produce a human-readable explanation why a value can not be JSON-encoded.
 *
 * Null, booleans, integers and floats are always accepted. Arrays are
 * walked recursively and the first problem found is reported, with the
 * chain of keys leading to it joined by " > ". Strings are rejected when
 * phutil_is_utf8() says they are not valid UTF-8. Values of any other type
 * are not inspected and produce no explanation.
 *
 * @param wild Value to validate.
 * @param string Path within the object to provide context.
 * @return string|null Explanation of why it can't be encoded, or null.
 */
function phutil_validate_json($value, $path = '') {
  $is_plain_scalar =
    $value === null ||
    is_bool($value) ||
    is_int($value) ||
    is_float($value);
  if ($is_plain_scalar) {
    return null;
  }

  if (is_array($value)) {
    // Every key in this array shares the same path prefix.
    $prefix = strlen($path) ? $path . ' > ' : '';

    foreach ($value as $key => $subvalue) {
      if (!phutil_is_utf8($key)) {
        // Sanitize the key so the explanation itself is printable.
        return pht(
          'Dictionary key "%s" is not valid UTF8, and can not be JSON encoded.',
          $prefix . phutil_utf8ize($key));
      }

      $problem = phutil_validate_json($subvalue, $prefix . $key);
      if ($problem !== null) {
        return $problem;
      }
    }

    return null;
  }

  if (is_string($value) && !phutil_is_utf8($value)) {
    // Show at most 256 bytes of the offending value.
    $display = phutil_utf8ize(substr($value, 0, 256));

    if (strlen($path)) {
      return pht(
        'Dictionary value at key "%s" is not valid UTF8, and can not be ' .
        'JSON encoded: %s',
        $path,
        $display);
    }

    return pht(
      'String value is not valid UTF8, and can not be JSON encoded: %s',
      $display);
  }

  return null;
}
/**
 * Add the raw patch as a "PATCH" section to a mail body.
 *
 * Does nothing when there is no raw patch or when the
 * `metamta.diffusion.inline-patches` setting is falsy. Otherwise the patch
 * is inlined only if its newline count does not exceed that setting; the
 * text is converted from the repository's configured encoding to UTF-8
 * first. A raw section is added in either case (with a null value when the
 * patch is too long).
 *
 * @param PhabricatorMetaMTAMailBody Mail body to append to.
 * @param PhabricatorRepositoryCommit Commit the patch belongs to.
 * @return void
 */
private function inlinePatch(
  PhabricatorMetaMTAMailBody $body,
  PhabricatorRepositoryCommit $commit) {

  if (!$this->getRawPatch()) {
    return;
  }

  $line_limit = PhabricatorEnv::getEnvConfig(
    'metamta.diffusion.inline-patches');
  if (!$line_limit) {
    return;
  }

  $repository = $commit->getRepository();
  $raw_patch = $this->getRawPatch();

  $section = null;

  $line_count = substr_count($raw_patch, "\n");
  if ($line_count <= $line_limit) {
    // We send email as utf8, so we need to convert the text to utf8 if
    // we can.
    $encoding = $repository->getDetail('encoding', 'UTF-8');
    if ($encoding) {
      $raw_patch = phutil_utf8_convert($raw_patch, 'UTF-8', $encoding);
    }
    $section = phutil_utf8ize($raw_patch);
  }

  if ($section) {
    $section = "PATCH\n\n{$section}\n";
  }

  $body->addRawSection($section);
}
/**
 * Check that a Subversion repository's configured URI is its root.
 *
 * Runs `info --xml` against the configured URI, reads the repository root
 * from the response, and compares the normalized paths of the reported
 * root and the configured URI.
 *
 * @param PhabricatorRepository Repository to verify.
 * @return void
 * @throws Exception If the normalized paths differ.
 */
private function verifySubversionRoot(PhabricatorRepository $repository) {
  list($raw_xml) = $repository->execxRemoteCommand(
    'info --xml %s',
    $repository->getSubversionPathURI());

  $info = new SimpleXMLElement(phutil_utf8ize($raw_xml));

  $remote_root = (string) $info->entry[0]->repository[0]->root[0];
  $expect_root = $repository->getSubversionPathURI();

  $normal_type_svn = PhabricatorRepositoryURINormalizer::TYPE_SVN;

  $remote_normalizer = new PhabricatorRepositoryURINormalizer(
    $normal_type_svn,
    $remote_root);
  $remote_normal = $remote_normalizer->getNormalizedPath();

  $expect_normalizer = new PhabricatorRepositoryURINormalizer(
    $normal_type_svn,
    $expect_root);
  $expect_normal = $expect_normalizer->getNormalizedPath();

  if ($remote_normal == $expect_normal) {
    return;
  }

  throw new Exception(
    pht(
      'Repository "%s" does not have a correctly configured remote URI. ' .
      'The remote URI for a Subversion repository MUST point at the ' .
      'repository root. The root for this repository is "%s", but the ' .
      'configured URI is "%s". To resolve this error, set the remote URI ' .
      'to point at the repository root. If you want to import only part ' .
      'of a Subversion repository, use the "Import Only" option.',
      $repository->getDisplayName(),
      $remote_root,
      $expect_root));
}
/**
 * Convert a stored string to UTF-8 given the encoding it was stored with.
 *
 * Strings stored as 'utf8' are returned unchanged. Otherwise, when mbstring
 * is available, the encoding is detected (restricted to the stored encoding
 * if one is given, or to a fixed candidate list if not) and the string is
 * converted. If detection is unavailable or fails, the string is passed
 * through phutil_utf8ize().
 *
 * @param string String as read from storage.
 * @param string Stored encoding name; may be empty.
 * @return string UTF-8 string.
 */
protected function getUTF8StringFromStorage($string, $encoding) {
  if ($encoding == 'utf8') {
    return $string;
  }

  if (!function_exists('mb_detect_encoding')) {
    return phutil_utf8ize($string);
  }

  if (strlen($encoding)) {
    $candidates = array($encoding);
  } else {
    // TODO: This is pretty much a guess, and probably needs to be
    // configurable in the long run.
    $candidates = array(
      'JIS',
      'EUC-JP',
      'SJIS',
      'ISO-8859-1',
    );
  }

  $detected = mb_detect_encoding($string, $candidates);
  if ($detected) {
    return mb_convert_encoding($string, 'UTF-8', $detected);
  }

  return phutil_utf8ize($string);
}
/**
 * Make tab data safe to JSON encode.
 *
 * Sometimes, tab data includes binary information (like INSERT queries which
 * write file data into the database). To successfully JSON encode it, we
 * need to convert it to UTF-8.
 *
 * Objects are replaced with a "<object:ClassName>" placeholder, arrays are
 * sanitized element by element, and anything else is cut to 4096 bytes
 * (with a note giving the original byte count) before being passed through
 * phutil_utf8ize().
 *
 * @param wild Data to sanitize.
 * @return wild Sanitized data.
 */
private function sanitizeForJSON($data) {
  if (is_object($data)) {
    return '<object:' . get_class($data) . '>';
  }

  if (is_array($data)) {
    foreach ($data as $key => $value) {
      $data[$key] = $this->sanitizeForJSON($value);
    }
    return $data;
  }

  // Truncate huge strings. Since the data doesn't really matter much,
  // just truncate bytes to avoid PhutilUTF8StringTruncator overhead.
  $max = 4096;
  $length = strlen($data);
  if ($length > $max) {
    $data = substr($data, 0, $max) . '...<' . $length . ' bytes>...';
  }

  return phutil_utf8ize($data);
}
/**
 * Make tab data safe to JSON encode.
 *
 * Sometimes, tab data includes binary information (like INSERT queries which
 * write file data into the database). To successfully JSON encode it, we
 * need to convert it to UTF-8.
 *
 * Objects are replaced with a "<object:ClassName>" placeholder, arrays are
 * sanitized element by element, and anything else is passed through
 * phutil_utf8ize().
 *
 * @param wild Data to sanitize.
 * @return wild Sanitized data.
 */
private function sanitizeForJSON($data) {
  if (is_object($data)) {
    return '<object:' . get_class($data) . '>';
  }

  if (!is_array($data)) {
    return phutil_utf8ize($data);
  }

  foreach ($data as $key => $value) {
    $data[$key] = $this->sanitizeForJSON($value);
  }

  return $data;
}
/**
 * Run a verbose `log --xml` for one revision of a URI through the
 * repository's remote command runner and parse the output.
 *
 * @param PhabricatorRepository Repository used to run the command.
 * @param wild URI to query; formatted into the command with `%s`.
 * @param wild Revision to query; formatted into the command with `%d`.
 * @return SimpleXMLElement Parsed log XML.
 */
private function getSVNLogXMLObject(
  PhabricatorRepository $repository,
  $uri,
  $revision) {

  list($raw_xml) = $repository->execxRemoteCommand(
    'log --xml --verbose --limit 1 %s@%d',
    $uri,
    $revision);

  // Subversion may send us back commit messages which won't parse because
  // they have non UTF-8 garbage in them. Slam them into valid UTF-8.
  $clean_xml = phutil_utf8ize($raw_xml);

  return new SimpleXMLElement($clean_xml);
}
/**
 * Attach and/or inline a commit's patch in an outgoing mail.
 *
 * Controlled by the `metamta.diffusion.attach-patches` and
 * `metamta.diffusion.inline-patches` settings; when both are falsy nothing
 * happens. With attaching enabled, the raw patch is added to the mail as a
 * `.patch` attachment. With inlining enabled, the patch text is returned
 * if its newline count does not exceed the inline setting. If loading or
 * attaching the patch throws, the exception is logged and an error message
 * is returned in place of the patch.
 *
 * @param PhabricatorMetaMTAMail Mail to attach the patch to.
 * @param PhabricatorRepository Repository the commit belongs to.
 * @param PhabricatorRepositoryCommit Commit to build the patch for.
 * @return string|null "PATCH" section text, or null if there is none.
 */
private function buildPatch(
  PhabricatorMetaMTAMail $template,
  PhabricatorRepository $repository,
  PhabricatorRepositoryCommit $commit) {

  $attach_patches = PhabricatorEnv::getEnvConfig(
    'metamta.diffusion.attach-patches');
  $inline_patches = PhabricatorEnv::getEnvConfig(
    'metamta.diffusion.inline-patches');

  if (!($attach_patches || $inline_patches)) {
    return;
  }

  $encoding = $repository->getDetail('encoding', 'UTF-8');

  $section = null;
  $patch_error = null;

  try {
    $raw_patch = $this->loadRawPatchText($repository, $commit);
    if ($attach_patches) {
      $commit_name = $repository->formatCommitName(
        $commit->getCommitIdentifier());

      $attachment = new PhabricatorMetaMTAAttachment(
        $raw_patch,
        $commit_name . '.patch',
        'text/x-patch; charset=' . $encoding);
      $template->addAttachment($attachment);
    }
  } catch (Exception $ex) {
    phlog($ex);
    $patch_error = 'Unable to generate: ' . $ex->getMessage();
  }

  if ($patch_error) {
    $section = $patch_error;
  } else if ($inline_patches) {
    $line_count = substr_count($raw_patch, "\n");
    if ($line_count <= $inline_patches) {
      // We send email as utf8, so we need to convert the text to utf8 if
      // we can.
      if ($encoding) {
        $raw_patch = phutil_utf8_convert($raw_patch, 'UTF-8', $encoding);
      }
      $section = phutil_utf8ize($raw_patch);
    }
  }

  if ($section) {
    $section = "PATCH\n\n{$section}\n";
  }

  return $section;
}
/**
 * Find the words which are part of the query string, and bold them in a
 * result string. This makes it easier for users to see why a result
 * matched their query.
 *
 * The input is returned unchanged (as a string) when either the query or
 * the input is empty, or when their combined length exceeds 2048 bytes.
 * Otherwise the result is a list of fragments: plain strings for unmatched
 * text and `<strong>` tags for matched text.
 *
 * @param string Result text to highlight.
 * @return string|list<wild> Original string, or list of fragments.
 */
private function emboldenQuery($str) {
  $query = $this->query->getParameter('query');

  if (!strlen($query) || !strlen($str)) {
    return $str;
  }

  // This algorithm is safe but not especially fast, so don't bother if
  // we're dealing with a lot of data. This mostly prevents silly/malicious
  // queries from doing anything bad.
  if (strlen($query) + strlen($str) > 2048) {
    return $str;
  }

  // Make sure this is UTF8 because phutil_utf8v() will explode if it isn't.
  // This must happen before any byte offsets are computed: sanitizing can
  // change the byte length of the string, which would leave the offsets
  // below pointing at the wrong bytes (or past the end of the map).
  $str = phutil_utf8ize($str);

  // Keep track of which characters we're going to make bold. This is
  // byte oriented, but we'll make sure we don't put a bold in the middle
  // of a character later.
  $bold = array_fill(0, strlen($str), false);

  // Split the query into words.
  $parts = preg_split('/ +/', $query);

  // Find all occurrences of each word, and mark them to be emboldened.
  foreach ($parts as $part) {
    $part = trim($part);
    $part = trim($part, '"+');
    if (!strlen($part)) {
      continue;
    }

    $matches = null;
    $has_matches = preg_match_all(
      '/(?:^|\\b)(' . preg_quote($part, '/') . ')/i',
      $str,
      $matches,
      PREG_OFFSET_CAPTURE);

    if (!$has_matches) {
      continue;
    }

    // Flag the matching part of the range for boldening.
    foreach ($matches[1] as $match) {
      $offset = $match[1];
      $match_length = strlen($match[0]);
      for ($ii = 0; $ii < $match_length; $ii++) {
        $bold[$offset + $ii] = true;
      }
    }
  }

  // Split the string into ranges, applying bold styling as required.
  $out = array();
  $buf = '';
  $pos = 0;
  $is_bold = false;

  foreach (phutil_utf8v($str) as $chr) {
    // The flag is only consulted at the first byte of each character, so a
    // bold range never starts or ends in the middle of a character.
    if ($bold[$pos] != $is_bold) {
      if (strlen($buf)) {
        if ($is_bold) {
          $out[] = phutil_tag('strong', array(), $buf);
        } else {
          $out[] = $buf;
        }
        $buf = '';
      }
      $is_bold = !$is_bold;
    }
    $buf .= $chr;
    $pos += strlen($chr);
  }

  // Flush whatever is left in the buffer.
  if (strlen($buf)) {
    if ($is_bold) {
      $out[] = phutil_tag('strong', array(), $buf);
    } else {
      $out[] = $buf;
    }
  }

  return $out;
}