Beispiel #1
0
    /**
     * Strips any invalid characters based on value/charset pairs.
     *
     * Each value is cleaned locally when possible: values that need no validation
     * are only truncated by bytes, and utf8-family values are filtered with a
     * regular expression. Anything else is batched into a single SELECT that
     * converts the value through its target charset on the database side.
     *
     * @since 4.2.0
     * @access protected
     *
     * @param array $data Array of value arrays. Each value array has the keys
     *                    'value' and 'charset'. An optional 'ascii' key can be
     *                    set to false to avoid redundant ASCII checks. An optional
     *                    'length' key may hold an array with the keys 'length'
     *                    (maximum length) and 'type' ('byte' truncates by bytes,
     *                    anything else by characters); when 'length' is missing
     *                    or not an array the value is never truncated.
     * @return array|WP_Error The $data parameter, with invalid characters removed from
     *                        each value. This works as a passthrough: any additional keys
     *                        such as 'field' are retained in each value array. If we cannot
     *                        remove invalid characters, a WP_Error object is returned.
     */
    protected function strip_invalid_text($data)
    {
        $db_check_string = false;
        foreach ($data as &$value) {
            $charset = $value['charset'];
            if (isset($value['length']) && is_array($value['length'])) {
                $length = $value['length']['length'];
                $truncate_by_byte_length = isset($value['length']['type']) && 'byte' === $value['length']['type'];
            } else {
                $length = false;
                // Since we have no length, we'll never truncate.
                // Initialize the variable to false. true would take us
                // through an unnecessary (for this case) codepath below.
                $truncate_by_byte_length = false;
            }
            // There's no charset to work with.
            if (false === $charset) {
                continue;
            }
            // Column isn't a string.
            if (!is_string($value['value'])) {
                continue;
            }
            // latin1 values, and values that pass the ASCII check, are not
            // validated any further; they only get truncated by byte length.
            // A caller-supplied 'ascii' key suppresses the ASCII check entirely.
            $needs_validation = true;
            if ('latin1' === $charset || (!isset($value['ascii']) && $this->check_ascii($value['value']))) {
                $truncate_by_byte_length = true;
                $needs_validation = false;
            }
            if ($truncate_by_byte_length) {
                // NOTE(review): presumably forces mbstring into a byte-oriented
                // encoding so strlen()/substr() count bytes - confirm against the helper.
                mbstring_binary_safe_encoding();
                if (false !== $length && strlen($value['value']) > $length) {
                    $value['value'] = substr($value['value'], 0, $length);
                }
                reset_mbstring_encoding();
                if (!$needs_validation) {
                    continue;
                }
            }
            // utf8 can be handled by regex, which is a bunch faster than a DB lookup.
            if (('utf8' === $charset || 'utf8mb3' === $charset || 'utf8mb4' === $charset) && function_exists('mb_strlen')) {
                // Group 1 captures runs of up to 40 well-formed sequences; the
                // alternative branch matches any other single character. Replacing
                // every match with '$1' keeps the valid runs and drops the rest.
                $regex = '/
					(
						(?: [\\x00-\\x7F]                  # single-byte sequences   0xxxxxxx
						|   [\\xC2-\\xDF][\\x80-\\xBF]       # double-byte sequences   110xxxxx 10xxxxxx
						|   \\xE0[\\xA0-\\xBF][\\x80-\\xBF]   # triple-byte sequences   1110xxxx 10xxxxxx * 2
						|   [\\xE1-\\xEC][\\x80-\\xBF]{2}
						|   \\xED[\\x80-\\x9F][\\x80-\\xBF]
						|   [\\xEE-\\xEF][\\x80-\\xBF]{2}';
                // Four-byte sequences are only accepted for utf8mb4.
                if ('utf8mb4' === $charset) {
                    $regex .= '
						|    \\xF0[\\x90-\\xBF][\\x80-\\xBF]{2} # four-byte sequences   11110xxx 10xxxxxx * 3
						|    [\\xF1-\\xF3][\\x80-\\xBF]{3}
						|    \\xF4[\\x80-\\x8F][\\x80-\\xBF]{2}
					';
                }
                $regex .= '){1,40}                          # ...one or more times
					)
					| .                                  # anything else
					/x';
                $value['value'] = preg_replace($regex, '$1', $value['value']);
                // Reaching here with a length means it is a character length
                // (byte truncation, if any, already happened above).
                if (false !== $length && mb_strlen($value['value'], 'UTF-8') > $length) {
                    $value['value'] = mb_substr($value['value'], 0, $length, 'UTF-8');
                }
                continue;
            }
            // We couldn't use any local conversions, send it to the DB.
            $value['db'] = $db_check_string = true;
        }
        unset($value); // Remove by reference.

        if ($db_check_string) {
            // The connection charset does not depend on the column, so resolve it once.
            if ($this->charset) {
                $connection_charset = $this->charset;
            } else {
                $connection_charset = $this->dbh->connection_charset();
            }

            $queries = array();
            foreach ($data as $col => $value) {
                if (empty($value['db'])) {
                    continue;
                }
                $has_length = isset($value['length']) && is_array($value['length']);
                // We're going to need to truncate by characters or bytes, depending on the length value we have.
                // isset() guards against 'length' being false/missing, which would otherwise
                // raise a notice/warning when indexing into a non-array.
                if (isset($value['length']['type']) && 'byte' === $value['length']['type']) {
                    // Using binary causes LEFT() to truncate by bytes.
                    $charset = 'binary';
                } else {
                    $charset = $value['charset'];
                }
                if ($has_length) {
                    $queries[$col] = $this->prepare("CONVERT( LEFT( CONVERT( %s USING {$charset} ), %.0f ) USING {$connection_charset} )", $value['value'], $value['length']['length']);
                } elseif ('binary' !== $charset) {
                    // If we don't have a length, there's no need to convert binary - it will always return the same result.
                    $queries[$col] = $this->prepare("CONVERT( CONVERT( %s USING {$charset} ) USING {$connection_charset} )", $value['value']);
                }
                // The 'db' flag is internal bookkeeping; don't leak it to the caller.
                unset($data[$col]['db']);
            }

            // Alias every converted expression as x_<column key> so the result
            // row can be mapped back onto $data.
            $sql = array();
            foreach ($queries as $column => $query) {
                if (!$query) {
                    continue;
                }
                $sql[] = $query . " AS x_{$column}";
            }

            // Only hit the database when there is something to convert; an
            // empty select list would be invalid SQL and be reported as a failure
            // even though the values need no conversion.
            if (!empty($sql)) {
                $this->check_current_query = false;
                $row = $this->get_row("SELECT " . implode(', ', $sql), ARRAY_A);
                if (!$row) {
                    return new WP_Error('wpdb_strip_invalid_text_failure');
                }
                foreach (array_keys($data) as $column) {
                    if (isset($row["x_{$column}"])) {
                        $data[$column]['value'] = $row["x_{$column}"];
                    }
                }
            }
        }
        return $data;
    }