/**
 * Benchmark-style test: parses one large synthetic record several times.
 *
 * The record content is assembled from:
 *  - $fieldsCnt string fields, each the fixture text with five characters at
 *    random offsets replaced by a double quote and then passed through
 *    OrientDBRecordEncoder::encodeString();
 *  - $fieldsCnt boolean fields, link fields and numeric fields (formatted
 *    with "%f" followed by a literal "f");
 *  - a single map with $fieldsCnt integer entries.
 *
 * The content length and the elapsed parsing time are printed; the only
 * assertion on the parser is that the last parsed record has non-empty data.
 *
 * @return void
 */
public function testDecodeRecordBig()
{
    $runs = 10;
    $fieldsCnt = 10;

    $pwd = dirname(realpath(__FILE__));
    $fixture = $pwd . '/data/CharterofFundamentalRightsoftheEuropeanUnion.txt';
    $text = file_get_contents($fixture);
    if ($text === false || $text === '') {
        // Without the fixture the benchmark would silently run on garbage data
        $this->fail('Unable to read non-empty fixture file: ' . $fixture);
    }
    // Loop-invariant, used to pick random offsets below
    $textLength = strlen($text);

    $content = array();

    // Prepare some strings
    for ($i = 0; $i < $fieldsCnt; $i++) {
        $temp = $text;
        for ($j = 0; $j < 5; $j++) {
            // Valid offsets are 0 .. length - 1; an offset equal to the length
            // would append a quote instead of replacing a character
            $pos = rand(0, $textLength - 1);
            $temp = substr($temp, 0, $pos) . '"' . substr($temp, $pos + 1);
        }
        $content[] = 'text_' . $i . ':' . OrientDBRecordEncoder::encodeString($temp);
    }

    // Prepare some booleans
    for ($i = 0; $i < $fieldsCnt; $i++) {
        $content[] = sprintf('bool_%1$s:%2$s', $i, rand(0, 1) ? 'true' : 'false');
    }

    // Prepare some links
    for ($i = 0; $i < $fieldsCnt; $i++) {
        $content[] = sprintf('link_%1$s:#%2$s:%3$s', $i, rand(1, 20), rand(0, 500000));
    }

    // Prepare some numbers
    for ($i = 0; $i < $fieldsCnt; $i++) {
        $content[] = sprintf('num_%1$s:%2$ff', $i, rand(-2000000, 2000000));
    }

    // Some map
    $map = array();
    for ($i = 0; $i < $fieldsCnt; $i++) {
        $map[] = sprintf('"very.long.map_%1$05d":%2$d', $i, rand(-2000, 2000));
    }
    $content[] = 'map:{' . implode(',', $map) . '}';

    $content = implode(',', $content);
    var_dump(strlen($content));

    // Timed section: parse the same content $runs times
    $timeStart = microtime(true);
    for ($i = 0; $i < $runs; $i++) {
        $record = new OrientDBRecord();
        $record->content = $content;
        $record->parse();
    }
    $timeEnd = microtime(true);

    $this->assertNotEmpty($record->data);
    echo $timeEnd - $timeStart;
}
/**
 * Triggers parsing of the parent record, if this dataset is linked to one.
 *
 * When $this->record is null there is nothing to do.
 *
 * @return void
 */
private function isParsed()
{
    if ($this->record === null) {
        // No parent record attached
        return;
    }

    // We have a link to the parent record - ask it to parse itself
    $this->record->parse();
}
/**
 * Parses a record containing maps, a list of links, a list and a set of
 * strings, and checks the class name, the type and size of every
 * collection, the plain string field and the exact content of enumMap.
 *
 * @return void
 */
public function testParseRecordContentSetAndList()
{
    $record = new OrientDBRecord();
    $record->content = 'JavaComplexTestClass@children:{"first":#24:4,"The Observer":#24:22},name:"Silvester",list:[#24:0,#24:1,#24:2,#24:3],enumList:["ENUM1","ENUM2"],enumSet:<"ENUM1","ENUM3">,enumMap:{"2":"ENUM3","1":"ENUM2"}';
    $record->parse();

    $this->assertSame('JavaComplexTestClass', $record->className);

    // Map of links
    $this->assertInternalType('array', $record->data->children);
    $this->assertCount(2, $record->data->children);

    // Plain string field
    $this->assertSame('Silvester', $record->data->name);

    // Remaining collections: field name => expected number of elements
    $expectedSizes = array(
        'list' => 4,
        'enumList' => 2,
        'enumSet' => 2,
        'enumMap' => 2,
    );
    foreach ($expectedSizes as $field => $size) {
        $this->assertInternalType('array', $record->data->{$field});
        $this->assertCount($size, $record->data->{$field});
    }

    $expectedEnumMap = array('2' => 'ENUM3', '1' => 'ENUM2');
    $this->assertEquals($expectedEnumMap, $record->data->enumMap);
}
/**
 * Compares the time needed to fill a freshly constructed record on every
 * iteration against re-using a single record via resetData(), and asserts
 * that the re-use variant is not slower.
 *
 * @return void
 */
public function testResetDataSpeed()
{
    $iterations = 10000;

    // Variant 1: a brand new record per iteration
    $startedAt = microtime(true);
    for ($n = 0; $n < $iterations; $n++) {
        $record = new OrientDBRecord();
        $record->className = 'TestClass';
        $record->data->field1 = 'Data 1';
        $record->data->field2 = 13121982;
        $record->data->field3 = true;
    }
    $freshElapsed = microtime(true) - $startedAt;

    // Variant 2: one record, reset before every fill
    $startedAt = microtime(true);
    $record = new OrientDBRecord();
    for ($n = 0; $n < $iterations; $n++) {
        $record->resetData();
        $record->data->field1 = 'Data 1';
        $record->data->field2 = 13121982;
        $record->data->field3 = true;
    }
    $resetElapsed = microtime(true) - $startedAt;

    $this->assertLessThanOrEqual($freshElapsed, $resetElapsed, $freshElapsed . ' !> ' . $resetElapsed);
}
/**
 * Parses $this->content and populates $this->data and $this->className
 *
 * The content is scanned from position $this->i by a state machine
 * ($this->state). Every iteration does two things:
 *  1. the first switch consumes input according to the current state and may
 *     push a token onto the stack via stackPush();
 *  2. the second switch looks at the type of the last token on the stack and,
 *     when a value or a whole collection is complete, pops it together with
 *     its field name and stores it in $this->data.
 *
 * Several states "fast-forward" with strpos()/preg_match() instead of
 * consuming one character at a time.
 *
 * Parsing stops when the position moves past the end of the content, or when
 * a closing parenthesis is met in value position ($this->continue is set to
 * false), which marks the end of an embedded document.
 *
 * @throws OrientDBDeSerializeException When a boolean literal is malformed
 * @return void
 */
protected function decode()
{
    // Parse record content
    // There is no need to use OrientDBRecordData here, as data will be copied for root record, and no parsing on demand for embedded records is made
    $this->data = new StdClass();
    // initial state
    $this->state = self::STATE_GUESS;
    // is parsing set
    $isSet = false;
    // is parsing list
    $isList = false;
    // is parsing a map
    $isMap = false;
    // is escape symbol
    $escape = false;

    $contentLength = strlen($this->content);

    // The loop intentionally runs one step past the last character, so the
    // value that ends exactly at the end of content is still flushed
    while ($this->i <= $contentLength && $this->continue) {
        // At end of content substr() yields false (PHP < 7.0) or '' (PHP >= 7.0);
        // ord() gives 0 for both
        $char = substr($this->content, $this->i, 1);
        $cCode = ord($char);
        // Classify: [A-Za-z_] is a word character, [0-9] a number, the rest is "other"
        if ($cCode >= 0x41 && $cCode <= 0x5a || $cCode >= 0x61 && $cCode <= 0x7a || $cCode === 0x5f) {
            $cClass = self::CCLASS_WORD;
        } elseif ($cCode >= 0x30 && $cCode <= 0x39) {
            $cClass = self::CCLASS_NUMBER;
        } else {
            $cClass = self::CCLASS_OTHER;
        }

        switch ($this->state) {
            case self::STATE_GUESS:
                // First character of a new field (or class) name
                $this->state = self::STATE_NAME;
                $this->buffer = $char;
                $this->i++;
                break;

            case self::STATE_NAME:
                if ($cCode === self::CCODE_COLON) {
                    // Colon found - switch state to value collecting
                    $this->state = self::STATE_VALUE;
                    // fill token with data
                    $this->stackPush(self::TTYPE_NAME);
                } elseif ($cCode === self::CCODE_AT) {
                    // @ found - this was class name
                    // start to collect name - no state change
                    // fill token with data
                    $this->stackPush(self::TTYPE_CLASS);
                } else {
                    // Trying to fast-forward name collecting
                    if ($this->i < $contentLength) {
                        // This can be field name or class name
                        $pos_colon = strpos($this->content, ':', $this->i);
                        $pos_at = strpos($this->content, '@', $this->i);
                        // Check, which one is closest
                        if ($pos_at !== false) {
                            $pos = min($pos_at, $pos_colon);
                        } else {
                            $pos = $pos_colon;
                        }
                    } else {
                        $pos = false;
                    }
                    if ($pos !== false && $pos > $this->i) {
                        // Position is found and we had enough length to perform fast-forward
                        $this->buffer .= substr($this->content, $this->i, $pos - $this->i);
                        $this->i = $pos;
                        // The delimiter itself is handled on the next iteration
                        break;
                    }
                    // Still collecting name
                    $this->buffer .= $char;
                }
                $this->i++;
                break;

            case self::STATE_KEY:
                /**
                 * @TODO If map keys can contain escaping characters
                 */
                if ($cCode === self::CCODE_COLON) {
                    // Colon found - switch state to value collecting
                    $this->state = self::STATE_VALUE;
                    // fill token with data
                    $this->stackPush(self::TTYPE_KEY);
                } else {
                    // Fast-forwarding to " symbol
                    if ($this->i < $contentLength) {
                        $pos = strpos($this->content, '"', $this->i);
                    } else {
                        $pos = false;
                    }
                    if ($pos !== false && $pos > $this->i) {
                        // Before " symbol
                        $this->buffer = substr($this->content, $this->i, $pos - $this->i);
                        $this->i = $pos;
                    }
                }
                $this->i++;
                break;

            case self::STATE_VALUE:
                if ($cCode === self::CCODE_COMMA) {
                    // No value - switch state to comma
                    $this->state = self::STATE_COMMA;
                    // token type is null
                    $this->stackPush(self::TTYPE_NULL);
                } elseif ($cCode === self::CCODE_DOUBLE_QUOTE) {
                    // switch state to string collecting
                    $this->state = self::STATE_STRING;
                    $this->i++;
                } elseif ($cCode === self::CCODE_HASH) {
                    // found hash - switch state to link
                    $this->state = self::STATE_LINK;
                    // add hash to value
                    $this->buffer = $char;
                    $this->i++;
                } elseif ($cCode === self::CCODE_OPEN_ARROW) {
                    // < found, state is still value
                    $this->state = self::STATE_VALUE;
                    // token type is set start
                    $this->stackPush(self::TTYPE_SET_START);
                    // started set
                    $isSet = true;
                    $this->i++;
                } elseif ($cCode === self::CCODE_CLOSE_ARROW) {
                    // > found,
                    $this->state = self::STATE_COMMA;
                    // token type is set end
                    $this->stackPush(self::TTYPE_SET_END);
                    // stopped set
                    $isSet = false;
                    $this->i++;
                } elseif ($cCode === self::CCODE_OPEN_SQUARE) {
                    // [ found, state is still value
                    $this->state = self::STATE_VALUE;
                    // token type is list start
                    $this->stackPush(self::TTYPE_LIST_START);
                    // started list
                    $isList = true;
                    $this->i++;
                } elseif ($cCode === self::CCODE_CLOSE_SQUARE) {
                    // ] found,
                    $this->state = self::STATE_COMMA;
                    // token type is list end
                    $this->stackPush(self::TTYPE_LIST_END);
                    // stopped list
                    $isList = false;
                    $this->i++;
                } elseif ($cCode === self::CCODE_OPEN_CURLY) {
                    // found { switch state to key
                    $this->state = self::STATE_KEY;
                    // token type is map start
                    $this->stackPush(self::TTYPE_MAP_START);
                    // started map
                    $isMap = true;
                    $this->i++;
                } elseif ($cCode === self::CCODE_CLOSE_CURLY) {
                    // } found
                    // check if null value in the end of the map
                    if ($this->stackGetLastType() === self::TTYPE_KEY) {
                        // last key has no value: push a null token and revisit
                        // the same } on the next iteration (position not advanced)
                        $this->stackPush(self::TTYPE_NULL);
                        break;
                    }
                    $this->state = self::STATE_COMMA;
                    // token type is map end
                    $this->stackPush(self::TTYPE_MAP_END);
                    // stopped map
                    $isMap = false;
                    $this->i++;
                } elseif ($cCode === self::CCODE_OPEN_PARENTHESES) {
                    // ( found, state is COMMA
                    $this->state = self::STATE_COMMA;
                    // increment position so we can transfer clean document
                    $this->i++;
                    // create new parser
                    $parser = new OrientDBRecordDecoder(substr($this->content, $this->i));
                    // create new embedded document and populate its values
                    $tokenValue = new OrientDBRecord();
                    $tokenValue->data = $parser->data;
                    $tokenValue->className = $parser->className;
                    $tokenValue->setParsed();
                    // token type is embedded
                    $this->stackPush(self::TTYPE_EMBEDDED, $tokenValue);
                    // fast forward to embedded position
                    $this->i += $parser->i;
                    // increment counter so we can continue on clean document
                    $this->i++;
                    break;
                } elseif ($cCode === self::CCODE_CLOSE_PARENTHESES) {
                    // end of current document reached
                    $this->continue = false;
                    break;
                } elseif ($cCode === self::CCODE_BOOL_FALSE || $cCode === self::CCODE_BOOL_TRUE) {
                    // boolean found - switch state to boolean
                    $this->state = self::STATE_BOOLEAN;
                    $this->buffer = $char;
                    $this->i++;
                } else {
                    if ($cClass === self::CCLASS_NUMBER || $cCode === self::CCODE_MINUS) {
                        // number found - switch to number collecting
                        $this->state = self::STATE_NUMBER;
                        $this->buffer = $char;
                        $this->i++;
                    } elseif ($char === false || $char === '') {
                        // End of content reached. substr() reports it as false
                        // before PHP 7.0 and as '' since; both must advance the
                        // position, otherwise the loop never terminates
                        $this->i++;
                    }
                }
                break;

            case self::STATE_COMMA:
                if ($cCode === self::CCODE_COMMA) {
                    // Found a comma - next state depends on the collection we are in
                    if ($isSet) {
                        $this->state = self::STATE_VALUE;
                    } elseif ($isList) {
                        $this->state = self::STATE_VALUE;
                    } elseif ($isMap) {
                        $this->state = self::STATE_KEY;
                    } else {
                        $this->state = self::STATE_GUESS;
                    }
                    $this->i++;
                } else {
                    // Not a comma: let the value state look at the same character
                    $this->state = self::STATE_VALUE;
                }
                break;

            case self::STATE_STRING:
                // Check, if we can fast-forward to next " or \ symbol
                if ($this->i < $contentLength) {
                    // Separate search for symbols
                    $pos_quote = strpos($this->content, '"', $this->i);
                    $pos_escape = strpos($this->content, '\\', $this->i);
                    // Get first position
                    if ($pos_escape !== false) {
                        $pos = min($pos_quote, $pos_escape);
                    } else {
                        $pos = $pos_quote;
                    }
                } else {
                    $pos = false;
                }
                if ($pos !== false) {
                    // If position is found
                    if ($pos > $this->i + 1) {
                        // And position is before any possible escape symbol
                        // Add to buffer everything except the character right before $pos
                        $this->buffer .= substr($this->content, $this->i, $pos - $this->i - 1);
                        // Fast-forwarding
                        $this->i = $pos - 1;
                        break;
                    }
                }
                // NOTE(review): $escape is cleared only when the escaped character
                // is \ or "; a backslash followed by anything else leaves it set.
                // Presumably the encoder never produces such sequences - confirm.
                if ($cCode === self::CCODE_ESCAPE) {
                    // escaping 1 symbol
                    if ($escape === true) {
                        $this->buffer .= $char;
                        $escape = false;
                    } else {
                        $escape = true;
                    }
                } elseif ($cCode === self::CCODE_DOUBLE_QUOTE) {
                    if ($escape === true) {
                        $this->buffer .= $char;
                        $escape = false;
                    } else {
                        // found end of string value - switch state to comma
                        $this->state = self::STATE_COMMA;
                        // token type is string
                        $this->stackPush(self::TTYPE_STRING);
                    }
                } else {
                    // found next byte in string
                    $this->buffer .= $char;
                }
                $this->i++;
                break;

            case self::STATE_LINK:
                // Fast-forward
                $result = preg_match('/\\d+:\\d+/', $this->content, $matches, PREG_OFFSET_CAPTURE, $this->i);
                // And matches from current position
                if ($result && $matches[0][1] === $this->i) {
                    // Buffer is replaced by the matched "cluster:position" text
                    $this->buffer = $matches[0][0];
                    $this->i += strlen($this->buffer);
                } else {
                    // switch state to
                    if ($cCode === self::CCODE_COMMA) {
                        $this->state = self::STATE_COMMA;
                    } else {
                        $this->state = self::STATE_VALUE;
                    }
                    // token type is link
                    $this->stackPush(self::TTYPE_LINK, new OrientDBTypeLink($this->buffer));
                }
                break;

            case self::STATE_NUMBER:
                // Fast-forward
                $result = preg_match('/[\\d\\.e-]+/i', $this->content, $matches, PREG_OFFSET_CAPTURE, $this->i);
                // And matches from current position
                if ($result && $matches[0][1] === $this->i) {
                    $this->buffer .= $matches[0][0];
                    $this->i += strlen($matches[0][0]);
                } else {
                    // switch state to
                    if ($cCode === self::CCODE_COMMA) {
                        $this->state = self::STATE_COMMA;
                    } elseif ($cClass === self::CCLASS_WORD) {
                        $this->state = self::STATE_COMMA;
                    } else {
                        $this->state = self::STATE_VALUE;
                    }
                    // fill token: the character after the digits selects the PHP type
                    if ($cCode === self::CCODE_NUM_BYTE || $cCode === self::CCODE_NUM_SHORT) {
                        $tokenValue = (int) $this->buffer;
                        $this->i++;
                    } elseif ($cCode === self::CCODE_NUM_LONG || $cCode === self::CCODE_NUM_FLOAT || $cCode === self::CCODE_NUM_DOUBLE || $cCode === self::CCODE_NUM_DECIMAL) {
                        $tokenValue = (double) $this->buffer;
                        $this->i++;
                    } elseif ($cCode === self::CCODE_DATE) {
                        // This is datetime
                        $tokenValue = new OrientDBTypeDate($this->buffer);
                        $this->i++;
                    } else {
                        // this is int
                        $tokenValue = (int) $this->buffer;
                    }
                    // token type is number
                    $this->stackPush(self::TTYPE_NUMBER, $tokenValue);
                }
                break;

            case self::STATE_BOOLEAN:
                // Fast-forward
                // The first letter was consumed in the value state, so only the rest of the literal is matched here
                // @TODO It's possible to gain more speed by checking not entire literal, but only first (really, second) character
                if (strpos($this->content, 'rue', $this->i) === $this->i) {
                    $tokenValue = true;
                    $this->i += 3;
                } elseif (strpos($this->content, 'alse', $this->i) === $this->i) {
                    $tokenValue = false;
                    $this->i += 4;
                } else {
                    throw new OrientDBDeSerializeException('Can\'t de-serialize boolean value on key "' . $this->stackGetLastKey() . '"');
                }
                // found end of boolean value - switch state to comma
                $this->state = self::STATE_COMMA;
                // token value is boolean
                $this->stackPush(self::TTYPE_BOOLEAN, $tokenValue);
                break;

            default:
                // Unknown state - stop parsing
                return;
        }

        // Second phase: react on the token that is now on top of the stack
        switch ($this->stackGetLastType()) {
            case false:
            case self::TTYPE_NAME:
            case self::TTYPE_KEY:
            case self::TTYPE_SET_START:
            case self::TTYPE_LIST_START:
            case self::TTYPE_MAP_START:
                // some speed up
                break;

            case self::TTYPE_CLASS:
                list(, $value) = $this->stackPop();
                $this->className = $value;
                break;

            case self::TTYPE_STRING:
            case self::TTYPE_LINK:
            case self::TTYPE_NUMBER:
            case self::TTYPE_BOOLEAN:
            case self::TTYPE_EMBEDDED:
                // Inside a collection the value stays on the stack until the collection ends
                if (!$isSet && !$isMap && !$isList) {
                    list(, $value) = $this->stackPop();
                    list(, $name) = $this->stackPop();
                    $this->data->{$name} = $value;
                }
                break;

            case self::TTYPE_NULL:
                if (!$isSet && !$isMap && !$isList) {
                    $this->stackPop();
                    list(, $name) = $this->stackPop();
                    $this->data->{$name} = null;
                }
                break;

            case self::TTYPE_SET_END:
                // Pop everything back to the set start, then the field name
                $values = array();
                do {
                    list($searchToken, $value) = $this->stackPop();

                    if ($searchToken !== self::TTYPE_SET_START && $searchToken !== self::TTYPE_SET_END) {
                        $values[] = $value;
                    }
                } while ($searchToken !== self::TTYPE_SET_START);
                list(, $name) = $this->stackPop();
                // Values were popped in reverse order
                $values = array_reverse($values);
                $this->data->{$name} = $values;
                break;

            case self::TTYPE_LIST_END:
                // Pop everything back to the list start, then the field name
                $values = array();
                do {
                    list($searchToken, $value) = $this->stackPop();

                    if ($searchToken !== self::TTYPE_LIST_START && $searchToken !== self::TTYPE_LIST_END) {
                        $values[] = $value;
                    }
                } while ($searchToken !== self::TTYPE_LIST_START);
                list(, $name) = $this->stackPop();
                // Values were popped in reverse order
                $values = array_reverse($values);
                $this->data->{$name} = $values;
                break;

            case self::TTYPE_MAP_END:
                // Pop value/key pairs back to the map start, then the field name
                $values = array();
                do {
                    list($searchToken, $value) = $this->stackPop();
                    // check for null value
                    if ($searchToken === self::TTYPE_NULL) {
                        $value = null;
                    }
                    if ($searchToken !== self::TTYPE_MAP_START && $searchToken !== self::TTYPE_MAP_END) {
                        list(, $key) = $this->stackPop();
                        $values[$key] = $value;
                    }
                } while ($searchToken !== self::TTYPE_MAP_START);
                list(, $name) = $this->stackPop();
                // Restore original order, keeping the keys
                $values = array_reverse($values, true);
                $this->data->{$name} = $values;
                break;

            default:
                break;
        }
    }
}