/**
 * Build regular expression
 *
 * Builds the regular expression from the provided automaton, where each
 * node is associated with one of the equivalency classes, and a
 * topologically sorted list of these classes.
 *
 * An slRegularExpressionAll is inferred only when exactly one equivalence
 * class is passed, that class has more than one member, the "max" of the
 * general occurences is exactly 1 and the "max" of the occurence sum is
 * greater than 1. In every other case the parent implementation is used.
 *
 * The provided automaton must be an instance of the
 * slCountingSingleOccurenceAutomaton, so that it provides information on
 * how often the tokens occur in the learned strings.
 *
 * @param slCountingSingleOccurenceAutomaton $automaton
 * @param array $classes
 * @return slRegularExpression
 */
protected function buildRegularExpression(slCountingSingleOccurenceAutomaton $automaton, array $classes)
{
    // xsd:all may only occur outermost and only makes sense for
    // equivalence classes with more than one element.
    if (count($classes) !== 1) {
        return parent::buildRegularExpression($automaton, $classes);
    }

    $members           = $this->equivalenceClasses[reset($classes)];
    $occurenceSum      = $automaton->getOccurenceSum($members);
    $nodes             = $automaton->getNodes();
    $generalOccurences = $automaton->getGeneralOccurences($members);

    $inferAll = count($members) > 1
        && $generalOccurences['max'] === 1
        && $occurenceSum['max'] > 1;

    if (!$inferAll) {
        return parent::buildRegularExpression($automaton, $classes);
    }

    // Inference all: wrap every member of the class into an element
    $toElement = function ($member) use ($nodes) {
        return new slRegularExpressionElement($nodes[$member]);
    };

    $all = new slRegularExpressionAll(array_map($toElement, $members));
    $all->minOccurences = $generalOccurences['min'];

    return $all;
}
/**
 * Converts the automaton learned from the paper's example 2 and compares
 * the resulting regular expression with the expected sequence.
 *
 * @return void
 */
public function testConvertPaperExample()
{
    // Example 2. Let W = {abccde, cccad, bfegg, bfehi}.
    $words = array(
        array('a', 'b', 'c', 'c', 'd', 'e'),
        array('c', 'c', 'c', 'a', 'd'),
        array('b', 'f', 'e', 'g', 'g'),
        array('b', 'f', 'e', 'h', 'i'),
    );

    $automaton = new slCountingSingleOccurenceAutomaton();
    foreach ($words as $word) {
        $automaton->learn($word);
    }

    $converter = new slChareConverter();
    $actual    = $converter->convertAutomaton($automaton);

    // Expected expression, assembled part by part in sequence order
    $abcRepeated = new slRegularExpressionRepeatedAtLeastOnce(
        new slRegularExpressionChoice(
            new slRegularExpressionElement('a'),
            new slRegularExpressionElement('b'),
            new slRegularExpressionElement('c')
        )
    );
    $dOrF = new slRegularExpressionChoice(
        new slRegularExpressionElement('d'),
        new slRegularExpressionElement('f')
    );
    $optionalE = new slRegularExpressionOptional(new slRegularExpressionElement('e'));
    $repeatedG = new slRegularExpressionRepeated(new slRegularExpressionElement('g'));
    $optionalH = new slRegularExpressionOptional(new slRegularExpressionElement('h'));
    $optionalI = new slRegularExpressionOptional(new slRegularExpressionElement('i'));

    $expected = new slRegularExpressionSequence(
        $abcRepeated,
        $dOrF,
        $optionalE,
        $repeatedG,
        $optionalH,
        $optionalI
    );

    $this->assertEquals($expected, $actual);
}
/**
 * Merge type with another type
 *
 * Merges the empty flag, the attributes and the automaton of the given
 * type into this type, and resets the regular expression of this type to
 * null.
 *
 * An attribute is marked optional after the merge if it is missing in
 * either of the two types, or if it is already optional in one of them.
 *
 * @todo Simple types are ignored for now, and attribute types are not
 * merged. Only a list with all attributes from all merged types is built.
 *
 * @param slSchemaType $type
 * @return void
 */
public function merge(slSchemaType $type)
{
    // Merge simple type. Logical AND keeps the flag a boolean; the bitwise
    // "&" used previously turned it into an integer (0 or 1).
    $this->empty = $this->empty && $type->empty;

    // Merge attributes
    foreach ($type->attributes as $name => $attribute) {
        $optional = !isset($this->attributes[$name]) ||
            $this->attributes[$name]->optional ||
            $attribute->optional;

        if (!isset($this->attributes[$name])) {
            // NOTE(review): the attribute object is adopted as-is, so it
            // stays shared with $type and the assignment below also marks
            // $type's attribute optional -- confirm this is intended.
            $this->attributes[$name] = $attribute;
        }

        $this->attributes[$name]->optional = $optional;
    }

    // Make attributes optional which do not occur in the merged type
    foreach ($this->attributes as $name => $attribute) {
        if (!isset($type->attributes[$name])) {
            $this->attributes[$name]->optional = true;
        }
    }

    // Merge type automatons; the regular expression is reset because the
    // automaton changes
    $this->regularExpression = null;
    $this->automaton->merge($type->automaton);
}
/**
 * Build regular expression
 *
 * Builds the regular expression from the provided automaton, where each
 * node is associated with one of the equivalency classes, and a
 * topologically sorted list of these classes.
 *
 * Every equivalence class becomes one term of the resulting sequence: a
 * choice of elements if the class has more than one member, a single
 * element otherwise. Each term is passed through wrapCountingPattern()
 * together with the occurence sum of its class.
 *
 * The provided automaton must be an instance of the
 * slCountingSingleOccurenceAutomaton, so that it provides information on
 * how often the tokens occur in the learned strings.
 *
 * @param slCountingSingleOccurenceAutomaton $automaton
 * @param array $classes
 * @return slRegularExpression
 */
protected function buildRegularExpression(slCountingSingleOccurenceAutomaton $automaton, array $classes)
{
    $nodes = $automaton->getNodes();

    // Maps a member of an equivalence class to its element expression
    $toElement = function ($member) use ($nodes) {
        return new slRegularExpressionElement($nodes[$member]);
    };

    $sequence = array();
    foreach ($classes as $class) {
        $members = $this->equivalenceClasses[$class];

        $expression = count($members) > 1
            ? new slRegularExpressionChoice(array_map($toElement, $members))
            : $toElement(reset($members));

        $sequence[] = $this->wrapCountingPattern(
            $automaton->getOccurenceSum($members),
            $expression
        );
    }

    return new slRegularExpressionSequence($sequence);
}
/**
 * Learns all six orderings of the tokens a, b and c and expects the
 * converter to produce an all-expression over these three elements.
 *
 * @return void
 */
public function testLearnAll()
{
    $permutations = array(
        array('a', 'b', 'c'),
        array('a', 'c', 'b'),
        array('b', 'a', 'c'),
        array('b', 'c', 'a'),
        array('c', 'a', 'b'),
        array('c', 'b', 'a'),
    );

    $automaton = new slCountingSingleOccurenceAutomaton();
    foreach ($permutations as $tokens) {
        $automaton->learn($tokens);
    }

    $converter = new slEChareConverter();
    $actual    = $converter->convertAutomaton($automaton);

    $expected = new slRegularExpressionAll(
        new slRegularExpressionElement('a'),
        new slRegularExpressionElement('b'),
        new slRegularExpressionElement('c')
    );

    $this->assertEquals($expected, $actual);
}