/**
  * Format a string data: title-case every word, keeping ordinal suffixes
  * such as "1st" in lower case.
  *
  * With the mbstring extension the string is title-cased as UTF-8 and any
  * letter that title-casing upper-cased directly after an ASCII digit is
  * put back to lower case, provided it was lower case in the input.
  * Without mbstring the string is lower-cased through lowerize() and
  * passed to ucwords(), with hyphens temporarily padded so that each part
  * of a hyphenated word is capitalised as well.
  *
  * @param string $str A string.
  *
  * @return string
  */
 protected function formatString($str)
 {
     if (extension_loaded('mbstring')) {
         $originalStr = $str;
         $str = mb_convert_case($str, MB_CASE_TITLE, 'UTF-8');
         // Frozen copy: the search below runs on this string, while $str
         // is edited in place.
         $titled = $str;
         // Correct for MB_CASE_TITLE's insistence on uppercasing letters
         // immediately preceded by numerals, eg: 1st -> 1St
         $originalEncoding = mb_regex_encoding();
         mb_regex_encoding('UTF-8');
         // matches an upper case letter character immediately preceded by a numeral
         mb_ereg_search_init($titled, '[0-9]\\p{Lu}');
         // Byte drift between $titled and $str caused by earlier replacements.
         $shift = 0;
         while ($match = mb_ereg_search_pos()) {
             // mb_ereg_search_pos() reports BYTE offset and length. The digit
             // is a single byte, the letter may span several bytes.
             $charPos = $match[0] + 1;
             $charLen = $match[1] - 1;
             $upper = substr($titled, $charPos, $charLen);
             $lower = mb_strtolower($upper, 'UTF-8');
             // Only swap it back to lowercase if it was lowercase to begin with
             if ($lower !== $upper && substr($originalStr, $charPos, strlen($lower)) === $lower) {
                 $str = substr_replace($str, $lower, $charPos + $shift, $charLen);
                 $shift += strlen($lower) - $charLen;
             }
         }
         mb_regex_encoding($originalEncoding);
     } else {
         $str = $this->lowerize($str);
         // ucwords() only capitalises after whitespace, so pad hyphens with
         // a space before the call and remove the padding afterwards.
         $str = str_replace('-', '- ', $str);
         $str = ucwords($str);
         $str = str_replace('- ', '-', $str);
     }
     return $str;
 }
Example #2
0
/**
 * Runs a repeated regex search and prints one line per match.
 *
 * Subject and pattern are converted from $in_enc to $test_enc, the search
 * is run in $test_enc, and for every match the encoding name, the search
 * position after the match and the captured groups (joined with "-",
 * converted back to $in_enc) are printed.
 *
 * @param string $test_enc Encoding the search runs in.
 * @param string $str      Subject string, given in $in_enc.
 * @param string $look_for Pattern, given in $in_enc.
 * @param string $opt      Option string passed to mb_ereg_search_init().
 * @param string $in_enc   Encoding of the supplied strings.
 */
function test_search($test_enc, $str, $look_for, $opt, $in_enc = 'EUC-JP')
{
    mb_regex_encoding($test_enc);
    mb_ereg_search_init(
        mb_convert_encoding($str, $test_enc, $in_enc),
        mb_convert_encoding($look_for, $test_enc, $in_enc),
        $opt
    );
    while (mb_ereg_search_pos()) {
        $groups = mb_ereg_search_getregs();
        // Drop element 0 (the whole match) so only the sub-groups remain.
        array_shift($groups);
        $joined = is_array($groups) ? implode('-', $groups) : '';
        printf(
            "(%s) (%d) %s\n",
            $test_enc,
            mb_ereg_search_getpos(),
            mb_convert_encoding($joined, $in_enc, $test_enc)
        );
    }
}
 /**
  * Splits $string on the multibyte regex $pattern while honouring the
  * preg_split flags (PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE,
  * PREG_SPLIT_OFFSET_CAPTURE), which mb_split itself does not support.
  *
  * Works in two passes: first the string is walked with the mb_ereg search
  * cursor to record the byte length of every text run and delimiter, then
  * the recorded runs are cut out with mb_strcut according to the flags.
  *
  * @param string $pattern
  * @param string $string
  * @param int $limit
  * @param int $flags
  * @return array
  */
 private static function mbSplit($pattern, $string, $limit = -1, $flags = 0)
 {
     // All offsets and lengths below are in bytes.
     $byteLength = strlen($string);
     $keepEmpty = ~$flags & PREG_SPLIT_NO_EMPTY;
     $withOffsets = $flags & PREG_SPLIT_OFFSET_CAPTURE;
     $withDelimiters = $flags & PREG_SPLIT_DELIM_CAPTURE;

     // Pass 1: record array(length, is delimiter, has capture) for each run.
     mb_ereg_search_init($string);
     $segments = array();
     $cursor = 0;
     while (true) {
         $hit = mb_ereg_search_pos($pattern, '');
         if ($hit === false) {
             break;
         }
         list($start, $size) = $hit;
         // text between the previous delimiter and this one
         $segments[] = array($start - $cursor, false, null);
         $cursor = $start + $size;
         // the delimiter itself
         $groups = mb_ereg_search_getregs();
         $segments[] = array($size, true, isset($groups[1]) && $groups[1]);
         if ($cursor >= $byteLength) {
             break;
         }
     }
     // whatever follows the last delimiter
     $segments[] = array($byteLength - $cursor, false, null);

     // Pass 2: cut the substrings.
     $pieces = array();
     $cursor = 0;
     $emitted = 1;
     foreach ($segments as $segment) {
         list($size, $isDelimiter, $isCaptured) = $segment;
         $wanted = $size || $keepEmpty;
         // The counter is only bumped for text runs that would be emitted.
         if ($limit > 0 && !$isDelimiter && $wanted && ++$emitted > $limit) {
             // Limit reached: the remainder of the string is the last piece.
             if ($size > 0 || $keepEmpty) {
                 $rest = mb_strcut($string, $cursor);
                 $pieces[] = $withOffsets ? array($rest, $cursor) : $rest;
             }
             break;
         }
         if ((!$isDelimiter || $withDelimiters && $isCaptured) && $wanted) {
             $piece = mb_strcut($string, $cursor, $size);
             $pieces[] = $withOffsets ? array($piece, $cursor) : $piece;
         }
         $cursor += $size;
     }
     return $pieces;
 }
Example #4
0
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
// Dictionary language requested by the client.
$lang = $_REQUEST["lang"];
// The request body is an XML document; its <text> element is spell-checked.
$xml = new SimpleXMLElement(file_get_contents("php://input"));
$spell = pspell_new($lang, "", "", "utf-8", PSPELL_NORMAL);
// Each entry: array(character offset in the whole text, word length in
// characters, list of suggestions).
$suggestions = array();
// Character offset of the current line within the whole text.
$offset = 0;
mb_regex_encoding("UTF-8");
foreach (mb_split("\n", $xml->text) as $line) {
    $len = mb_strlen($line, "UTF-8");
    mb_ereg_search_init($line, "\\w+");
    while (($wpos = mb_ereg_search_pos()) !== false) {
        // mb_ereg_search_pos() reports the match as BYTE offset and BYTE
        // length, so the word has to be cut with the byte-wise substr();
        // mb_substr() would treat the numbers as character counts.
        $word = substr($line, $wpos[0], $wpos[1]);
        if (!pspell_check($spell, $word)) {
            // Convert the byte offset to a character offset by measuring
            // the text that precedes the word.
            $woffset = mb_strlen(substr($line, 0, $wpos[0]), "UTF-8");
            $wlen = mb_strlen($word, "UTF-8");
            array_push($suggestions, array($offset + $woffset, $wlen, pspell_suggest($spell, $word)));
        }
    }
    // +1 accounts for the "\n" removed by the split.
    $offset += $len + 1;
}
$xml = new SimpleXMLElement("<spellresponse/>");
$xml->addAttribute("error", count($suggestions) ? "1" : "0");
foreach ($suggestions as $s) {
    $c = $xml->addChild("c", join("\t", $s[2]));
    $c->addAttribute("o", $s[0]);
    $c->addAttribute("l", $s[1]);
Example #5
0
// Scenario: search for the first run of word characters and compare the
// registers of that match with the expected first word.
// NOTE(review): $str is not assigned before this first search in this part
// of the script - presumably it is set earlier; confirm.
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
// The boolean result of the search is overwritten on the next line; only
// the registers are inspected.
$r = mb_ereg_search();
$r = mb_ereg_search_getregs();
// get first result
var_dump($r == array("PrÜÝ" . "fung"));
// Scenario: same first match, then dump the position of the following match.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs();
// get first result
var_dump($r == array("PrÜÝ" . "fung"));
// Continues the search from where the first match ended.
var_dump(mb_ereg_search_pos());
// Scenario: same first match (compared strictly this time), then dump the
// registers of the following match.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs();
// get first result
var_dump($r === array("PrÜÝ" . "fung"));
$r = mb_ereg_search_regs();
// get next result
var_dump($r);
// Re-initialise the search on the same subject and pattern for a further scenario.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
Example #6
0
    if (isset($_POST['submit']) && isset($_POST['page']) && $_POST['page'] == $i) {
        echo "selected";
    }
    echo ">" . $names[$i] . "</option>";
}
echo "</select>\n\t\t\t<input type='submit' name='submit' value='Edit'></form>";
//if there is a GET variable make a POST variable
if (isset($_GET['page'])) {
    $_POST['page'] = $_GET['page'];
    $_POST['submit'] = "submit";
}
//when they select which page, this is the form to edit it
if (isset($_POST['submit']) && isset($_POST['page'])) {
    $page = $names[$_POST['page']];
    $menu = $menu_names[$_POST['page']];
    //read the file (file_get_contents also copes with empty files)
    $myFile = "../pages/{$menu}/{$page}.php";
    $theData = file_get_contents($myFile);
    if ($theData === false) {
        // unreadable file: show the form with an empty body
        $theData = '';
    }
    //search it for the div tag; both markers are literal text, so a plain
    //byte search is enough and the offsets fit substr() directly
    $openTag = "<div class='info'>";
    $closeTag = "</div>";
    $body = '';
    $bodyStart = strpos($theData, $openTag);
    if ($bodyStart !== false) {
        $bodyStart += strlen($openTag);
        //look for the closing tag only AFTER the opening one, otherwise an
        //earlier </div> in the file would be picked up
        $bodyEnd = strpos($theData, $closeTag, $bodyStart);
        if ($bodyEnd !== false) {
            $body = substr($theData, $bodyStart, $bodyEnd - $bodyStart);
        }
    }
    echo "<form action='change_page.php' method='POST'>\n\t\t\t\t<textarea rows='15' cols='40' name='body'>";
    //get the substring between the tags and get rid of the whitespace before and after
    echo trim(strip_tags($body));
    echo "</textarea>\n\t\t\t\t<input type='hidden' name='x' value='{$x[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='y' value='{$y[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='z' value='{$z[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='name' value='{$page}'>\n\t\t\t\t<input type='hidden' name='html' value='n'>\n\t\t\t\t<input type='hidden' name='nav' value='n'>\n\t\t\t\t<input type='submit' name='submit' value='Save'>\n\t\t\t\t</form>";
}
include "../includes/layout/b2.php";
/**
 * Moves the content of <mtext> elements out of the MathML markup in $formula.
 *
 * The formula is scanned tag by tag with the mb_ereg search cursor. Text found
 * in an <mtext> element is spliced into the string outside the surrounding
 * math/mrow tags; when other math content precedes it, the open tags are
 * closed before the text and re-opened after it. Formulas that end up with no
 * content are removed.
 *
 * @param string $formula Markup to process.
 * @param bool   $encoded When true, tags are delimited by « and » instead of
 *                        < and >, and extracted text is passed through
 *                        wrsqz_mathmlDecode().
 * @return string The rewritten markup. $formula is returned untouched when it
 *                does not contain the substring "mtext", and returned in its
 *                current state when the closing tag of a skipped element
 *                cannot be found.
 */
function wrsqz_extractTextFromMathML($formula, $encoded=true){
    //Algorithm: We scan the mathML tag by tag.
    //If a tag is one of the allowed (math, mrow) we save it at the stack
    //and continue with the next.
    //If the tag is not allowed (mfenced, mfrac,...) we skip all mathML until its
    //closure (</mfenced>, </mfrac>)
    //If the tag is <mtext> we rearrange the formula
    //If a tag is a closure of allowed tag, we pop it from the stack.

    //rapid return if nothing to do.
    if(strpos($formula,'mtext')===false) return $formula;
    //initializations
    $opentag = $encoded ? '«' : '<';
    $closetag = $encoded ? '»' : '>';
    //tags where an <mtext> can live inside.
    $allowedtags = array('math', 'mrow');

    $pattern = $opentag.'([^'.$opentag.$closetag.']*)'.$closetag; //regexp that matches a single tag label
    mb_ereg_search_init($formula, $pattern);
    $stack = array();       //stack of opened tags; each entry is array(tag name, full tag text)
    $omittedcontent=false;  //there is math content before the current point?
    $lasttag=null;          //last tag of the stack
    $length = strlen($formula);
    $beginformula = strpos($formula, $opentag);   //position of the first character of the last formula (in bytes).
    $pos=array(0,0);        //array(offset, length) of the current tag as returned by mb_ereg_search_pos
    //CAUTION: If you change this function, be very careful with multibyte
    //         and non-multibyte functions.
    while(($pos[0]+$pos[1])<$length){
        $pos = mb_ereg_search_pos($pattern);

        if($pos[0]+$pos[1] < $length){
            //this will be always true but the last iteration
            mb_ereg_search_setpos($pos[0]+$pos[1]);
        }
        //the tag text is cut by byte offsets, then its first and last
        //characters (the delimiters) are stripped character-wise
        $tag = substr($formula, $pos[0],$pos[1]);
        $trimmedTag = mb_substr($tag,1,-1);
        //skip autoclosed tags
        if(mb_substr($trimmedTag,-1) == '/'){
            continue;
        }
        //discard attributes
        if(($spacepos = mb_strpos($trimmedTag,' '))!==false){
            $trimmedTag=mb_substr($trimmedTag,0,$spacepos);
        }      
        if(in_array($trimmedTag,$allowedtags)){
        //allowed tag
            $stack[]=array($trimmedTag,$tag);
            $lasttag = $trimmedTag;
        }else if($trimmedTag == '/'.$lasttag){
        //close allowed tag
            array_pop($stack);
            $lasttag = end($stack);
            $lasttag = $lasttag[0];
            //discard empty formulas
            if(empty($stack) && !$omittedcontent){
                $formula1 = substr($formula, 0, $beginformula);
                if($pos[0]+$pos[1]<$length){
                    //this isn't the end.
                    $formula2 = substr($formula, $pos[0]+$pos[1]);
                    $formula = $formula1 . $formula2;
                    $length = strlen($formula);
                    //the subject string changed, so the search must be restarted on it
                    mb_ereg_search_init($formula, $pattern);
                    mb_ereg_search_setpos($beginformula);
                }else{
                    //this is the last iteration. $length and mb_ereg_search
                    //string and position will be wrong, but it doesn't matter.
                    $formula = $formula1;
                }
                
            }
        }else if($trimmedTag == 'mtext'){
            //locate the matching close tag; the text lies between the two tags
            $pos2 = mb_ereg_search_pos($opentag.'/mtext'.$closetag);
            $text = substr($formula, $pos[0]+$pos[1], $pos2[0]-($pos[0]+$pos[1]));
            //Decode some chars in text
            if($encoded) $text=wrsqz_mathmlDecode($text);
            $text = str_replace('&centerdot;','&middot;',$text);
            $text = str_replace('&apos;','&#39;',$text);
            $formula1 = substr($formula, 0, $pos[0]);  //until <mtext>
            $formula2 = substr($formula, $pos2[0]+$pos2[1]); //from </mtext>
            if($omittedcontent){ 
                //we have a non-empty formula before the text so we must close it
                //compute the tail (close tags) of the formula before the text
                //and the head (open tags) of the formula after the text.
                $copystack = $stack; //copy stack
                $tail1 = '';
                $head2 = '';
                while($stacktag = array_pop($copystack)){
                    $tail1.= $opentag.'/'.$stacktag[0].$closetag;
                    $head2 = $stacktag[1] . $head2;
                }
                $formula1 = $formula1 . $tail1;
                $formula2 = $head2 . $formula2;
                //update $formula
                $formula = $formula1 . $text . $formula2;
                $beginformula = $pos[0]+strlen($tail1)+strlen($text);
                $position = $beginformula+strlen($head2);
            }else{
            //we have an empty formula before the text so we must skip it.
                $head = substr($formula1, 0, $beginformula); //all before the empty formula
                $formula1 = substr($formula1, $beginformula);

                $formula = $head . $text . $formula1 . $formula2;
                $beginformula += strlen($text);
                $position = $beginformula +strlen($formula1);
            }
            //update parameters with the new formula.
            $length = strlen($formula);
            $omittedcontent = false;
            mb_ereg_search_init($formula, $pattern);
            mb_ereg_search_setpos($position);
            
        }else{
        //not allowed tag: go to its closure and remember that we omitted content
            $pos = mb_ereg_search_pos($opentag.'/'.$trimmedTag.$closetag);
            if($pos === false){
                return $formula; //this is an error in XML (unclosed tag);
            }
            $omittedcontent=true;
            mb_ereg_search_setpos($pos[0]+$pos[1]);
        }
    }

    return $formula;

}
Example #8
0
 /**
  * Shortens $string to at most $length characters, cutting at the last
  * $delimiter that still fits and appending $suffix.
  *
  * A string that already fits is returned unchanged. Otherwise the room
  * left after reserving the suffix is filled with the longest prefix that
  * is directly followed by $delimiter; when no delimiter occurs in that
  * range the string is cut hard at the available length.
  *
  * The global mb_internal_encoding() is left untouched; the regex encoding
  * is switched to the detected encoding only for the duration of the match.
  *
  * @param string $string
  * @param int $length Maximum number of characters of the result, suffix included.
  * @param string $delimiter Inserted into the regular expression as-is.
  * @param string $suffix
  * @return string
  */
 public static function subStringCustom($string, $length, $delimiter = ' ', $suffix = '...')
 {
     //Detect current string encoding, falling back to the configured one
     $encoding = mb_detect_encoding($string);
     if ($encoding === false) {
         $encoding = mb_internal_encoding();
     }
     if (mb_strlen($string, $encoding) <= $length) {
         //String is already shorter than the length, return the string
         return $string;
     }
     //We subtract the suffix's length from our goal; never below zero, a
     //negative bound would make the {0,n} quantifier invalid
     $budget = max(0, $length - mb_strlen($suffix, $encoding));
     //Anchored at the start so the capture is always a prefix of $string.
     //Group 1 is the greedy prefix of at most $budget characters that is
     //followed by the delimiter.
     $pattern = '\\A(.{0,' . $budget . '})(?:' . $delimiter . ')';
     $previousRegexEncoding = mb_regex_encoding();
     mb_regex_encoding($encoding);
     $matched = mb_ereg($pattern, $string, $regs);
     mb_regex_encoding($previousRegexEncoding);
     if ($matched !== false) {
         //mb_ereg() stores false for an empty group; the cast turns it into ''
         $head = (string) $regs[1];
     } else {
         //No delimiter within reach: cut at the character budget
         $head = mb_substr($string, 0, $budget, $encoding);
     }
     //Append suffix and return result
     return $head . $suffix;
 }
Example #9
0
<?php

// Search an ISO-8859-1 subject for a sub-pattern and dump the match position.
// NOTE(review): the non-ASCII letters in both literals show up as U+FFFD here;
// presumably the original file stored raw ISO-8859-1 bytes - confirm.
mb_regex_encoding('iso-8859-1');
$test_str = 'I�t�rn�ti�n�liz�ti�n';
if (!mb_ereg_search_init($test_str)) {
    // The subject could not be set up for searching.
    var_dump("false");
} else {
    // The pattern is supplied here because the init call carried none.
    $val = mb_ereg_search_pos("n�ti�n");
    var_dump($val);
}