/**
 * Format a string data.
 *
 * Title-cases the string. With mbstring available the conversion is
 * UTF-8 aware, and letters that directly follow an ASCII digit are put
 * back to lowercase when they were lowercase in the input ("1st" must not
 * become "1St"). Without mbstring the string is lowerized and passed
 * through ucwords().
 *
 * The digit correction is done per character (not per byte), so it also
 * works when the letter after the digit is multibyte, e.g. "1ère".
 *
 * @param string $str A string.
 *
 * @return string
 */
protected function formatString($str)
{
    if (extension_loaded('mbstring')) {
        $originalStr = $str;
        $str = mb_convert_case($str, MB_CASE_TITLE, 'UTF-8');

        // Correct for MB_CASE_TITLE's insistence on uppercasing letters
        // immediately preceded by numerals, eg: 1st -> 1St
        // Both strings are split into UTF-8 characters so that index $i
        // refers to the same character in each of them.
        $originalChars = preg_split('//u', $originalStr, -1, PREG_SPLIT_NO_EMPTY);
        $titleChars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() yields false on invalid UTF-8; a differing character
        // count means the case mapping changed the length, so positions can
        // no longer be compared. In both cases the correction is skipped.
        if ($originalChars !== false
            && $titleChars !== false
            && count($originalChars) === count($titleChars)
        ) {
            $total = count($titleChars);
            for ($i = 1; $i < $total; $i++) {
                // an upper case letter immediately preceded by a numeral
                if (!preg_match('/\A[0-9]\z/', $titleChars[$i - 1])) {
                    continue;
                }
                if (!preg_match('/\A\p{Lu}\z/u', $titleChars[$i])) {
                    continue;
                }
                // Only swap it back to lowercase if it was lowercase to begin with
                if (preg_match('/\A\p{Ll}\z/u', $originalChars[$i])) {
                    $titleChars[$i] = mb_strtolower($titleChars[$i], 'UTF-8');
                }
            }
            $str = implode('', $titleChars);
        }
    } else {
        $str = $this->lowerize($str);
        $str = ucwords($str);
    }

    // NOTE(review): these two replacements cancel each other out (a space is
    // added after every hyphen and then removed again). Kept as found;
    // presumably a ucwords() call once sat between them - confirm.
    $str = str_replace('-', '- ', $str);
    $str = str_replace('- ', '-', $str);

    return $str;
}
/**
 * Runs a multibyte regex search over a string and prints one line per match.
 *
 * Subject and pattern are given in $in_enc, converted to $test_enc, and the
 * search is executed with $test_enc as the regex encoding. For every match a
 * line "(encoding) (position after match) group1-group2-..." is printed, with
 * the captured groups converted back to $in_enc.
 *
 * @param string $test_enc Encoding the search is executed in.
 * @param string $str      Subject string, encoded in $in_enc.
 * @param string $look_for Pattern, encoded in $in_enc.
 * @param string $opt      Option string handed to mb_ereg_search_init().
 * @param string $in_enc   Encoding of the incoming strings and of the output.
 *
 * @return void
 */
function test_search($test_enc, $str, $look_for, $opt, $in_enc = 'EUC-JP')
{
    mb_regex_encoding($test_enc);

    $subject = mb_convert_encoding($str, $test_enc, $in_enc);
    $needle = mb_convert_encoding($look_for, $test_enc, $in_enc);
    mb_ereg_search_init($subject, $needle, $opt);

    for (;;) {
        if (!mb_ereg_search_pos()) {
            break;
        }

        // Drop element 0 (the whole match) so only the capture groups remain.
        $groups = mb_ereg_search_getregs();
        array_shift($groups);

        $joined = '';
        if (is_array($groups)) {
            $joined = implode('-', $groups);
        }

        printf(
            "(%s) (%d) %s\n",
            $test_enc,
            mb_ereg_search_getpos(),
            mb_convert_encoding($joined, $in_enc, $test_enc)
        );
    }
}
/**
 * mb_split() with the flag set of preg_split().
 *
 * The string is first scanned with the mb_ereg search functions and
 * described as a list of segments [byte length, is delimiter, delimiter was
 * captured]. That list is then walked to cut the result out of the string,
 * honouring $limit and the PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 * PREG_SPLIT_OFFSET_CAPTURE flags. All offsets and lengths are in bytes.
 *
 * @param string $pattern
 * @param string $string
 * @param int $limit
 * @param int $flags
 * @return array
 */
private static function mbSplit($pattern, $string, $limit = -1, $flags = 0)
{
    $byteLength = strlen($string); // bytes, matching the mb_ereg offsets
    mb_ereg_search_init($string);

    // Pass 1: describe the string as alternating piece / delimiter segments.
    $segments = array();
    $cursor = 0;
    while (true) {
        $hit = mb_ereg_search_pos($pattern, '');
        if ($hit === false) {
            break;
        }

        // text between the previous delimiter and this one
        $segments[] = array($hit[0] - $cursor, false, null);
        $cursor = $hit[0] + $hit[1];

        // the delimiter itself; "captured" when group 1 exists and is truthy
        $regs = mb_ereg_search_getregs();
        $segments[] = array($hit[1], true, isset($regs[1]) && $regs[1]);

        if ($cursor >= $byteLength) {
            break;
        }
    }
    // whatever follows the last delimiter (possibly empty)
    $segments[] = array($byteLength - $cursor, false, null);

    // Pass 2: cut the substrings.
    $keepEmpty = ~$flags & PREG_SPLIT_NO_EMPTY;
    $withOffsets = $flags & PREG_SPLIT_OFFSET_CAPTURE;
    $withDelimiters = $flags & PREG_SPLIT_DELIM_CAPTURE;

    $parts = array();
    $cursor = 0;
    $count = 1;
    foreach ($segments as $segment) {
        $size = $segment[0];
        $isDelimiter = $segment[1];
        $isCaptured = $segment[2];
        $wanted = ($size || $keepEmpty);

        // $count is only advanced for pieces that would actually be emitted.
        if ($limit > 0 && !$isDelimiter && $wanted && ++$count > $limit) {
            // Limit reached: the rest of the string becomes the final element.
            if ($size > 0 || $keepEmpty) {
                $rest = mb_strcut($string, $cursor);
                if ($withOffsets) {
                    $parts[] = array($rest, $cursor);
                } else {
                    $parts[] = $rest;
                }
            }
            break;
        }

        if ((!$isDelimiter || ($withDelimiters && $isCaptured)) && $wanted) {
            $piece = mb_strcut($string, $cursor, $size);
            if ($withOffsets) {
                $parts[] = array($piece, $cursor);
            } else {
                $parts[] = $piece;
            }
        }

        $cursor += $size;
    }

    return $parts;
}
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Spell-check the text posted as XML: every word of every line is checked
// with pspell, and each misspelt word is recorded as
// array(character offset in the whole text, length in characters, suggestions).
$lang = $_REQUEST["lang"];
$xml = new SimpleXMLElement(file_get_contents("php://input"));
$spell = pspell_new($lang, "", "", "utf-8", PSPELL_NORMAL);
$suggestions = array();
$offset = 0; // character offset of the current line within the whole text

mb_regex_encoding("UTF-8");
foreach (mb_split("\n", (string) $xml->text) as $line) {
    $len = mb_strlen($line, "UTF-8");
    mb_ereg_search_init($line, "\\w+");
    while (($wpos = mb_ereg_search_pos()) !== false) {
        // mb_ereg_search_pos() reports the match as BYTE offset and BYTE
        // length, so the word has to be cut out with the byte-wise substr();
        // mb_substr() would treat the numbers as character counts and
        // return the wrong text on any line with multibyte characters.
        $word = substr($line, $wpos[0], $wpos[1]);
        if (!pspell_check($spell, $word)) {
            // Convert the byte offset into a character offset for the client.
            $woffset = mb_strlen(substr($line, 0, $wpos[0]), "UTF-8");
            $wlen = mb_strlen($word, "UTF-8");
            array_push($suggestions, array($offset + $woffset, $wlen, pspell_suggest($spell, $word)));
        }
    }
    $offset += $len + 1; // + 1 for the "\n" removed by the split
}

// Build the response: one <c> element per misspelt word, holding the
// tab-separated suggestions, with offset ("o") and length ("l") attributes.
$xml = new SimpleXMLElement("<spellresponse/>");
$xml->addAttribute("error", count($suggestions) ? "1" : "0");
foreach ($suggestions as $s) {
    $c = $xml->addChild("c", join("\t", $s[2]));
    $c->addAttribute("o", $s[0]);
    $c->addAttribute("l", $s[1]);
// Case 1: search $str for the first run of word characters and compare the
// registers of that match (loose ==) with the expected single-element array.
// $str is not assigned in this fragment before its first use here.
// NOTE(review): the "PrÜÝ" literals look mis-encoded - confirm the file encoding.
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
// The return value of mb_ereg_search() is overwritten on the next line;
// only the registers fetched afterwards are inspected.
$r = mb_ereg_search();
$r = mb_ereg_search_getregs(); // get first result
var_dump($r == array("PrÜÝ" . "fung"));

// Case 2: same search on a freshly assigned subject, then additionally dump
// what mb_ereg_search_pos() returns when called after the first match.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs(); // get first result
var_dump($r == array("PrÜÝ" . "fung"));
var_dump(mb_ereg_search_pos());

// Case 3: same search, compared strictly (===), then dump what
// mb_ereg_search_regs() returns when called after the first match.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
$r = mb_ereg_search();
$r = mb_ereg_search_getregs(); // get first result
var_dump($r === array("PrÜÝ" . "fung"));
$r = mb_ereg_search_regs(); // get next result
var_dump($r);

// Case 4: set up the search again; the statements using it are outside
// this fragment.
$str = "PrÜÝ" . "fung abc pÜ";
$reg = "\\w+";
mb_regex_encoding("UTF-8");
mb_ereg_search_init($str, $reg);
if (isset($_POST['submit']) && isset($_POST['page']) && $_POST['page'] == $i) {
    echo "selected";
}
echo ">" . $names[$i] . "</option>";
} // closes the option loop that is opened before this fragment
echo "</select>\n\t\t\t<input type='submit' name='submit' value='Edit'></form>";

//if there is a GET variable make a POST variable
if (isset($_GET['page'])) {
    $_POST['page'] = $_GET['page'];
    $_POST['submit'] = "submit";
}

//when they select which page, this is the form to edit it
// The form is only rendered for a page index that exists in both lookup
// arrays; anything else would produce a bogus path.
if (isset($_POST['submit'], $_POST['page'])
    && is_scalar($_POST['page'])
    && isset($names[$_POST['page']], $menu_names[$_POST['page']])
) {
    $page = $names[$_POST['page']];
    $menu = $menu_names[$_POST['page']];

    //read the file
    // file_get_contents() also copes with an empty file, where
    // fread($fh, filesize($myFile)) is called with a length of 0.
    $myFile = "../pages/{$menu}/{$page}.php";
    $theData = is_readable($myFile) ? file_get_contents($myFile) : false;
    if ($theData === false) {
        $theData = '';
    }

    //search it for the div tag
    // Both markers are literal strings and substr() works on bytes, so plain
    // strpos() is used. The closing tag is searched for AFTER the opening
    // tag; searching from the start of the file would pick up any earlier
    // </div> and yield a negative length.
    $openMarker = "<div class='info'>";
    $closeMarker = "</div>";
    $body = '';
    $bodyStart = strpos($theData, $openMarker);
    if ($bodyStart !== false) {
        $bodyStart += strlen($openMarker);
        $bodyEnd = strpos($theData, $closeMarker, $bodyStart);
        if ($bodyEnd !== false) {
            //get the substring between the tags and get rid of the whitespace before and after
            $body = trim(strip_tags(substr($theData, $bodyStart, $bodyEnd - $bodyStart)));
        }
    }

    echo "<form action='change_page.php' method='POST'>\n\t\t\t\t<textarea rows='15' cols='40' name='body'>";
    echo $body;
    echo "</textarea>\n\t\t\t\t<input type='hidden' name='x' value='{$x[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='y' value='{$y[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='z' value='{$z[$_POST['page']]}'>\n\t\t\t\t<input type='hidden' name='name' value='{$page}'>\n\t\t\t\t<input type='hidden' name='html' value='n'>\n\t\t\t\t<input type='hidden' name='nav' value='n'>\n\t\t\t\t<input type='submit' name='submit' value='Save'>\n\t\t\t\t</form>";
}
include "../includes/layout/b2.php";
/**
 * Moves the content of <mtext> elements out of a MathML formula so that it
 * becomes plain text surrounding the (remaining) formula.
 *
 * Algorithm: the MathML is scanned tag by tag.
 *  - An allowed tag (math, mrow) is pushed on a stack and scanning continues.
 *  - Any other tag (mfenced, mfrac, ...) is skipped up to its closing tag and
 *    it is remembered that real math content was seen.
 *  - For <mtext> the formula is rearranged: the text is taken out, and when
 *    math content precedes it the formula is closed before the text and
 *    reopened after it.
 *  - The closing tag of an allowed tag pops the stack; a formula that turned
 *    out to be empty is removed.
 *
 * CAUTION: positions come from the mb_ereg search functions and are byte
 * offsets, so they are combined with substr()/strlen(); mb_substr()/
 * mb_strpos() are only applied to the isolated tag text. Be very careful
 * when mixing multibyte and non-multibyte functions here.
 *
 * @param string $formula MathML source.
 * @param bool   $encoded true when tags are delimited by « » instead of < >.
 *
 * @return string The rearranged formula. The formula as processed so far is
 *                returned when a tag is found to be unclosed.
 */
function wrsqz_extractTextFromMathML($formula, $encoded = true)
{
    //rapid return if nothing to do.
    if (strpos($formula, 'mtext') === false) {
        return $formula;
    }

    //initializations
    $opentag = $encoded ? '«' : '<';
    $closetag = $encoded ? '»' : '>';
    //tags where an <mtext> can live inside.
    $allowedtags = array('math', 'mrow');
    //regexp that matches a single tag label
    $pattern = $opentag . '([^' . $opentag . $closetag . ']*)' . $closetag;
    mb_ereg_search_init($formula, $pattern);
    $stack = array();         //stack of opened tags: array(name, full tag text)
    $omittedcontent = false;  //there is math content before the current point?
    $lasttag = null;          //name of the tag on top of the stack
    $length = strlen($formula);
    //position of the first character of the last formula (in bytes).
    $beginformula = strpos($formula, $opentag);
    $pos = array(0, 0);

    while (($pos[0] + $pos[1]) < $length) {
        $pos = mb_ereg_search_pos($pattern);
        if ($pos === false) {
            //no tag left (only plain text follows): nothing more to rearrange.
            break;
        }
        if ($pos[0] + $pos[1] < $length) {
            //this will be always true but the last iteration
            mb_ereg_search_setpos($pos[0] + $pos[1]);
        }
        $tag = substr($formula, $pos[0], $pos[1]);
        $trimmedTag = mb_substr($tag, 1, -1);

        //skip autoclosed tags
        if (mb_substr($trimmedTag, -1) == '/') {
            continue;
        }
        //discard attributes
        if (($spacepos = mb_strpos($trimmedTag, ' ')) !== false) {
            $trimmedTag = mb_substr($trimmedTag, 0, $spacepos);
        }

        if (in_array($trimmedTag, $allowedtags)) {
            //allowed tag
            $stack[] = array($trimmedTag, $tag);
            $lasttag = $trimmedTag;
        } elseif ($trimmedTag == '/' . $lasttag) {
            //close allowed tag
            array_pop($stack);
            $top = end($stack);
            //end() gives false on an empty stack; there is no open tag then.
            $lasttag = ($top === false) ? null : $top[0];

            //discard empty formulas
            if (empty($stack) && !$omittedcontent) {
                $formula1 = substr($formula, 0, $beginformula);
                if ($pos[0] + $pos[1] < $length) {
                    //this isn't the end.
                    $formula2 = substr($formula, $pos[0] + $pos[1]);
                    $formula = $formula1 . $formula2;
                    $length = strlen($formula);
                    mb_ereg_search_init($formula, $pattern);
                    mb_ereg_search_setpos($beginformula);
                } else {
                    //this is the last iteration. $length and mb_ereg_search
                    //string and position will be wrong, but it doesn't matter.
                    $formula = $formula1;
                }
            }
        } elseif ($trimmedTag == 'mtext') {
            $pos2 = mb_ereg_search_pos($opentag . '/mtext' . $closetag);
            if ($pos2 === false) {
                return $formula; //this is an error in XML (unclosed mtext)
            }
            $text = substr($formula, $pos[0] + $pos[1], $pos2[0] - ($pos[0] + $pos[1]));

            //Decode some chars in text
            if ($encoded) {
                $text = wrsqz_mathmlDecode($text);
            }
            // NOTE(review): the two literals below were unreadable in the
            // source (the entities had been decoded, leaving a no-op and a
            // syntax error). They are reconstructed as "replace the named
            // entity by one that plain HTML understands" - confirm against
            // the upstream file.
            $text = str_replace('&centerdot;', '&middot;', $text);
            $text = str_replace('&apos;', '&#39;', $text);

            $formula1 = substr($formula, 0, $pos[0]);            //until <mtext>
            $formula2 = substr($formula, $pos2[0] + $pos2[1]);   //from </mtext>

            if ($omittedcontent) {
                //we have a non-empty formula before the text so we must close it
                //compute the tail (close tags) of the formula before the text
                //and the head (open tags) of the formula after the text.
                $copystack = $stack; //copy stack
                $tail1 = '';
                $head2 = '';
                while ($stacktag = array_pop($copystack)) {
                    $tail1 .= $opentag . '/' . $stacktag[0] . $closetag;
                    $head2 = $stacktag[1] . $head2;
                }
                $formula1 = $formula1 . $tail1;
                $formula2 = $head2 . $formula2;
                //update $formula
                $formula = $formula1 . $text . $formula2;
                $beginformula = $pos[0] + strlen($tail1) + strlen($text);
                $position = $beginformula + strlen($head2);
            } else {
                //we have an empty formula before the text so we must skip it.
                $head = substr($formula1, 0, $beginformula); //all before the empty formula
                $formula1 = substr($formula1, $beginformula);
                $formula = $head . $text . $formula1 . $formula2;
                $beginformula += strlen($text);
                $position = $beginformula + strlen($formula1);
            }

            //update parameters with the new formula.
            $length = strlen($formula);
            $omittedcontent = false;
            mb_ereg_search_init($formula, $pattern);
            mb_ereg_search_setpos($position);
        } else {
            //not allowed tag: go to its closure and remember that we omitted content
            $pos = mb_ereg_search_pos($opentag . '/' . $trimmedTag . $closetag);
            if ($pos === false) {
                return $formula; //this is an error in XML (unclosed tag);
            }
            $omittedcontent = true;
            mb_ereg_search_setpos($pos[0] + $pos[1]);
        }
    }

    return $formula;
}
/**
 * Finds the substring of the $string that has at most $length letters with the $suffix appended (if needed).
 * Allows for custom $delimiter and $suffix.
 *
 * The string is cut at the last $delimiter that still leaves room for the
 * $suffix; when no delimiter lies within that range the string is cut hard
 * at the available length. All positions are counted in characters of the
 * detected encoding. $delimiter is treated as a literal string. The global
 * mb_internal_encoding() is not changed.
 *
 * @param string $string
 * @param int $length
 * @param string $delimiter
 * @param string $suffix
 * @return string
 */
public static function subStringCustom($string, $length, $delimiter = ' ', $suffix = '...')
{
    // Detect current string encoding; fall back to the internal encoding
    // when detection fails instead of passing false on.
    $encoding = mb_detect_encoding($string, mb_detect_order(), true);
    if ($encoding === false) {
        $encoding = mb_internal_encoding();
    }

    if (mb_strlen($string, $encoding) <= $length) {
        //String is already shorter than the length, return the string
        return $string;
    }

    //We subtract the suffix's length from our goal (never below zero)
    $budget = max(0, $length - mb_strlen($suffix, $encoding));

    //We look for the last $delimiter that has at most $budget characters
    //before it: it must start within the first $budget + 1 characters.
    $cut = false;
    $delimiterLength = mb_strlen($delimiter, $encoding);
    if ($delimiterLength > 0) {
        $window = mb_substr($string, 0, $budget + $delimiterLength, $encoding);
        $cut = mb_strrpos($window, $delimiter, 0, $encoding);
    }
    if ($cut === false) {
        //No delimiter in range: cut in the middle of the word.
        $cut = $budget;
    }

    return mb_substr($string, 0, $cut, $encoding) . $suffix; //Append suffix and return result
}
<?php
// Searches an ISO-8859-1 encoded string for an ISO-8859-1 pattern and dumps
// the array(offset, length) returned by mb_ereg_search_pos().
//
// The non-ASCII characters are written as \x escapes so that the test data
// are real ISO-8859-1 bytes regardless of the encoding this file is saved in.
mb_regex_encoding('iso-8859-1');

// "Internationalization" with accented letters (n-tilde, e-diaeresis, ...), ISO-8859-1
$test_str = "I\xF1t\xEBrn\xE2ti\xF4n\xE0liz\xE6ti\xF8n";

if (mb_ereg_search_init($test_str)) {
    $val = mb_ereg_search_pos("n\xE2ti\xF4n");
    var_dump($val);
} else {
    var_dump("false");
}