Exemple #1
0
    /**
     * Hyphenates two lines of USFM and compares the result with a fixture.
     *
     * The sample mixes USFM markers (\s, \nd ... \nd*, \add ... \add*) with
     * Latin, CJK and Hebrew text. The expected text contains soft hyphen
     * characters, which most editors do not display: edit it with care.
     */
    public function testOne()
    {
        // What the filter is expected to produce (includes invisible soft hyphens).
        $expected = <<<'EOD'
\s \nd UNku­lu­nku­lu\nd* u\add ba\add*­xwa­yi­sa ngo­ku­lu­nga oku­ngo­kwa­ba­ntu 文字a­b化け
\s Uku­lu­nga oku­ku\nd Kri­stu\nd* אבa­bגד ku­yi­nzu­zo אבגד ab
EOD;
        // The unhyphenated USFM that is fed to the filter.
        $usfm = <<<'EOD'
\s \nd UNkulunkulu\nd* u\add ba\add*xwayisa ngokulunga okungokwabantu 文字ab化け
\s Ukulunga okuku\nd Kristu\nd* אבabגד kuyinzuzo אבגד ab
EOD;
        $actual = Filter_Hyphenate::atTransition($this->firstset, $this->secondset, $usfm);
        $this->assertEquals($expected, $actual);
    }
Exemple #2
0
// Command-line task: copies a Bible into "<name>-hyphenated", inserting soft
// hyphens into every chapter on the way.
//   argument 1: name of the Bible to read.
//   argument 2: user who is granted access to the hyphenated Bible.
$database_logs = Database_Logs::getInstance();
$database_books = Database_Books::getInstance();
$database_config_bible = Database_Config_Bible::getInstance();
$database_bibles = Database_Bibles::getInstance();
$database_users = Database_Users::getInstance();

// $argv may be undefined (e.g. register_argc_argv off); pass null in that
// case instead of silencing the notice with the @ operator.
$cliArguments = isset($argv) ? $argv : null;
$inputBible = Filter_Cli::argument($cliArguments, 1);
$user = Filter_Cli::argument($cliArguments, 2);

// Without a Bible name the steps below would delete and recreate a Bible
// called "-hyphenated", so stop before anything destructive happens.
if ((string) $inputBible === '') {
    $database_logs->log("No Bible was given, so nothing has been hyphenated");
    return;
}

$outputBible = "{$inputBible}-hyphenated";
$database_logs->log("Reading Bible {$inputBible}, adding soft hyphens, putting it into Bible {$outputBible}");

// Get the two sets of characters as arrays.
// The /u switch treats the text as UTF8 Unicode.
// If a set cannot be split (preg_match_all fails, e.g. on invalid UTF-8) or
// is empty, it stays an empty array.
$firstset = array();
if (preg_match_all('/./u', $database_config_bible->getHyphenationFirstSet($inputBible), $matches)) {
    $firstset = $matches[0];
}
$secondset = array();
if (preg_match_all('/./u', $database_config_bible->getHyphenationSecondSet($inputBible), $matches)) {
    $secondset = $matches[0];
}

// Delete and (re)create the hyphenated Bible, and grant privileges.
$database_bibles->deleteBible($outputBible);
$database_bibles->createBible($outputBible);
$database_users->grantAccess2Bible($user, $outputBible);

// Go through the input Bible's books and chapters, storing the hyphenated
// version of each chapter under the same book and chapter in the output Bible.
$books = $database_bibles->getBooks($inputBible);
foreach ($books as $book) {
    // One log entry per book.
    $database_logs->log($database_books->getEnglishFromId($book));
    $chapters = $database_bibles->getChapters($inputBible, $book);
    foreach ($chapters as $chapter) {
        $data = $database_bibles->getChapter($inputBible, $book, $chapter);
        $data = Filter_Hyphenate::atTransition($firstset, $secondset, $data);
        $database_bibles->storeChapter($outputBible, $book, $chapter, $data);
    }
}
$database_logs->log("The Bible has been hyphenated");
Exemple #3
0
 /**
  * This filter inserts soft hyphens in $text.
  *
  * The text is processed line by line, and each line character by character
  * (UTF-8 aware). At every transition from a character in $firstset to a
  * character in $secondset a soft hyphen is put in front of the second
  * character, unless Filter_Hyphenate::nearWhiteSpace() reports that the
  * position is near white space.
  *
  * USFM markers are left alone: from a backslash up to and including the
  * next space or asterisk nothing is hyphenated. The marker does not reset
  * the transition state, so a hyphen can still appear right after it.
  *
  * $firstset: array of characters (strings; compared strictly).
  * $secondset: array of characters (strings; compared strictly).
  * $text: A string of text to operate on.
  * Returns: The hyphenated $text. If either set is not a non-empty array,
  * $text is returned unchanged. Otherwise, if $text is not a string, an
  * empty string is returned. Lines that cannot be split into UTF-8
  * characters are passed through unchanged.
  */
 public static function atTransition($firstset, $secondset, $text)
 {
     // Verify the input: without two non-empty sets there is nothing to do.
     if (!is_array($firstset) || count($firstset) == 0) {
         return $text;
     }
     if (!is_array($secondset) || count($secondset) == 0) {
         return $text;
     }
     if (!is_string($text)) {
         return "";
     }

     // Split the text up into lines and go through each one.
     $lines = explode("\n", $text);
     foreach ($lines as $lineNumber => $line) {
         if ($line === "") {
             continue;
         }

         // Split the line up into an array of UTF8 Unicode characters.
         // preg_match_all() fails on malformed UTF-8; in that case the line
         // is kept as it is rather than being lost.
         if (!preg_match_all('/./u', $line, $matches)) {
             continue;
         }
         $characters = $matches[0];

         // Processor flags.
         $previousCharacterWasRelevant = false;
         $isUsfm = false;

         // Process each character. $character always holds the original,
         // unhyphenated character; updates are written to $characters[$key].
         foreach ($characters as $key => $character) {
             // A backslash starts a USFM marker.
             if ($character === "\\") {
                 $isUsfm = true;
             }
             if ($isUsfm) {
                 // A space or an asterisk is the last character of the marker.
                 if ($character === " " || $character === "*") {
                     $isUsfm = false;
                 }
                 continue;
             }

             // Check whether to insert the soft hyphen here.
             if ($previousCharacterWasRelevant
                 && in_array($character, $secondset, true)
                 && !Filter_Hyphenate::nearWhiteSpace($characters, $key)
             ) {
                 $characters[$key] = Filter_Character::softHyphen() . $character;
             }

             // Flag for next iteration. This must look at the original
             // character: the hyphenated form is never part of $firstset,
             // which would hide transitions when the two sets overlap.
             $previousCharacterWasRelevant = in_array($character, $firstset, true);
         }

         // Re-assemble the line from the separate (updated) characters.
         $lines[$lineNumber] = implode("", $characters);
     }

     // Assemble the hyphenated text from the separate lines.
     return implode("\n", $lines);
 }