multibyte.php

Go to the documentation of this file.
00001 <?php
00002 /* SVN FILE: $Id: multibyte.php 7992 2009-01-14 23:05:51Z TommyO $ */
00003 /**
00004  * Multibyte handling methods.
00005  *
00006  *
00007  * PHP versions 4 and 5
00008  *
00009  * CakePHP(tm) :  Rapid Development Framework (http://www.cakephp.org)
00010  * Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
00011  *
00012  * Licensed under The MIT License
00013  * Redistributions of files must retain the above copyright notice.
00014  *
00015  * @filesource
00016  * @copyright     Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
00017  * @link          http://www.cakefoundation.org/projects/info/cakephp CakePHP(tm) Project
00018  * @package       cake
00019  * @subpackage    cake.cake.libs
00020  * @since         CakePHP(tm) v 1.2.0.6833
00021  * @version       $Revision: 7992 $
00022  * @modifiedby    $LastChangedBy: TommyO $
00023  * @lastmodified  $Date: 2009-01-14 18:05:51 -0500 (Wed, 14 Jan 2009) $
00024  * @license       http://www.opensource.org/licenses/mit-license.php The MIT License
00025  */
// When the mbstring extension is loaded, align its internal encoding with the
// application's configured encoding (App.encoding), if one has been set.
if (function_exists('mb_internal_encoding')) {
    $encoding = Configure::read('App.encoding');
    if ($encoding) {
        mb_internal_encoding($encoding);
    }
}
/**
 * Polyfill for mb_stripos(): case-insensitive search for the first occurrence of a string.
 *
 * Declared only when mb_stripos() does not already exist; the work is delegated
 * to Multibyte::stripos().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::stripos().
 * @return integer|boolean The position of the first occurrence of $needle in $haystack, or false if it is not found.
 */
if (!function_exists('mb_stripos')) {
    function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) {
        $position = Multibyte::stripos($haystack, $needle, $offset);
        return $position;
    }
}
/**
 * Polyfill for mb_stristr(): case-insensitive search returning a portion of the haystack.
 *
 * Declared only when mb_stristr() does not already exist; the work is delegated
 * to Multibyte::stristr().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                true  - everything before the first occurrence of $needle;
 *                false - everything from the first occurrence of $needle to the end (default).
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::stristr().
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_stristr')) {
    function mb_stristr($haystack, $needle, $part = false, $encoding = null) {
        $portion = Multibyte::stristr($haystack, $needle, $part);
        return $portion;
    }
}
/**
 * Polyfill for mb_strlen(): length of a string in characters.
 *
 * Declared only when mb_strlen() does not already exist; the work is delegated
 * to Multibyte::strlen().
 *
 * @param string $string The string whose length is measured.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strlen().
 * @return integer The number of characters in $string; a multi-byte character counts as 1.
 */
if (!function_exists('mb_strlen')) {
    function mb_strlen($string, $encoding = null) {
        $length = Multibyte::strlen($string);
        return $length;
    }
}
/**
 * Polyfill for mb_strpos(): position of the first occurrence of a string.
 *
 * Declared only when mb_strpos() does not already exist; the work is delegated
 * to Multibyte::strpos().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts. Defaults to 0.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strpos().
 * @return integer|boolean The position of the first occurrence of $needle in $haystack,
 *                         counted from the beginning of $haystack, or false if it is not found.
 */
if (!function_exists('mb_strpos')) {
    function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) {
        $position = Multibyte::strpos($haystack, $needle, $offset);
        return $position;
    }
}
/**
 * Polyfill for mb_strrchr(): portion of the haystack around the last occurrence of a string.
 *
 * Declared only when mb_strrchr() does not already exist; the work is delegated
 * to Multibyte::strrchr().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                      true  - everything before the last occurrence of $needle;
 *                      false - everything from the last occurrence of $needle to the end (default).
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strrchr().
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_strrchr')) {
    function mb_strrchr($haystack, $needle, $part = false, $encoding = null) {
        $portion = Multibyte::strrchr($haystack, $needle, $part);
        return $portion;
    }
}
/**
 * Polyfill for mb_strrichr(): case-insensitive variant of mb_strrchr().
 *
 * Declared only when mb_strrichr() does not already exist; the work is delegated
 * to Multibyte::strrichr().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                      true  - everything before the last occurrence of $needle;
 *                      false - everything from the last occurrence of $needle to the end (default).
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strrichr().
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_strrichr')) {
    function mb_strrichr($haystack, $needle, $part = false, $encoding = null) {
        $portion = Multibyte::strrichr($haystack, $needle, $part);
        return $portion;
    }
}
/**
 * Polyfill for mb_strripos(): case-insensitive position of the last occurrence of a string.
 *
 * Declared only when mb_strripos() does not already exist; the work is delegated
 * to Multibyte::strripos().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strripos().
 * @return integer|boolean The position of the last occurrence of $needle in $haystack, or false if it is not found.
 */
if (!function_exists('mb_strripos')) {
    function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) {
        $position = Multibyte::strripos($haystack, $needle, $offset);
        return $position;
    }
}
/**
 * Polyfill for mb_strrpos(): position of the last occurrence of a string.
 *
 * Declared only when mb_strrpos() does not already exist; the work is delegated
 * to Multibyte::strrpos().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset Search offset, passed through unchanged to Multibyte::strrpos(). Defaults to 0.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strrpos().
 * @return integer|boolean The position of the last occurrence of $needle in $haystack, or false if it is not found.
 */
if (!function_exists('mb_strrpos')) {
    function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) {
        $position = Multibyte::strrpos($haystack, $needle, $offset);
        return $position;
    }
}
/**
 * Polyfill for mb_strstr(): portion of the haystack around the first occurrence of a string.
 *
 * Declared only when mb_strstr() does not already exist; the work is delegated
 * to Multibyte::strstr().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                      true  - everything before the first occurrence of $needle;
 *                      false - everything from the first occurrence of $needle to the end (default).
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strstr().
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 */
if (!function_exists('mb_strstr')) {
    function mb_strstr($haystack, $needle, $part = false, $encoding = null) {
        $portion = Multibyte::strstr($haystack, $needle, $part);
        return $portion;
    }
}
/**
 * Polyfill for mb_strtolower(): lowercase a string.
 *
 * Declared only when mb_strtolower() does not already exist; the work is delegated
 * to Multibyte::strtolower().
 *
 * @param string $string The string to convert.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strtolower().
 * @return string $string with its alphabetic characters converted to lowercase.
 */
if (!function_exists('mb_strtolower')) {
    function mb_strtolower($string, $encoding = null) {
        $lowered = Multibyte::strtolower($string);
        return $lowered;
    }
}
/**
 * Polyfill for mb_strtoupper(): uppercase a string.
 *
 * Declared only when mb_strtoupper() does not already exist; the work is delegated
 * to Multibyte::strtoupper().
 *
 * @param string $string The string to convert.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::strtoupper().
 * @return string $string with its alphabetic characters converted to uppercase.
 */
if (!function_exists('mb_strtoupper')) {
    function mb_strtoupper($string, $encoding = null) {
        $raised = Multibyte::strtoupper($string);
        return $raised;
    }
}
/**
 * Polyfill for mb_substr_count(): count the occurrences of a substring.
 *
 * Declared only when mb_substr_count() does not already exist; the work is delegated
 * to Multibyte::substrCount().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The substring to count.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::substrCount().
 * @return integer The number of times $needle occurs in $haystack.
 */
if (!function_exists('mb_substr_count')) {
    function mb_substr_count($haystack, $needle, $encoding = null) {
        $occurrences = Multibyte::substrCount($haystack, $needle);
        return $occurrences;
    }
}
/**
 * Polyfill for mb_substr(): extract part of a string.
 *
 * Declared only when mb_substr() does not already exist; the work is delegated
 * to Multibyte::substr().
 *
 * @param string $string The string to extract from.
 * @param integer $start The first position used in $string.
 * @param integer $length The maximum length of the returned string. Defaults to null, which is passed through as-is.
 * @param string $encoding Accepted for signature compatibility with mbstring; it is not forwarded to Multibyte::substr().
 * @return string The portion of $string selected by the $start and $length parameters.
 */
if (!function_exists('mb_substr')) {
    function mb_substr($string, $start, $length = null, $encoding = null) {
        $portion = Multibyte::substr($string, $start, $length);
        return $portion;
    }
}
/**
 * Polyfill for mb_encode_mimeheader(): encode a string for use in a MIME header.
 *
 * Declared only when mb_encode_mimeheader() does not already exist; the work is delegated
 * to Multibyte::mimeEncode(), which receives $str, $charset and $linefeed only.
 *
 * @param string $str The string to encode.
 * @param string $charset The name of the character set in which $str is represented. Defaults to 'UTF-8'.
 * @param string $transfer_encoding The MIME encoding scheme, "B" (Base64) or "Q" (Quoted-Printable).
 *                      Defaults to "B". Accepted for signature compatibility; it is not forwarded to Multibyte::mimeEncode().
 * @param string $linefeed The end-of-line marker used for line folding (an RFC term: breaking a line that is
 *                      longer than a certain length into multiple lines). Defaults to "\r\n" (CRLF).
 * @param integer $indent Accepted for signature compatibility; it is not forwarded and has no effect here.
 * @return string A converted version of the string represented in ASCII.
 */
if (!function_exists('mb_encode_mimeheader')) {
    function mb_encode_mimeheader($str, $charset = 'UTF-8', $transfer_encoding = 'B', $linefeed = "\r\n", $indent = 1) {
        $encoded = Multibyte::mimeEncode($str, $charset, $linefeed);
        return $encoded;
    }
}
00237 /**
00238  * Multibyte handling methods.
00239  *
00240  *
00241  * @package       cake
00242  * @subpackage    cake.cake.libs
00243  */
00244 class Multibyte extends Object {
00245 /**
00246  *  Holds the case folding values
00247  *
00248  * @var array
00249  * @access private
00250  */
00251     var $__caseFold = array();
00252 /**
00253  * Holds an array of Unicode code point ranges
00254  *
00255  * @var array
00256  * @access private
00257  */
00258     var $__codeRange = array();
00259 /**
00260  * Holds the current code point range
00261  *
00262  * @var string
00263  * @access private
00264  */
00265     var $__table = null;
/**
 * Returns the shared Multibyte instance, creating it on first use.
 *
 * The instance is held in a function-level static array, so every call hands
 * back a reference to the same object.
 *
 * @return object Multibyte instance
 * @access public
 * @static
 */
    function &getInstance() {
        static $instance = array();

        if (!$instance) {
            // Plain assignment: "=& new" is deprecated as of PHP 5.3 and no longer parses in PHP 7+.
            $instance[0] = new Multibyte();
        }
        return $instance[0];
    }
/**
 * Decodes a UTF-8 byte string into an array of decimal code point values.
 *
 * Bytes below 128 are emitted as-is. Any other byte starts (or continues) a
 * multi-byte sequence whose length is derived from the lead byte: 2 bytes for
 * lead bytes below 224, 3 bytes below 240, and 4 bytes otherwise. No validation
 * of continuation bytes is performed.
 *
 * @param multibyte string $string
 * @return array One integer per decoded character, in input order.
 * @access public
 * @static
 */
    function utf8($string) {
        $map = array();

        $values = array();
        $find = 1;
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $value = ord($string[$i]);

            if ($value < 128) {
                $map[] = $value;
                continue;
            }

            if (count($values) == 0) {
                // The lead byte decides how many bytes make up this character.
                if ($value < 224) {
                    $find = 2;
                } elseif ($value < 240) {
                    $find = 3;
                } else {
                    $find = 4;
                }
            }
            $values[] = $value;

            if (count($values) !== $find) {
                continue;
            }

            // Strip the marker bits from each byte and combine the 6-bit payloads.
            if ($find == 4) {
                $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096) + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($find == 3) {
                $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
            }
            $values = array();
            $find = 1;
        }
        return $map;
    }
/**
 * Encodes an array of decimal code point values as a UTF-8 byte string.
 *
 * Values below 128 become one byte, below 2048 two bytes, below 65536 three
 * bytes, and anything larger four bytes.
 *
 * @param array $array Code point values, e.g. as produced by Multibyte::utf8().
 * @return string The encoded bytes, in input order.
 * @access public
 * @static
 */
    function ascii($array) {
        $ascii = '';

        foreach ($array as $utf8) {
            if ($utf8 < 128) {
                $ascii .= chr($utf8);
            } elseif ($utf8 < 2048) {
                $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
                $ascii .= chr(128 + ($utf8 % 64));
            } elseif ($utf8 < 65536) {
                $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
                $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
                $ascii .= chr(128 + ($utf8 % 64));
            } else {
                // Code points beyond the 3-byte range need a 4-byte sequence.
                $ascii .= chr(240 + (($utf8 - ($utf8 % 262144)) / 262144));
                $ascii .= chr(128 + ((($utf8 % 262144) - ($utf8 % 4096)) / 4096));
                $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
                $ascii .= chr(128 + ($utf8 % 64));
            }
        }
        return $ascii;
    }
/**
 * Case-insensitive search for the first occurrence of a string.
 *
 * When running on PHP5 and Multibyte::checkMultibyte() reports nothing for
 * $haystack, PHP's own stripos() is used. Otherwise both strings are uppercased
 * with Multibyte::strtoupper() and the search is handed to Multibyte::strpos().
 *
 * @param multi-byte string $haystack The string to search in.
 * @param multi-byte string $needle The string to look for in $haystack.
 * @param integer $offset The position in $haystack at which the search starts.
 * @return integer|boolean The position of the first occurrence of $needle in $haystack, or false if it is not found.
 * @access public
 * @static
 */
    function stripos($haystack, $needle, $offset = 0) {
        if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
            return stripos($haystack, $needle, $offset);
        }

        $upperHaystack = Multibyte::strtoupper($haystack);
        $upperNeedle = Multibyte::strtoupper($needle);
        return Multibyte::strpos($upperHaystack, $upperNeedle, $offset);
    }
/**
 * Finds the first occurrence of a string within another, case insensitive.
 *
 * The code point based search is used when Multibyte::checkMultibyte() flags
 * $haystack, or when $part is requested on a PHP version older than 5.3 (whose
 * native stristr() has no third argument). In all other cases PHP's stristr()
 * does the work.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                true  - everything before the first occurrence of $needle
 *                        (an empty string when the match is at the very start);
 *                false - everything from the first occurrence of $needle to the end (default).
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
    function stristr($haystack, $needle, $part = false) {
        // version_compare() avoids comparing a version string such as "5.2.6" with a float.
        $php = version_compare(PHP_VERSION, '5.3.0', '<');

        if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
            // Matching is done on the uppercased copy; the result is cut from the original.
            // NOTE(review): assumes uppercasing keeps the number of code points unchanged - confirm.
            $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
            $haystack = Multibyte::utf8($haystack);
            $haystackCount = count($haystack);

            $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
            $needleCount = count($needle);

            if ($needleCount === 0) {
                return false;
            }

            for ($position = 0; $position < $haystackCount; $position++) {
                for ($i = 0; $i < $needleCount; $i++) {
                    // isset() keeps a match attempt near the end from reading past the array.
                    if (!isset($check[$position + $i]) || $needle[$i] !== $check[$position + $i]) {
                        break;
                    }
                }
                if ($i !== $needleCount) {
                    continue;
                }
                if ($part) {
                    return Multibyte::ascii(array_slice($haystack, 0, $position));
                }
                return Multibyte::ascii(array_slice($haystack, $position));
            }
            return false;
        }

        if (!$php) {
            return stristr($haystack, $needle, $part);
        }
        return stristr($haystack, $needle);
    }
/**
 * Returns the length of a string in characters.
 *
 * Strings flagged by Multibyte::checkMultibyte() are decoded with
 * Multibyte::utf8() and the decoded values are counted; all other strings are
 * measured with PHP's strlen().
 *
 * @param string $string The string whose length is measured.
 * @return integer The number of characters in $string.
 * @access public
 * @static
 */
    function strlen($string) {
        if (!Multibyte::checkMultibyte($string)) {
            return strlen($string);
        }
        $codePoints = Multibyte::utf8($string);
        return count($codePoints);
    }
/**
 * Finds the position of the first occurrence of a string.
 *
 * Strings flagged by Multibyte::checkMultibyte() are compared code point by
 * code point, so the returned position counts characters, not bytes. All other
 * haystacks are handed to PHP's strpos().
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset The character position at which the search starts. Defaults to 0;
 *                        in the code point search, values below 0 are treated as 0.
 * @return integer|boolean The position of the first occurrence of $needle in $haystack,
 *                         or false if it is not found.
 * @access public
 * @static
 */
    function strpos($haystack, $needle, $offset = 0) {
        if (!Multibyte::checkMultibyte($haystack)) {
            return strpos($haystack, $needle, $offset);
        }

        $haystack = Multibyte::utf8($haystack);
        $needle = Multibyte::utf8($needle);
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        // A match cannot start later than this, which keeps every index read in range.
        $last = count($haystack) - $needleCount;

        for ($position = max(0, $offset); $position <= $last; $position++) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $haystack[$position + $i]) {
                    break;
                }
            }
            if ($i === $needleCount) {
                return $position;
            }
        }
        return false;
    }
/**
 * Finds the last occurrence of a string within another.
 *
 * Both strings are decoded with Multibyte::utf8() and the haystack is scanned
 * backwards from the last position at which $needle could still fit, so the
 * first match found is the last occurrence.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                      true  - everything before the last occurrence of $needle
 *                              (an empty string when the match is at the very start);
 *                      false - everything from the last occurrence of $needle to the end (default).
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
    function strrchr($haystack, $needle, $part = false) {
        $haystack = Multibyte::utf8($haystack);
        $needle = Multibyte::utf8($needle);
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        for ($position = count($haystack) - $needleCount; $position >= 0; $position--) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $haystack[$position + $i]) {
                    break;
                }
            }
            if ($i !== $needleCount) {
                continue;
            }
            if ($part) {
                return Multibyte::ascii(array_slice($haystack, 0, $position));
            }
            return Multibyte::ascii(array_slice($haystack, $position));
        }
        return false;
    }
/**
 * Finds the last occurrence of a string within another, case insensitive.
 *
 * Matching is done on copies uppercased with Multibyte::strtoupper() and
 * decoded with Multibyte::utf8(); the returned portion is cut from the
 * original $haystack. The scan runs backwards, so the first match found is the
 * last occurrence.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param boolean $part Selects which portion of $haystack is returned:
 *                      true  - everything before the last occurrence of $needle
 *                              (an empty string when the match is at the very start);
 *                      false - everything from the last occurrence of $needle to the end (default).
 * @return string|boolean The selected portion of $haystack, or false if $needle is not found.
 * @access public
 * @static
 */
    function strrichr($haystack, $needle, $part = false) {
        // NOTE(review): assumes uppercasing keeps the number of code points unchanged,
        // so positions in $check line up with positions in $haystack - confirm.
        $check = Multibyte::utf8(Multibyte::strtoupper($haystack));
        $haystack = Multibyte::utf8($haystack);

        $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        for ($position = count($check) - $needleCount; $position >= 0; $position--) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $check[$position + $i]) {
                    break;
                }
            }
            if ($i !== $needleCount) {
                continue;
            }
            if ($part) {
                return Multibyte::ascii(array_slice($haystack, 0, $position));
            }
            return Multibyte::ascii(array_slice($haystack, $position));
        }
        return false;
    }
/**
 * Finds the position of the last occurrence of a string within another, case insensitive.
 *
 * On PHP5, haystacks not flagged by Multibyte::checkMultibyte() are handed to
 * PHP's strripos(). Otherwise both strings are uppercased with
 * Multibyte::strtoupper(), decoded with Multibyte::utf8() and scanned backwards,
 * so the returned position counts characters, not bytes.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset Zero or positive: matches starting before this position are ignored.
 *                        Negative: matches starting after that many characters from the end are ignored.
 * @return integer|boolean The position of the last occurrence of $needle in $haystack, or false if it is not found.
 * @access public
 * @static
 */
    function strripos($haystack, $needle, $offset = 0) {
        if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
            return strripos($haystack, $needle, $offset);
        }

        $haystack = Multibyte::utf8(Multibyte::strtoupper($haystack));
        $haystackCount = count($haystack);

        $needle = Multibyte::utf8(Multibyte::strtoupper($needle));
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        // Range of positions at which a match is allowed to start.
        $first = 0;
        $last = $haystackCount - $needleCount;
        if ($offset < 0) {
            $last = min($last, $haystackCount + $offset);
        } else {
            $first = $offset;
        }

        for ($position = $last; $position >= $first; $position--) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $haystack[$position + $i]) {
                    break;
                }
            }
            if ($i === $needleCount) {
                return $position;
            }
        }
        return false;
    }
00657 
/**
 * Finds the position of the last occurrence of a string in a string.
 *
 * On PHP5, haystacks not flagged by Multibyte::checkMultibyte() are handed to
 * PHP's strrpos(). Otherwise both strings are decoded with Multibyte::utf8()
 * and scanned backwards, so the returned position counts characters, not bytes.
 *
 * @param string $haystack The string to search in.
 * @param string $needle The string to look for in $haystack.
 * @param integer $offset Zero or positive: matches starting before this position are ignored.
 *                        Negative: matches starting after that many characters from the end are ignored.
 * @return integer|boolean The position of the last occurrence of $needle in $haystack, or false if it is not found.
 * @access public
 * @static
 */
    function strrpos($haystack, $needle, $offset = 0) {
        if (PHP5 && !Multibyte::checkMultibyte($haystack)) {
            return strrpos($haystack, $needle, $offset);
        }

        $haystack = Multibyte::utf8($haystack);
        $haystackCount = count($haystack);

        $needle = Multibyte::utf8($needle);
        $needleCount = count($needle);

        if ($needleCount === 0) {
            return false;
        }

        // Range of positions at which a match is allowed to start.
        $first = 0;
        $last = $haystackCount - $needleCount;
        if ($offset < 0) {
            $last = min($last, $haystackCount + $offset);
        } else {
            $first = $offset;
        }

        for ($position = $last; $position >= $first; $position--) {
            for ($i = 0; $i < $needleCount; $i++) {
                if ($needle[$i] !== $haystack[$position + $i]) {
                    break;
                }
            }
            if ($i === $needleCount) {
                return $position;
            }
        }
        return false;
    }
00708 /**
00709  * Finds first occurrence of a string within another
00710  *
00711  * @param string $haystack The string from which to get the first occurrence of $needle.
00712  * @param string $needle The string to find in $haystack
00713  * @param boolean $part Determines which portion of $haystack this function returns.
00714  *                      If set to true, it returns all of $haystack from the beginning to the first occurrence of $needle.
00715  *                      If set to false, it returns all of $haystack from the first occurrence of $needle to the end, Default value is FALSE.
00716  * @return string|boolean The portion of $haystack, or false if $needle is not found.
00717  * @access public
00718  * @static
00719  */
00720     function strstr($haystack, $needle, $part = false) {
00721         $php = (PHP_VERSION < 5.3);
00722 
00723         if (($php && $part) || Multibyte::checkMultibyte($haystack)) {
00724             $check = Multibyte::utf8($haystack);
00725             $found = false;
00726 
00727             $haystack = Multibyte::utf8($haystack);
00728             $haystackCount = count($haystack);
00729 
00730             $needle = Multibyte::utf8($needle);
00731             $needleCount = count($needle);
00732 
00733             $parts = array();
00734             $position = 0;
00735 
00736             while (($found === false) && ($position < $haystackCount)) {
00737                 if (isset($needle[0]) && $needle[0] === $check[$position]) {
00738                     for ($i = 1; $i < $needleCount; $i++) {
00739                         if ($needle[$i] !== $check[$position + $i]) {
00740                             break;
00741                         }
00742                     }
00743                     if ($i === $needleCount) {
00744                         $found = true;
00745                     }
00746                 }
00747                 if (!$found) {
00748                     $parts[] = $haystack[$position];
00749                     unset($haystack[$position]);
00750                 }
00751                 $position++;
00752             }
00753 
00754             if ($found && $part && !empty($parts)) {
00755                 return Multibyte::ascii($parts);
00756             } elseif ($found && !empty($haystack)) {
00757                 return Multibyte::ascii($haystack);
00758             }
00759             return false;
00760         }
00761 
00762         if (!$php) {
00763             return strstr($haystack, $needle, $part);
00764         }
00765         return strstr($haystack, $needle);
00766     }
00767 /**
00768  * Make a string lowercase
00769  *
00770  * @param string $string The string being lowercased.
00771  * @return string with all alphabetic characters converted to lowercase.
00772  * @access public
00773  * @static
00774  */
00775     function strtolower($string) {
00776         $_this =& Multibyte::getInstance();
00777         $utf8Map = Multibyte::utf8($string);
00778 
00779         $length = count($utf8Map);
00780         $lowerCase = array();
00781         $matched = false;
00782 
00783         for ($i = 0 ; $i < $length; $i++) {
00784             $char = $utf8Map[$i];
00785 
00786             if ($char < 128) {
00787                 $str = strtolower(chr($char));
00788                 $strlen = strlen($str);
00789                 for ($ii = 0 ; $ii < $strlen; $ii++) {
00790                     $lower = ord(substr($str, $ii, 1));
00791                 }
00792                 $lowerCase[] = $lower;
00793                 $matched = true;
00794             } else {
00795                 $matched = false;
00796                 $keys = $_this->__find($char, 'upper');
00797 
00798                 if (!empty($keys)) {
00799                     foreach ($keys as $key => $value) {
00800                         if ($keys[$key]['upper'] == $char && count($keys[$key]['lower'][0]) === 1) {
00801                             $lowerCase[] = $keys[$key]['lower'][0];
00802                             $matched = true;
00803                             break 1;
00804                         }
00805                     }
00806                 }
00807             }
00808             if ($matched === false) {
00809                 $lowerCase[] = $char;
00810             }
00811         }
00812         return Multibyte::ascii($lowerCase);
00813     }
00814 /**
00815  * Make a string uppercase
00816  *
00817  * @param string $string The string being uppercased.
00818  * @param string $encoding Character encoding name to use. If it is omitted, internal character encoding is used.
00819  * @return string with all alphabetic characters converted to uppercase.
00820  * @access public
00821  * @static
00822  */
00823     function strtoupper($string) {
00824         $_this =& Multibyte::getInstance();
00825         $utf8Map = Multibyte::utf8($string);
00826 
00827         $length = count($utf8Map);
00828         $matched = false;
00829         $replaced = array();
00830         $upperCase = array();
00831 
00832         for ($i = 0 ; $i < $length; $i++) {
00833             $char = $utf8Map[$i];
00834 
00835             if ($char < 128) {
00836                 $str = strtoupper(chr($char));
00837                 $strlen = strlen($str);
00838                 for ($ii = 0 ; $ii < $strlen; $ii++) {
00839                     $upper = ord(substr($str, $ii, 1));
00840                 }
00841                 $upperCase[] = $upper;
00842                 $matched = true;
00843 
00844             } else {
00845                 $matched = false;
00846                 $keys = $_this->__find($char);
00847                 $keyCount = count($keys);
00848 
00849                 if (!empty($keys)) {
00850                     foreach ($keys as $key => $value) {
00851                         $matched = false;
00852                         $replace = 0;
00853                         if ($length > 1 && count($keys[$key]['lower']) > 1) {
00854                             $j = 0;
00855 
00856                             for ($ii = 0; $ii < count($keys[$key]['lower']); $ii++) {
00857                                 $nextChar = $utf8Map[$i + $ii];
00858 
00859                                 if (isset($nextChar) && ($nextChar == $keys[$key]['lower'][$j + $ii])) {
00860                                     $replace++;
00861                                 }
00862                             }
00863                             if ($replace == count($keys[$key]['lower'])) {
00864                                 $upperCase[] = $keys[$key]['upper'];
00865                                 $replaced = array_merge($replaced, array_values($keys[$key]['lower']));
00866                                 $matched = true;
00867                                 break 1;
00868                             }
00869                         } elseif ($length > 1 && $keyCount > 1) {
00870                             $j = 0;
00871                             for ($ii = 1; $ii < $keyCount; $ii++) {
00872                                 $nextChar = $utf8Map[$i + $ii - 1];
00873 
00874                                 if (in_array($nextChar, $keys[$ii]['lower'])) {
00875 
00876                                     for ($jj = 0; $jj < count($keys[$ii]['lower']); $jj++) {
00877                                         $nextChar = $utf8Map[$i + $jj];
00878 
00879                                         if (isset($nextChar) && ($nextChar == $keys[$ii]['lower'][$j + $jj])) {
00880                                             $replace++;
00881                                         }
00882                                     }
00883                                     if ($replace == count($keys[$ii]['lower'])) {
00884                                         $upperCase[] = $keys[$ii]['upper'];
00885                                         $replaced = array_merge($replaced, array_values($keys[$ii]['lower']));
00886                                         $matched = true;
00887                                         break 2;
00888                                     }
00889                                 }
00890                             }
00891                         }
00892                         if ($keys[$key]['lower'][0] == $char) {
00893                             $upperCase[] = $keys[$key]['upper'];
00894                             $matched = true;
00895                             break 1;
00896                         }
00897                     }
00898                 }
00899             }
00900             if ($matched === false && !in_array($char, $replaced, true)) {
00901                 $upperCase[] = $char;
00902             }
00903         }
00904         return Multibyte::ascii($upperCase);
00905     }
00906 /**
00907  * Count the number of substring occurrences
00908  *
00909  * @param string $haystack The string being checked.
00910  * @param string $needle The string being found.
00911  * @return integer The number of times the $needle substring occurs in the $haystack string.
00912  * @access public
00913  * @static
00914  */
00915     function substrCount($haystack, $needle) {
00916         $count = 0;
00917         $haystack = Multibyte::utf8($haystack);
00918         $haystackCount = count($haystack);
00919         $matches = array_count_values($haystack);
00920         $needle = Multibyte::utf8($needle);
00921         $needleCount = count($needle);
00922 
00923         if ($needleCount === 1 && isset($matches[$needle[0]])) {
00924             return $matches[$needle[0]];
00925         }
00926 
00927         for ($i = 0; $i < $haystackCount; $i++) {
00928             if (isset($needle[0]) && $needle[0] === $haystack[$i]) {
00929                 for ($ii = 1; $ii < $needleCount; $ii++) {
00930                     if ($needle[$ii] === $haystack[$i + 1]) {
00931                         if ((isset($needle[$ii + 1]) && $haystack[$i + 2]) && $needle[$ii + 1] !== $haystack[$i + 2]) {
00932                             $count--;
00933                         } else {
00934                             $count++;
00935                         }
00936                     }
00937                 }
00938             }
00939         }
00940         return $count;
00941     }
00942 /**
00943  * Get part of string
00944  *
00945  * @param string $string The string being checked.
00946  * @param integer $start The first position used in $string.
00947  * @param integer $length The maximum length of the returned string.
00948  * @return string The portion of $string specified by the $start and $length parameters.
00949  * @access public
00950  * @static
00951  */
00952     function substr($string, $start, $length = null) {
00953         if ($start === 0 && $length === null) {
00954             return $string;
00955         }
00956 
00957         $string = Multibyte::utf8($string);
00958         $stringCount = count($string);
00959 
00960         for ($i = 1; $i <= $start; $i++) {
00961             unset($string[$i - 1]);
00962         }
00963 
00964         if ($length === null || count($string) < $length) {
00965             return Multibyte::ascii($string);
00966         }
00967         $string = array_values($string);
00968 
00969         $value = array();
00970         for ($i = 0; $i < $length; $i++) {
00971             $value[] = $string[$i];
00972         }
00973         return Multibyte::ascii($value);
00974     }
00975 /**
00976  * Prepare a string for mail transport, using the provided encoding
00977  *
00978  * @param string $string value to encode
00979  * @param string $charset charset to use for encoding. defaults to UTF-8
00980  * @param string $newline
00981  * @return string
00982  * @access public
00983  * @static
00984  * @TODO: add support for 'Q'('Quoted Printable') encoding
00985  */
00986     function mimeEncode($string, $charset = null, $newline = "\r\n") {
00987         if (!Multibyte::checkMultibyte($string) && strlen($string) < 75) {
00988             return $string;
00989         }
00990 
00991         if (empty($charset)) {
00992             $charset = Configure::read('App.encoding');
00993         }
00994         $charset = strtoupper($charset);
00995 
00996         $start = '=?' . $charset . '?B?';
00997         $end = '?=';
00998         $spacer = $end . $newline . ' ' . $start;
00999 
01000         $length = 75 - strlen($start) - strlen($end);
01001         $length = $length - ($length % 4);
01002         if ($charset == 'UTF-8') {
01003             $parts = array();
01004             $maxchars = floor(($length * 3) / 4);
01005             while (strlen($string) > $maxchars) {
01006                 $i = $maxchars;
01007                 $test = ord($string[$i]);
01008                 while ($test >= 128 && $test <= 191) {
01009                     $i--;
01010                     $test = ord($string[$i]);
01011                 }
01012                 $parts[] = base64_encode(substr($string, 0, $i));
01013                 $string = substr($string, $i);
01014             }
01015             $parts[] = base64_encode($string);
01016             $string = implode($spacer, $parts);
01017         } else {
01018             $string = chunk_split(base64_encode($string), $length, $spacer);
01019             $string = preg_replace('/' . preg_quote($spacer) . '$/', '', $string);
01020         }
01021         return $start . $string . $end;
01022     }
01023 /**
01024  * Return the Code points range for Unicode characters
01025  *
01026  * @param integer $decimal
01027  * @return string
01028  * @access private
01029  */
01030     function __codepoint ($decimal) {
01031         if ($decimal > 128 && $decimal < 256)  {
01032             $return = '0080_00ff'; // Latin-1 Supplement
01033         } elseif ($decimal < 384) {
01034             $return = '0100_017f'; // Latin Extended-A
01035         } elseif ($decimal < 592) {
01036             $return = '0180_024F'; // Latin Extended-B
01037         } elseif ($decimal < 688) {
01038             $return = '0250_02af'; // IPA Extensions
01039         } elseif ($decimal >= 880 && $decimal < 1024) {
01040             $return = '0370_03ff'; // Greek and Coptic
01041         } elseif ($decimal < 1280) {
01042             $return = '0400_04ff'; // Cyrillic
01043         } elseif ($decimal < 1328) {
01044             $return = '0500_052f'; // Cyrillic Supplement
01045         } elseif ($decimal < 1424) {
01046             $return = '0530_058f'; // Armenian
01047         } elseif ($decimal >= 7680 && $decimal < 7936) {
01048             $return = '1e00_1eff'; // Latin Extended Additional
01049         } elseif ($decimal < 8192) {
01050             $return = '1f00_1fff'; // Greek Extended
01051         } elseif ($decimal >= 8448 && $decimal < 8528) {
01052             $return = '2100_214f'; // Letterlike Symbols
01053         } elseif ($decimal < 8592) {
01054             $return = '2150_218f'; // Number Forms
01055         } elseif ($decimal >= 9312 && $decimal < 9472) {
01056             $return = '2460_24ff'; // Enclosed Alphanumerics
01057         } elseif ($decimal >= 11264 && $decimal < 11360) {
01058             $return = '2c00_2c5f'; // Glagolitic
01059         } elseif ($decimal < 11392) {
01060             $return = '2c60_2c7f'; // Latin Extended-C
01061         } elseif ($decimal < 11520) {
01062             $return = '2c80_2cff'; // Coptic
01063         } elseif ($decimal >= 65280 && $decimal < 65520) {
01064             $return = 'ff00_ffef'; // Halfwidth and Fullwidth Forms
01065         } else {
01066             $return = false;
01067         }
01068         $this->__codeRange[$decimal] = $return;
01069         return $return;
01070     }
01071 /**
01072  * Find the related code folding values for $char
01073  *
01074  * @param integer $char decimal value of character
01075  * @param string $type
01076  * @return array
01077  * @access private
01078  */
01079     function __find($char, $type = 'lower') {
01080         $value = false;
01081         $found = array();
01082         if (!isset($this->__codeRange[$char])) {
01083             $range = $this->__codepoint($char);
01084             if ($range === false) {
01085                 return null;
01086             }
01087             Configure::load('unicode' . DS . 'casefolding' . DS . $range);
01088             $this->__caseFold[$range] = Configure::read($range);
01089             Configure::delete($range);
01090         }
01091 
01092         if (!$this->__codeRange[$char]) {
01093             return null;
01094         }
01095         $this->__table = $this->__codeRange[$char];
01096         $count = count($this->__caseFold[$this->__table]);
01097 
01098         for ($i = 0; $i < $count; $i++) {
01099             if ($type === 'lower' && $this->__caseFold[$this->__table][$i][$type][0] === $char) {
01100                 $found[] = $this->__caseFold[$this->__table][$i];
01101             } elseif ($type === 'upper' && $this->__caseFold[$this->__table][$i][$type] === $char) {
01102                 $found[] = $this->__caseFold[$this->__table][$i];
01103             }
01104         }
01105         return $found;
01106     }
01107 /**
01108  * Check the $string for multibyte characters
01109  * @param string $string value to test
01110  * @return boolean
01111  * @access public
01112  * @static
01113  */
01114     function checkMultibyte($string) {
01115         $length = strlen($string);
01116 
01117         for ($i = 0; $i < $length; $i++ ) {
01118             $value = ord(($string[$i]));
01119             if ($value > 128) {
01120                 return true;
01121             }
01122         }
01123         return false;
01124     }
01125 }
01126 ?>

Generated on Sun Nov 22 00:30:53 2009 for CakePHP 1.2.x.x (v1.2.4.8284) by doxygen 1.4.7