inflector.php

Go to the documentation of this file.
00001 <?php
00002 /* SVN FILE: $Id: inflector.php 8170 2009-05-08 13:25:19Z mark_story $ */
00003 /**
00004  * Pluralize and singularize English words.
00005  *
00006  * Used by Cake's naming conventions throughout the framework.
00007  *
00008  * PHP versions 4 and 5
00009  *
00010  * CakePHP(tm) :  Rapid Development Framework (http://www.cakephp.org)
00011  * Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
00012  *
00013  * Licensed under The MIT License
00014  * Redistributions of files must retain the above copyright notice.
00015  *
00016  * @filesource
00017  * @copyright     Copyright 2005-2008, Cake Software Foundation, Inc. (http://www.cakefoundation.org)
00018  * @link          http://www.cakefoundation.org/projects/info/cakephp CakePHP(tm) Project
00019  * @package       cake
00020  * @subpackage    cake.cake.libs
00021  * @since         CakePHP(tm) v 0.2.9
00022  * @version       $Revision: 8170 $
00023  * @modifiedby    $LastChangedBy: mark_story $
00024  * @lastmodified  $Date: 2009-05-08 09:25:19 -0400 (Fri, 08 May 2009) $
00025  * @license       http://www.opensource.org/licenses/mit-license.php The MIT License
00026  */
00027 /**
00028  * Included libraries.
00029  *
00030  */
00031 if (!class_exists('Object')) {
00032     uses('object');
00033 }
00034 if (!class_exists('Set')) {
00035     require LIBS . 'set.php';
00036 }
00037 /**
00038  * Pluralize and singularize English words.
00039  *
00040  * Inflector pluralizes and singularizes English nouns.
00041  * Used by Cake's naming conventions throughout the framework.
00042  * Test with $i = new Inflector(); $i->test();
00043  *
00044  * @package       cake
00045  * @subpackage    cake.cake.libs
00046  * @link          http://book.cakephp.org/view/491/Inflector
00047  */
00048 class Inflector extends Object {
00049 /**
00050  * Pluralized words.
00051  *
00052  * @var array
00053  * @access private
00054  **/
00055     var $pluralized = array();
00056 /**
00057  * List of pluralization rules in the form of pattern => replacement.
00058  *
00059  * @var array
00060  * @access public
00061  * @link http://book.cakephp.org/view/47/Custom-Inflections
00062  **/
00063     var $pluralRules = array();
00064 /**
00065  * Singularized words.
00066  *
00067  * @var array
00068  * @access private
00069  **/
00070     var $singularized = array();
00071 /**
00072  * List of singularization rules in the form of pattern => replacement.
00073  *
00074  * @var array
00075  * @access public
00076  * @link http://book.cakephp.org/view/47/Custom-Inflections
00077  **/
00078     var $singularRules = array();
00079 /**
00080  * Plural rules from inflections.php
00081  *
00082  * @var array
00083  * @access private
00084  **/
00085     var $__pluralRules = array();
00086 /**
00087  * Un-inflected plural rules from inflections.php
00088  *
00089  * @var array
00090  * @access private
00091  **/
00092     var $__uninflectedPlural = array();
00093 /**
00094  * Irregular plural rules from inflections.php
00095  *
00096  * @var array
00097  * @access private
00098  **/
00099     var $__irregularPlural = array();
00100 /**
00101  * Singular rules from inflections.php
00102  *
00103  * @var array
00104  * @access private
00105  **/
00106     var $__singularRules = array();
00107 /**
00108  * Un-inflected singular rules from inflections.php
00109  *
00110  * @var array
00111  * @access private
00112  **/
00113     var $__uninflectedSingular = array();
00114 /**
00115  * Irregular singular rules from inflections.php
00116  *
00117  * @var array
00118  * @access private
00119  **/
00120     var $__irregularSingular = array();
/**
 * Gets a reference to the shared Inflector instance, creating it on first use.
 *
 * When the instance is created and CONFIGS . 'inflections.php' exists, that file is
 * included and the custom rules it defines are copied onto the instance. The variables
 * read are $pluralRules, $uninflectedPlural, $irregularPlural and $singularRules; any
 * of them that is missing or not an array is treated as an empty array. If the file
 * defines $uninflectedSingular or $irregularSingular as arrays they are used as-is,
 * otherwise they are derived from $uninflectedPlural and array_flip($irregularPlural).
 *
 * @return object Inflector instance
 * @access public
 * @static
 */
    function &getInstance() {
        static $instance = array();

        if (!$instance) {
            $instance[0] =& new Inflector();
            if (file_exists(CONFIGS . 'inflections.php')) {
                include(CONFIGS . 'inflections.php');

                // Guard each expected variable so an incomplete config file cannot raise
                // "undefined variable" notices or push non-array values into the rule tables.
                $pluralRules = (isset($pluralRules) && is_array($pluralRules)) ? $pluralRules : array();
                $uninflectedPlural = (isset($uninflectedPlural) && is_array($uninflectedPlural)) ? $uninflectedPlural : array();
                $irregularPlural = (isset($irregularPlural) && is_array($irregularPlural)) ? $irregularPlural : array();
                $singularRules = (isset($singularRules) && is_array($singularRules)) ? $singularRules : array();

                // Prefer explicitly configured singular tables; fall back to deriving them
                // from the plural ones when the config does not provide them.
                if (!isset($uninflectedSingular) || !is_array($uninflectedSingular)) {
                    $uninflectedSingular = $uninflectedPlural;
                }
                if (!isset($irregularSingular) || !is_array($irregularSingular)) {
                    $irregularSingular = array_flip($irregularPlural);
                }

                $instance[0]->__pluralRules = $pluralRules;
                $instance[0]->__uninflectedPlural = $uninflectedPlural;
                $instance[0]->__irregularPlural = $irregularPlural;
                $instance[0]->__singularRules = $singularRules;
                $instance[0]->__uninflectedSingular = $uninflectedSingular;
                $instance[0]->__irregularSingular = $irregularSingular;
            }
        }
        return $instance[0];
    }
/**
 * Builds the plural inflection tables and clears the pluralization cache.
 *
 * The core regex rules, uninflected words and irregular words declared here are
 * combined with the custom tables held in $__pluralRules, $__uninflectedPlural and
 * $__irregularPlural through Set::pushDiff(); the result is stored in $pluralRules
 * under the keys 'pluralRules', 'uninflected' and 'irregular'.
 *
 * @return void
 * @access private
 */
    function __initPluralRules() {
        // Pattern => replacement; pluralize() applies the first pattern that matches.
        $rules = array(
            '/(s)tatus$/i' => '\1\2tatuses',
            '/(quiz)$/i' => '\1zes',
            '/^(ox)$/i' => '\1\2en',
            '/([m|l])ouse$/i' => '\1ice',
            '/(matr|vert|ind)(ix|ex)$/i' => '\1ices',
            '/(x|ch|ss|sh)$/i' => '\1es',
            '/([^aeiouy]|qu)y$/i' => '\1ies',
            '/(hive)$/i' => '\1s',
            '/(?:([^f])fe|([lr])f)$/i' => '\1\2ves',
            '/sis$/i' => 'ses',
            '/([ti])um$/i' => '\1a',
            '/(p)erson$/i' => '\1eople',
            '/(m)an$/i' => '\1en',
            '/(c)hild$/i' => '\1hildren',
            '/(buffal|tomat)o$/i' => '\1\2oes',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$/i' => '\1i',
            '/us$/' => 'uses',
            '/(alias)$/i' => '\1es',
            '/(ax|cris|test)is$/i' => '\1es',
            '/s$/' => 's',
            '/^$/' => '',
            '/$/' => 's'
        );

        // Regex fragments for words whose plural equals the singular.
        $unchanged = array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox',
            '.*sheep', 'Amoyese', 'bison', 'Borghese', 'bream', 'breeches',
            'britches', 'buffalo', 'cantus', 'carp', 'chassis', 'clippers',
            'cod', 'coitus', 'Congoese', 'contretemps', 'corps', 'debris',
            'diabetes', 'djinn', 'eland', 'elk', 'equipment', 'Faroese',
            'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese', 'Gilbertese',
            'graffiti', 'headquarters', 'herpes', 'hijinks', 'Hottentotese', 'information',
            'innings', 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese', 'mackerel',
            'Maltese', 'media', 'mews', 'moose', 'mumps', 'Nankingese',
            'news', 'nexus', 'Niasese', 'Pekingese', 'People', 'Piedmontese',
            'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings', 'rabies',
            'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass',
            'series', 'Shavese', 'shears', 'siemens', 'species', 'swine',
            'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
            'whiting', 'wildebeest', 'Yengeese'
        );

        // Singular => plural for words the regex rules would get wrong.
        $exceptions = array(
            'atlas' => 'atlases',
            'beef' => 'beefs',
            'brother' => 'brothers',
            'child' => 'children',
            'corpus' => 'corpuses',
            'cow' => 'cows',
            'ganglion' => 'ganglions',
            'genie' => 'genies',
            'genus' => 'genera',
            'graffito' => 'graffiti',
            'hoof' => 'hoofs',
            'loaf' => 'loaves',
            'man' => 'men',
            'money' => 'monies',
            'mongoose' => 'mongooses',
            'move' => 'moves',
            'mythos' => 'mythoi',
            'numen' => 'numina',
            'occiput' => 'occiputs',
            'octopus' => 'octopuses',
            'opus' => 'opuses',
            'ox' => 'oxen',
            'penis' => 'penises',
            'person' => 'people',
            'sex' => 'sexes',
            'soliloquy' => 'soliloquies',
            'testis' => 'testes',
            'trilby' => 'trilbys',
            'turf' => 'turfs'
        );

        $this->pluralRules = array(
            'pluralRules' => Set::pushDiff($this->__pluralRules, $rules),
            'uninflected' => Set::pushDiff($this->__uninflectedPlural, $unchanged),
            'irregular' => Set::pushDiff($this->__irregularPlural, $exceptions)
        );
        $this->pluralized = array();
    }
/**
 * Return $word in plural form.
 *
 * Results are cached per input word. A word is resolved in this order: it matches the
 * uninflected list (returned unchanged), it ends with an irregular word preceded by a
 * word boundary (the irregular plural is substituted, keeping the case of the matched
 * word's first letter), or the first matching regex rule is applied.
 *
 * @param string $word Word in singular
 * @return string Word in plural
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function pluralize($word) {
        $_this =& Inflector::getInstance();
        if (empty($_this->pluralRules)) {
            $_this->__initPluralRules();
        }

        if (isset($_this->pluralized[$word])) {
            return $_this->pluralized[$word];
        }
        // Brings $pluralRules, $uninflected, $irregular and, once built, the two
        // cached regex fragments into local scope.
        extract($_this->pluralRules);

        if (!isset($regexUninflected) || !isset($regexIrregular)) {
            $regexUninflected = __enclose(join( '|', $uninflected));
            $regexIrregular = __enclose(join( '|', array_keys($irregular)));
            $_this->pluralRules['regexUninflected'] = $regexUninflected;
            $_this->pluralRules['regexIrregular'] = $regexIrregular;
        }

        if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
            $_this->pluralized[$word] = $word;
            return $word;
        }

        if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
            // The preserved first letter must come from the matched irregular word
            // ($regs[2]), not from $word: with a prefix such as "sales person" the first
            // letter of $word belongs to the prefix and would yield "sales seople".
            $_this->pluralized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregular[strtolower($regs[2])], 1);
            return $_this->pluralized[$word];
        }

        foreach ($pluralRules as $rule => $replacement) {
            if (preg_match($rule, $word)) {
                $_this->pluralized[$word] = preg_replace($rule, $replacement, $word);
                return $_this->pluralized[$word];
            }
        }
        // Reached only when no rule pattern matches; return the word unchanged
        // instead of falling off the end with no return value.
        $_this->pluralized[$word] = $word;
        return $word;
    }
/**
 * Builds the singular inflection tables and clears the singularization cache.
 *
 * The core regex rules, uninflected words and irregular words declared here are
 * combined with the custom tables held in $__singularRules, $__uninflectedSingular
 * and $__irregularSingular through Set::pushDiff(); the result is stored in
 * $singularRules under the keys 'singularRules', 'uninflected' and 'irregular'.
 *
 * @return void
 * @access private
 */
    function __initSingularRules() {
        // Pattern => replacement; singularize() applies the first pattern that matches.
        $rules = array(
            '/(s)tatuses$/i' => '\1\2tatus',
            '/^(.*)(menu)s$/i' => '\1\2',
            '/(quiz)zes$/i' => '\\1',
            '/(matr)ices$/i' => '\1ix',
            '/(vert|ind)ices$/i' => '\1ex',
            '/^(ox)en/i' => '\1',
            '/(alias)(es)*$/i' => '\1',
            '/(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$/i' => '\1us',
            '/([ftw]ax)es/' => '\1',
            '/(cris|ax|test)es$/i' => '\1is',
            '/(shoe)s$/i' => '\1',
            '/(o)es$/i' => '\1',
            '/ouses$/' => 'ouse',
            '/uses$/' => 'us',
            '/([m|l])ice$/i' => '\1ouse',
            '/(x|ch|ss|sh)es$/i' => '\1',
            '/(m)ovies$/i' => '\1\2ovie',
            '/(s)eries$/i' => '\1\2eries',
            '/([^aeiouy]|qu)ies$/i' => '\1y',
            '/([lr])ves$/i' => '\1f',
            '/(tive)s$/i' => '\1',
            '/(hive)s$/i' => '\1',
            '/(drive)s$/i' => '\1',
            '/([^fo])ves$/i' => '\1fe',
            '/(^analy)ses$/i' => '\1sis',
            '/((a)naly|(b)a|(d)iagno|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$/i' => '\1\2sis',
            '/([ti])a$/i' => '\1um',
            '/(p)eople$/i' => '\1\2erson',
            '/(m)en$/i' => '\1an',
            '/(c)hildren$/i' => '\1\2hild',
            '/(n)ews$/i' => '\1\2ews',
            '/^(.*us)$/' => '\\1',
            '/s$/i' => ''
        );

        // Regex fragments for words whose singular equals the plural.
        $unchanged = array(
            '.*[nrlm]ese', '.*deer', '.*fish', '.*measles', '.*ois', '.*pox',
            '.*sheep', '.*ss', 'Amoyese', 'bison', 'Borghese', 'bream',
            'breeches', 'britches', 'buffalo', 'cantus', 'carp', 'chassis',
            'clippers', 'cod', 'coitus', 'Congoese', 'contretemps', 'corps',
            'debris', 'diabetes', 'djinn', 'eland', 'elk', 'equipment',
            'Faroese', 'flounder', 'Foochowese', 'gallows', 'Genevese', 'Genoese',
            'Gilbertese', 'graffiti', 'headquarters', 'herpes', 'hijinks', 'Hottentotese',
            'information', 'innings', 'jackanapes', 'Kiplingese', 'Kongoese', 'Lucchese',
            'mackerel', 'Maltese', 'media', 'mews', 'moose', 'mumps',
            'Nankingese', 'news', 'nexus', 'Niasese', 'Pekingese', 'Piedmontese',
            'pincers', 'Pistoiese', 'pliers', 'Portuguese', 'proceedings', 'rabies',
            'rice', 'rhinoceros', 'salmon', 'Sarawakese', 'scissors', 'sea[- ]bass',
            'series', 'Shavese', 'shears', 'siemens', 'species', 'swine',
            'testes', 'trousers', 'trout', 'tuna', 'Vermontese', 'Wenchowese',
            'whiting', 'wildebeest', 'Yengeese'
        );

        // Plural => singular for words the regex rules would get wrong.
        $exceptions = array(
            'atlases' => 'atlas',
            'beefs' => 'beef',
            'brothers' => 'brother',
            'children' => 'child',
            'corpuses' => 'corpus',
            'cows' => 'cow',
            'ganglions' => 'ganglion',
            'genies' => 'genie',
            'genera' => 'genus',
            'graffiti' => 'graffito',
            'hoofs' => 'hoof',
            'loaves' => 'loaf',
            'men' => 'man',
            'monies' => 'money',
            'mongooses' => 'mongoose',
            'moves' => 'move',
            'mythoi' => 'mythos',
            'numina' => 'numen',
            'occiputs' => 'occiput',
            'octopuses' => 'octopus',
            'opuses' => 'opus',
            'oxen' => 'ox',
            'penises' => 'penis',
            'people' => 'person',
            'sexes' => 'sex',
            'soliloquies' => 'soliloquy',
            'testes' => 'testis',
            'trilbys' => 'trilby',
            'turfs' => 'turf'
        );

        $this->singularRules = array(
            'singularRules' => Set::pushDiff($this->__singularRules, $rules),
            'uninflected' => Set::pushDiff($this->__uninflectedSingular, $unchanged),
            'irregular' => Set::pushDiff($this->__irregularSingular, $exceptions)
        );
        $this->singularized = array();
    }
/**
 * Return $word in singular form.
 *
 * Results are cached per input word. A word is resolved in this order: it matches the
 * uninflected list (returned unchanged), it ends with an irregular plural preceded by a
 * word boundary (the irregular singular is substituted, keeping the case of the matched
 * word's first letter), or the first matching regex rule is applied. If nothing
 * matches, the word is returned unchanged.
 *
 * @param string $word Word in plural
 * @return string Word in singular
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function singularize($word) {
        $_this =& Inflector::getInstance();
        if (empty($_this->singularRules)) {
            $_this->__initSingularRules();
        }

        if (isset($_this->singularized[$word])) {
            return $_this->singularized[$word];
        }
        // Brings $singularRules, $uninflected, $irregular and, once built, the two
        // cached regex fragments into local scope.
        extract($_this->singularRules);

        if (!isset($regexUninflected) || !isset($regexIrregular)) {
            $regexUninflected = __enclose(join( '|', $uninflected));
            $regexIrregular = __enclose(join( '|', array_keys($irregular)));
            $_this->singularRules['regexUninflected'] = $regexUninflected;
            $_this->singularRules['regexIrregular'] = $regexIrregular;
        }

        if (preg_match('/^(' . $regexUninflected . ')$/i', $word, $regs)) {
            $_this->singularized[$word] = $word;
            return $word;
        }

        if (preg_match('/(.*)\\b(' . $regexIrregular . ')$/i', $word, $regs)) {
            // The preserved first letter must come from the matched irregular word
            // ($regs[2]), not from $word: with a prefix such as "sales people" the first
            // letter of $word belongs to the prefix and would yield "sales serson".
            $_this->singularized[$word] = $regs[1] . substr($regs[2], 0, 1) . substr($irregular[strtolower($regs[2])], 1);
            return $_this->singularized[$word];
        }

        foreach ($singularRules as $rule => $replacement) {
            if (preg_match($rule, $word)) {
                $_this->singularized[$word] = preg_replace($rule, $replacement, $word);
                return $_this->singularized[$word];
            }
        }
        $_this->singularized[$word] = $word;
        return $word;
    }
/**
 * Converts an underscored word into its CamelCased form.
 *
 * Underscores are turned into spaces, each resulting word is capitalized with
 * ucwords(), and all spaces are then removed.
 *
 * @param string $lowerCaseAndUnderscoredWord Word to camelize
 * @return string Camelized word. LikeThis.
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function camelize($lowerCaseAndUnderscoredWord) {
        $spaced = str_replace('_', ' ', $lowerCaseAndUnderscoredWord);
        $capitalized = ucwords($spaced);
        return str_replace(' ', '', $capitalized);
    }
/**
 * Converts a camelCasedWord into an underscored_word.
 *
 * An underscore is inserted before every upper-case ASCII letter that directly
 * follows a word character, and the whole result is lower-cased.
 *
 * @param string $camelCasedWord Camel-cased word to be "underscorized"
 * @return string Underscore-syntaxed version of the $camelCasedWord
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function underscore($camelCasedWord) {
        $separated = preg_replace('/(?<=\\w)([A-Z])/', '_\\1', $camelCasedWord);
        return strtolower($separated);
    }
/**
 * Turns an underscored_word_group into a Human Readable Word Group.
 *
 * Underscores become spaces and the first letter of every resulting word is
 * capitalized with ucwords().
 *
 * @param string $lowerCaseAndUnderscoredWord String to be made more readable
 * @return string Human-readable string
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function humanize($lowerCaseAndUnderscoredWord) {
        $spaced = str_replace('_', ' ', $lowerCaseAndUnderscoredWord);
        return ucwords($spaced);
    }
/**
 * Derives the database table name for a model class name ("people" for "Person").
 *
 * The class name is underscored first and the result is then pluralized.
 *
 * @param string $className Name of class to get database table name for
 * @return string Name of the database table for given class
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function tableize($className) {
        $underscored = Inflector::underscore($className);
        return Inflector::pluralize($underscored);
    }
/**
 * Derives the Cake model class name for a database table name ("Person" for "people").
 *
 * The table name is singularized first and the result is then camelized.
 *
 * @param string $tableName Name of database table to get class name for
 * @return string Class name
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function classify($tableName) {
        $singular = Inflector::singularize($tableName);
        return Inflector::camelize($singular);
    }
/**
 * Returns the camelBacked version of an underscored (or CamelCased) string.
 *
 * The string is underscored, camelized, and its first character is then lower-cased.
 *
 * @param string $string String to convert
 * @return string String in variable form (e.g. "someVariableName")
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function variable($string) {
        $camelized = Inflector::camelize(Inflector::underscore($string));
        // Lower-case the first character itself. Substituting the first \w match with
        // the lower-cased first character would corrupt strings that begin with a
        // non-word character ("-foo" would become "--oo").
        return strtolower(substr($camelized, 0, 1)) . substr($camelized, 1);
    }
/**
 * Builds a slug from a string by applying an ordered list of regex replacements.
 *
 * In order: the accented characters listed in the map are replaced by their ASCII
 * equivalents, every remaining character that is neither a word character nor
 * whitespace becomes a space, each run of whitespace becomes $replacement, and
 * finally the pattern produced by String::insert() is removed (presumably leading
 * and trailing $replacement characters; depends on String::insert(), which is
 * loaded from LIBS . 'string.php' when the String class is not yet defined).
 *
 * @param string $string String to convert
 * @param string $replacement Text substituted for runs of whitespace, '_' by default
 * @return string Slugged string
 * @access public
 * @static
 * @link http://book.cakephp.org/view/572/Class-methods
 */
    function slug($string, $replacement = '_') {
        if (!class_exists('String')) {
            require LIBS . 'string.php';
        }

        $quoted = preg_quote($replacement, '/');
        $trimPattern = String::insert('/^[:replacement]+|[:replacement]+$/', array('replacement' => $quoted));

        // Order matters: preg_replace() applies the patterns one after another.
        $map = array(
            '/à|á|å|â/' => 'a',
            '/è|é|ê|ẽ|ë/' => 'e',
            '/ì|í|î/' => 'i',
            '/ò|ó|ô|ø/' => 'o',
            '/ù|ú|ů|û/' => 'u',
            '/ç/' => 'c',
            '/ñ/' => 'n',
            '/ä|æ/' => 'ae',
            '/ö/' => 'oe',
            '/ü/' => 'ue',
            '/Ä/' => 'Ae',
            '/Ü/' => 'Ue',
            '/Ö/' => 'Oe',
            '/ß/' => 'ss',
            '/[^\w\s]/' => ' ',
            '/\\s+/' => $replacement
        );
        $map[$trimPattern] = '';

        $patterns = array_keys($map);
        $substitutes = array_values($map);
        return preg_replace($patterns, $substitutes, $string);
    }
00518 }
/**
 * Wraps a regex fragment in a non-capturing group so it can be embedded in a
 * larger pattern.
 *
 * @param string $string Regex fragment to enclose
 * @return string The fragment wrapped as "(?:...)"
 */
    function __enclose($string) {
        return "(?:{$string})";
    }
00528 ?>

Generated on Sun Nov 22 00:30:53 2009 for CakePHP 1.2.x.x (v1.2.4.8284) by doxygen 1.4.7