SOURCE CODE: Uize.Wsh.AuditStrings

VIEW REFERENCE

/*______________
|       ______  |   U I Z E    J A V A S C R I P T    F R A M E W O R K
|     /      /  |   ---------------------------------------------------
|    /    O /   |    MODULE : Uize.Wsh.AuditStrings Package
|   /    / /    |
|  /    / /  /| |    ONLINE : http://uize.com
| /____/ /__/_| | COPYRIGHT : (c)2009 UIZE
|          /___ |   LICENSE : Available under MIT License or GNU General Public License
|_______________|             http://uize.com/license.html
*/

/*ScruncherSettings Mappings="=" LineCompacting="TRUE"*/

/*?
  Introduction
    The =Uize.Wsh.AuditStrings= package provides a method to audit all JavaScript files in a folder for literal strings - useful for internationalization.

    *DEVELOPERS:* `Chris van Rensburg`

    The =Uize.Wsh.AuditStrings= module is a package under the =Uize.Wsh= namespace, designed specifically to run in the context of Windows Script Host.
*/

/*
  TO DO:
    What's still being missed that should be recognized as non-internationalizable...
      250x155 - width x height in pixels

      imageTextSpacingX - not being recognized as camelCase identifier because last capital is not followed by any lowercase characters

      disableGSDProducts, fixedPPI - consecutive caps

      & - any single entity should be filtered out

      (markup example missing from this copy of the file) - just a tag, without contents should be detectable (open or close tags)
        - catch one or more just tags in a row (example missing from this copy of the file)

      Content-length, Content-type - specific HTTP headers

      -1000px - /^(-?\d+)?px$/

      womens_laceup_heel_womens5.5 - it looks like an identifier with underscores, but there's a period

      sz500 - a word with multiple digits

      top= - fragments of code (how to identify them?)

      -jiggler - hyphen starts a word

      pd - two or more adjacent letters with no vowels (lowercase or uppercase)

    - ff00ff - six letter hex format strings should be filtered to LIKELY NON-INTERNATIONALIZABLE STRINGS

    - miscellaneous
      price?input=js&output=js
      MultiProductFactory_form-designData
      ?private=true
      &end=
      create/
      icon_medium
      icon_medium-
      .value
      _ctgy_in.value
      window.parent.parent.document.frmParms.mat
      resizable=yes,status=no,scrollbars=yes,location=no,toolbar=no,directories=no,menubar=no,width=470,innerWidth=470,height=400,innerHeight=400

    - switches
      - a switch for levels of doubt
        1: show only LIKELY INTERNATIONALIZABLE STRINGS
        2: show LIKELY INTERNATIONALIZABLE STRINGS, POSSIBLY INTERNATIONALIZABLE STRINGS
        3: show LIKELY INTERNATIONALIZABLE STRINGS, POSSIBLY INTERNATIONALIZABLE STRINGS, LIKELY NON-INTERNATIONALIZABLE STRINGS
        4: show LIKELY INTERNATIONALIZABLE STRINGS, POSSIBLY INTERNATIONALIZABLE STRINGS, LIKELY NON-INTERNATIONALIZABLE STRINGS, NON-INTERNATIONALIZABLE STRINGS
      - a switch for not showing headings for empty buckets
      - a switch for not showing files for which all buckets that would be displayed are empty

    - summary for overall totals for all buckets, per file, and for all files

    - ability to supply additional dictionaries of known non-internationalizable strings and likely non-internationalizable strings, and regular expressions as well

    - idea: summary for cases where creating a variable for a string that is repeatedly used would save some space when the file is scrunched
*/

Uize.module ({
  name:'Uize.Wsh.AuditStrings',
  required:[
    'Uize.Scruncher',
    'Uize.Data',
    'Uize.String'
  ],
  builder:function () {
    /*** Variables for Scruncher Optimization ***/
      var _package = function () {};

    /*** Global Variables ***/
      var
        /* DOM event names; both the bare names and their "on"-prefixed forms are treated as non-internationalizable */
        _eventNames = [
          'abort',
          'activate',
          'afterupdate',
          'beforedeactivate',
          'beforeeditfocus',
          'beforeupdate',
          'blur',
          'cellchange',
          'change',
          'click',
          'dblclick',
          'deactivate',
          'drag',
          'dragend',
          'dragenter',
          'dragleave',
          'dragover',
          'drop',
          'error',
          'finish',
          'focus',
          'help',
          'keydown',
          'keypress',
          'keyup',
          'load',
          'losecapture',
          'mousedown',
          'mousemove',
          'mouseup',
          'mouseout',
          'mouseover',
          'propertychange',
          'readystatechange',
          'rowenter',
          'rowexit',
          'rowsdelete',
          'rowsinserted',
          'scroll',
          'submit',
          'start',
          'unload'
        ],

        /* exact-match dictionary of strings that are never reported as internationalizable */
        _nonI18nStringsDictionary = [
          /*** pretty JavaScript-specific value types ***/
            'function',
            'object',
            'string',
            'undefined',
            // maybe these should be in a likely list?

          /*** properties/attributes that are distinctive enough ***/
            'alt',
            'href',
            'src',

          /*** JavaScript-specific acronyms ***/
            'ajax',
            'json',
            'CSS1Compat',

          /*** HTML tag names that are distinctive enough ***/
            'div', 'DIV',
            'hr', 'HR',
            'iframe', 'IFRAME',
            'img', 'IMG',
            'li', 'LI',
            'ol', 'OL',
            'span', 'SPAN',
            'td', 'TD',
            'textarea', 'TEXTAREA',
            'tr', 'TR',
            'ul', 'UL',

          /*** file extensions ***/
            '.asp',
            '.ASP',
            '.gif',
            '.html',
            '.jpg',
            '.js',
            '.jst',
            '.png',
            '.PNG',
            '.txt',
            '.xhtml',
            '.xml'
            // this could be in a regular expression, with leading period optional, and case insensitive, perhaps it would be good to add a filename match, to catch things like, filename.gif, .gif, and gif (ie. patterns like [[filename].]gif|jpg|html)
        ].concat (
          _eventNames,
          Uize.Data.map ('\'on\' + value',_eventNames)
        ),

        /* lookup object built from the dictionary on the first call to perform */
        _nonI18nStringsDictionaryLookup
      ;

    /*** Public Static Methods ***/
      /*
        Audits the JavaScript files handled by Uize.Wsh.buildFiles and, for each file, sorts
        its literal strings into four buckets that are reported through the logDetails
        property of the file builder's result.

        PARAMS
          _params.sourceFolderName - only folder paths ending with this name are accepted
          (all other properties of _params are handed through to Uize.Wsh.buildFiles)

        NOTES
          - alwaysBuild and dryRun are passed as true in the last source object given to
            Uize.copyInto, presumably so that they take precedence over values in _params
            (confirm against the Uize.copyInto reference)
      */
      _package.perform = function (_params) {
        var
          _endsWithDotJsRegExp = /\.js$/,
          _sourceFolderName = _params.sourceFolderName
        ;
        /* build the dictionary lookup only once, the first time the method is called */
        if (!_nonI18nStringsDictionaryLookup)
          _nonI18nStringsDictionaryLookup = Uize.Data.getLookup (_nonI18nStringsDictionary)
        ;
        Uize.Wsh.buildFiles (
          Uize.copyInto (
            {
              /* accept only folders whose path ends with the source folder name */
              targetFolderPathCreator:function (_folderPath) {
                return Uize.String.endsWith (_folderPath,_sourceFolderName) ? _folderPath : null;
              },
              /* accept only files whose name ends with ".js" */
              targetFilenameCreator:function (_sourceFileName) {
                return _endsWithDotJsRegExp.test (_sourceFileName) ? _sourceFileName : null;
              },
              /* classifies every string collected by the Scruncher for one file and returns the log text */
              fileBuilder:function (_sourceFileName,_sourceFileText) {
                var
                  _scruncherResult = Uize.Scruncher.scrunch (_sourceFileText,{AUDITSTRINGS:true}),
                  _stringsMap = _scruncherResult.stringsMap,
                  _strings = Uize.Data.getKeys (_stringsMap),
                  _nonI18nStrings = [],
                  _likelyNonI18nStrings = [],
                  _possibleI18nStrings = [],
                  _likelyI18nStrings = []
                ;
                _strings.sort ();
                for (var _stringNo = -1, _stringsLength = _strings.length; ++_stringNo < _stringsLength;) {
                  var _string = _strings [_stringNo];
                  /*
                    The conditional expression below selects the bucket array for the string,
                    and the string (followed by its stringsMap entry) is pushed onto it.

                    NOTE(review): the dictionary test is a plain property lookup, so if
                    Uize.Data.getLookup returns an ordinary object, then names inherited from
                    Object.prototype (eg. 'constructor') would also match - confirm.
                  */
                  (
                    _nonI18nStringsDictionaryLookup [_string] || // ignore strings that are recognized as non-internationalizable strings
                    !/\S/.test (_string) || // ignore strings that are only whitespace (spaces, tabs, linebreaks, etc.)
                    !/[a-zA-Z]/.test (_string) || // ignore strings that have no letter characters (this also covers strings that are only numbers)
                    /^(#|0x)([0-9a-fA-F]{3}){1,2}$/.test (_string) || // ignore hex RGB color values
                    /^[A-Z][a-zA-Z0-9$_]*(\.[a-zA-Z0-9$_]+)+$/.test (_string) || // ignore what look like module names
                    /^Uize/i.test (_string) || // if it starts with "Uize", it's related to the framework
                    /^[a-zA-Z0-9$_]*_[a-zA-Z0-9$_]*$/.test (_string) || // ignore what look like underscore delimited identifiers
                    /^Changed\.(\*|[a-zA-Z0-9]+)$/.test (_string) || // ignore Changed.[propertyName] events (property names may contain any digit)
                    /^\S*[\/\\][\w_]+[\/\\]\S*$/.test (_string) || // ignore what look like URL paths
                    /^\$?[a-zA-Z][a-z0-9]*([A-Z][a-z0-9]+)+$/.test (_string) // ignore what look obviously like camelCase identifiers
                      ? _nonI18nStrings
                      : (
                        /[a-zA-Z]{2,}/.test (_string) && // string must have at least two consecutive word characters
                        !/^\S*[\w_]+[\/\\][\w_]+\S*$/.test (_string) // ignore what could be short URL snippets
                          ? (
                            /* three whitespace-separated words in a row is taken as a strong sign of prose */
                            /\b[a-zA-Z][a-z]*\s[a-z]+\s[a-zA-Z][a-z]*\b/.test (_string)
                              ? _likelyI18nStrings
                              : _possibleI18nStrings
                          )
                          : _likelyNonI18nStrings
                      )
                  ).push (_string + ' --- ' + _stringsMap [_string]);
                }
                return {
                  logDetails:
                    '\t\tNON-INTERNATIONALIZABLE STRINGS\n' +
                      Uize.String.hugJoin (_nonI18nStrings,'\t\t\t','\n') + '\n' +
                    '\t\tLIKELY NON-INTERNATIONALIZABLE STRINGS\n' +
                      Uize.String.hugJoin (_likelyNonI18nStrings,'\t\t\t','\n') + '\n' +
                    '\t\tPOSSIBLY INTERNATIONALIZABLE STRINGS\n' +
                      Uize.String.hugJoin (_possibleI18nStrings,'\t\t\t','\n') + '\n' +
                    '\t\tLIKELY INTERNATIONALIZABLE STRINGS\n' +
                      Uize.String.hugJoin (_likelyI18nStrings,'\t\t\t','\n')
                };
              }
            },
            _params,
            {
              alwaysBuild:true,
              dryRun:true
            }
          )
        );
      };

    return _package;
  }
});