/*______________
| ______ | U I Z E J A V A S C R I P T F R A M E W O R K
| / / | ---------------------------------------------------
| / O / | MODULE : Uize.Build.AuditStrings Package
| / / / |
| / / / /| | ONLINE : http://uize.com
| /____/ /__/_| | COPYRIGHT : (c)2009-2014 UIZE
| /___ | LICENSE : Available under MIT License or GNU General Public License
|_______________| http://uize.com/license.html
*/
/* Module Meta Data
type: Package
importance: 1
codeCompleteness: 90
docCompleteness: 5
*/
/*?
Introduction
The =Uize.Build.AuditStrings= package provides a method to audit all JavaScript files in a folder for literal strings - useful for internationalization.
*DEVELOPERS:* `Chris van Rensburg`
This build script recurses through all folders of a project, harvests all the string literals from JavaScript files, filters them into different buckets depending on their likelihood of internationalizability, and produces a report with summaries for all the JavaScript files.
The build script groups the literal strings it finds inside a JavaScript file into four buckets...
NON-INTERNATIONALIZABLE STRINGS
This category includes strings that are recognized by certain patterns as being non-internationalizable strings, including...
- DOM event names (eg. =click=)
- very JavaScript specific keywords (eg. =function=)
- HTML specific attribute names (eg. =href=)
- sufficiently distinctive HTML tag names (eg. =div=), file extensions (eg. =.gif=)
- strings that are only whitespace
- strings that have no letter characters
- hex formatted RGB color values (eg. =#ff0000=)
- module names (eg. =Uize.Widget.Bar=)
- any string starting with "Uize"
- underscore delimited identifiers (eg. =button_big_disabled=)
- Changed.[propertyName] events (eg. =Changed.value=), and the Changed.* event
- sufficiently distinguishable URL paths (eg. =myfolder/mysubfolder/myfile.html=)
- camelCase identifiers (eg. =languageSortAscending=)
LIKELY NON-INTERNATIONALIZABLE STRINGS
This category includes strings with only one letter character (eg. =a=), and strings that look like short url snippets (eg. =myfolder/mysubfolder=).
POSSIBLY INTERNATIONALIZABLE STRINGS
This category includes strings that are not filtered out into any of the `NON-INTERNATIONALIZABLE STRINGS`, `LIKELY NON-INTERNATIONALIZABLE STRINGS`, or `LIKELY INTERNATIONALIZABLE STRINGS` categories.
LIKELY INTERNATIONALIZABLE STRINGS
This category includes strings that are not filtered out into the `NON-INTERNATIONALIZABLE STRINGS` or `LIKELY NON-INTERNATIONALIZABLE STRINGS` categories and that contain three adjacent, space separated words, where the middle word is all lowercase.
Below is a snippet from the log file after this build script was run inside the =UIZE-JavaScript-Framework= folder of the UIZE Web site project...
LOG FILE SNIPPET
..........................................................................................
***** C:\~uize\UIZE-JavaScript-Framework\site-source\js\Uize.Widget.TableSort.js
TARGET FILE: C:\~uize\UIZE-JavaScript-Framework\site-source\js\Uize.Widget.TableSort.js
BUILT (ALWAYS BUILD), BUILD DURATION: 188ms
NON-INTERNATIONALIZABLE STRINGS
--- 92,101,189
TD --- 54
TR --- 142,249,274
Uize.Node --- 22
Uize.Widget.TableSort --- 21
headingLitClass --- 346
headingOverClass --- 342
languageSortAscending --- 350
languageSortDescending --- 355
click --- 292
rowOverClass --- 360
updateUi --- 339
LIKELY NON-INTERNATIONALIZABLE STRINGS
POSSIBLY INTERNATIONALIZABLE STRINGS
TH --- 55
tbody --- 50
thead --- 272
LIKELY INTERNATIONALIZABLE STRINGS
Click to sort in ascending order --- 352
Click to sort in descending order --- 357
..........................................................................................
A few things to notice about the format...
- the strings are listed in ASCIIbetically sorted order
- to the right of each string is a listing of all the line numbers on which the string occurs
THIS ONE'S SLOW
Be warned: this build script can be quite slow to run, especially if you have a large project with many folders and many JavaScript files. It could take a few minutes to process all JavaScript files in a large project. You'll know when it's done running by the modified date of the associated log file, or you can watch the *WSCRIPT.EXE* process in the Windows Task Manager.
NOTES
- the summary info for this build script is output to the log file =Uize.Build.AuditStrings.log=
*/
Uize.module ({
	name:'Uize.Build.AuditStrings',
	required:[
		'Uize.Build.Util',
		'Uize.Build.Scruncher',
		'Uize.Array.Join'
	],
	builder:function () {
		'use strict';

		/*** General Variables ***/
			var
				_eventNames = [
					'abort', 'activate', 'afterupdate', 'beforedeactivate', 'beforeeditfocus', 'beforeupdate', 'blur', 'cellchange', 'change', 'click', 'dblclick', 'deactivate', 'drag', 'dragend', 'dragenter', 'dragleave', 'dragover', 'drop', 'error', 'finish', 'focus', 'help', 'keydown', 'keypress', 'keyup', 'load', 'losecapture', 'mousedown', 'mousemove', 'mouseup', 'mouseout', 'mouseover', 'propertychange', 'readystatechange', 'rowenter', 'rowexit', 'rowsdelete', 'rowsinserted', 'scroll', 'submit', 'start', 'unload'
				],
				_nonI18nStringsDictionary = [
					/*** pretty JavaScript-specific value types ***/
						'function', 'object', 'string', 'undefined', // maybe these should be in a likely list?

					/*** properties/attributes that are distinctive enough ***/
						'alt', 'href', 'src',

					/*** JavaScript-specific acronyms ***/
						'ajax', 'json', 'CSS1Compat',

					/*** HTML tag names that are distinctive enough ***/
						'div', 'DIV', 'hr', 'HR', 'iframe', 'IFRAME', 'img', 'IMG', 'li', 'LI', 'ol', 'OL', 'span', 'SPAN', 'td', 'TD', 'textarea', 'TEXTAREA', 'tr', 'TR', 'ul', 'UL',

					/*** file extensions ***/
						'.asp', '.ASP', '.gif', '.html', '.jpg', '.js', '.jst', '.png', '.PNG', '.txt', '.xhtml', '.xml' // this could be in a regular expression, with leading period optional, and case insensitive, perhaps it would be good to add a filename match, to catch things like, filename.gif, .gif, and gif (ie. patterns like [[filename].]gif|jpg|html)
				].concat (
					_eventNames,
					Uize.map (_eventNames,'\'on\' + value') // the "on"-prefixed handler names (eg. onclick)
				),
				_nonI18nStringsDictionaryLookup, // built lazily, on the first call to perform

				/*** regular expressions (created once here, rather than once per audited string) ***/
					_endsWithDotJsRegExp = /\.js$/,
					_nonWhitespaceRegExp = /\S/,
					_letterRegExp = /[a-zA-Z]/,
					_hexRgbColorRegExp = /^(#|0x)([0-9a-fA-F]{3}){1,2}$/,
					_moduleNameRegExp = /^[A-Z][a-zA-Z0-9$_]*(\.[a-zA-Z0-9$_]+)+$/,
					_startsWithUizeRegExp = /^Uize/i,
					_underscoreDelimitedRegExp = /^[a-zA-Z0-9$_]*_[a-zA-Z0-9$_]*$/,
					_changedEventRegExp = /^Changed\.(\*|[a-zA-Z0-9]+)$/, // digit range was 0-1, now covers all digits
					_urlPathRegExp = /^\S*[\/\\][\w_]+[\/\\]\S*$/,
					_camelCaseRegExp = /^\$?[a-zA-Z][a-z0-9]*([A-Z][a-z0-9]+)+$/,
					_twoAdjacentLettersRegExp = /[a-zA-Z]{2,}/,
					_shortUrlSnippetRegExp = /^\S*[\w_]+[\/\\][\w_]+\S*$/,
					_threeWordPhraseRegExp = /\b[a-zA-Z][a-z]*\s[a-z]+\s[a-zA-Z][a-z]*\b/
			;

		/*** Utility Functions ***/
			function _isNonI18nString (_string) {
				/* Returns true when the string matches one of the patterns that mark it as
					non-internationalizable. Expects _nonI18nStringsDictionaryLookup to have been
					built already, which perform does before any file is audited.
				*/
				return !!(
					_nonI18nStringsDictionaryLookup [_string] ||
						// strings listed in the dictionary of known non-internationalizable strings
						// NOTE(review): if Uize.lookup returns a plain object, inherited names such as "constructor" would also test truthy here - confirm
					!_nonWhitespaceRegExp.test (_string) ||
						// strings that are only whitespace (spaces, tabs, linebreaks, etc.)
					!_letterRegExp.test (_string) ||
						// strings that have no letter characters (this also covers strings that are only numbers)
					_hexRgbColorRegExp.test (_string) ||
						// hex RGB color values
					_moduleNameRegExp.test (_string) ||
						// what look like module names
					_startsWithUizeRegExp.test (_string) ||
						// if it starts with "Uize", it's related to the framework
					_underscoreDelimitedRegExp.test (_string) ||
						// what look like underscore delimited identifiers
					_changedEventRegExp.test (_string) ||
						// Changed.[propertyName] events, and the Changed.* event
					_urlPathRegExp.test (_string) ||
						// what look like URL paths
					_camelCaseRegExp.test (_string)
						// what look obviously like camelCase identifiers
				);
			}

			function _getBucketNo (_string) {
				/* Returns the index of the bucket for the string:
					0 = non-internationalizable, 1 = likely non-internationalizable,
					2 = possibly internationalizable, 3 = likely internationalizable.
				*/
				return (
					_isNonI18nString (_string)
						? 0
						: (
							!_twoAdjacentLettersRegExp.test (_string) ||
								// string must have at least two consecutive letters to be a candidate
							_shortUrlSnippetRegExp.test (_string)
								// what could be short URL snippets are not candidates
						)
							? 1
							: _threeWordPhraseRegExp.test (_string)
								// three adjacent, whitespace separated words, where the middle word is all lowercase
								? 3
								: 2
				);
			}

		return Uize.package ({
			perform:function (_params) {
				/* Audits the string literals of every .js file under a root folder and passes a
					per-file summary, grouped into the four buckets, to Uize.Build.Util.buildFiles
					as that file's log details.

					_params.sourcePath  - passed to buildFiles as rootFolderPath
					_params.logFilePath - passed to buildFiles as logFilePath
				*/
				if (!_nonI18nStringsDictionaryLookup)
					_nonI18nStringsDictionaryLookup = Uize.lookup (_nonI18nStringsDictionary)
				;
				Uize.Build.Util.buildFiles ({
					targetFolderPathCreator:function (_folderPath) {
						return _folderPath;
					},
					targetFilenameCreator:function (_sourceFileName) {
						// null for anything other than a .js file (presumably makes buildFiles skip the file - confirm)
						return _endsWithDotJsRegExp.test (_sourceFileName) ? _sourceFileName : null;
					},
					fileBuilder:function (_sourceFileName,_sourceFileText) {
						var
							_stringsMap = Uize.Build.Scruncher.scrunch (_sourceFileText,{AUDITSTRINGS:true}).stringsMap,
							_strings = Uize.keys (_stringsMap),
							_buckets = [[],[],[],[]] // indexed by the bucket number from _getBucketNo
						;
						_strings.sort (); // default sort, so the report lists strings in ASCIIbetical order
						for (var _stringNo = -1, _stringsLength = _strings.length; ++_stringNo < _stringsLength;) {
							var _string = _strings [_stringNo];
							_buckets [_getBucketNo (_string)].push (_string + ' --- ' + _stringsMap [_string]);
						}
						return {
							logDetails:
								'\t\tNON-INTERNATIONALIZABLE STRINGS\n' +
								Uize.Array.Join.hugJoin (_buckets [0],'\t\t\t','\n') + '\n' +
								'\t\tLIKELY NON-INTERNATIONALIZABLE STRINGS\n' +
								Uize.Array.Join.hugJoin (_buckets [1],'\t\t\t','\n') + '\n' +
								'\t\tPOSSIBLY INTERNATIONALIZABLE STRINGS\n' +
								Uize.Array.Join.hugJoin (_buckets [2],'\t\t\t','\n') + '\n' +
								'\t\tLIKELY INTERNATIONALIZABLE STRINGS\n' +
								Uize.Array.Join.hugJoin (_buckets [3],'\t\t\t','\n')
						};
					},
					alwaysBuild:true,
					dryRun:true, // presumably keeps buildFiles from writing target files, since only the log output is wanted - confirm
					rootFolderPath:_params.sourcePath,
					logFilePath:_params.logFilePath
				});
			}
		});
	}
});