SOURCE CODE: Uize.Data.Simple (view docs)

/*______________
|       ______  |   U I Z E    J A V A S C R I P T    F R A M E W O R K
|     /      /  |   ---------------------------------------------------
|    /    O /   |    MODULE : Uize.Data.Simple Package
|   /    / /    |
|  /    / /  /| |    ONLINE : http://uize.com
| /____/ /__/_| | COPYRIGHT : (c)2004-2016 UIZE
|          /___ |   LICENSE : Available under MIT License or GNU General Public License
|_______________|             http://uize.com/license.html
*/

/* Module Meta Data
  type: Package
  importance: 5
  codeCompleteness: 90
  docCompleteness: 2
*/

/*?
  Introduction
    The =Uize.Data.Simple= package provides methods for parsing data represented in the *SIMPLE* format, an indentation based format for declaring data structures.

    *DEVELOPERS:* `Chris van Rensburg`
*/

Uize.module ({
  name:'Uize.Data.Simple',
  required:[
    'Uize.Str.Lines',
    'Uize.Str.Split',
    'Uize.Str.Trim'
  ],
  builder:function () {
    'use strict';

    /*** Variables for Scruncher Optimization ***/
    var
      _undefined,
      _string = 'string'
    ;

    /*** references to static methods used internally (assigned further down, where the package's methods are declared) ***/
    var
      _getIndentationLevel,
      _collapseChildren
    ;

    /*** General Variables ***/
    var
      /* the "|" character that delimits array elements, along with any whitespace on either side of it */
      _arrayDelimiterRegExp = /\s*\|\s*/,

      /* the same delimiter, but only when it occurs at the very start or at the very end of a value */
      _arrayStartRegExp = /^\s*\|\s*/,
      _arrayEndRegExp = /\s*\|\s*$/
    ;

    return Uize.package ({
      getIndentationLevel:_getIndentationLevel = function (_line) {
        return _line.length - _line.replace (/^\s*/,'').length;
      },

      parse:function (_params) {
        var
          _simple = _params.simple,
          _parseName = _params.parseName !== false,
          _ignoreWhitespaceLines = _params.ignoreWhitespaceLines,
          _multiLineValueRegExp = /^([\.,~`:])\1+$/,
          _commentBlockRegExp = /^\s*#(\(+|\)+|<+|>+|\[+|\]+|\{+|\}+)$/,
          _openerToCloser = {
            '<':'>',
            '>':'<',
            '(':')',
            ')':'(',
            '[':']',
            ']':'[',
            '{':'}',
            '}':'{'
          },
          _data = {
            name:'',
            value:'',
            children:[]
          },
          _lines = Uize.Str.Lines.split (_simple),
          _linesLength = _lines.length,
          _linesLengthMinus1 = _linesLength - 1,
          _lineNo,
          _line,
          _currentLevelNo = 0,
          _currentLevel = {
            _parent:_data,
            _indenting:0,
            _nameRunningState:''
          },
          _levels = [_currentLevel]
        ;

        function _scoopUpIndentedLines () {
          var
            _result = [],
            _parentIndenting = _getIndentationLevel (_line),
            _childrenIndenting = -1,
            _childLine
          ;
          while (_lineNo < _linesLengthMinus1) {
            _lineNo++;
            if (_childLine = _lines [_lineNo]) {
              var _childLineIndenting = _getIndentationLevel (_childLine);
              if (_childrenIndenting < 0) _childrenIndenting = _childLineIndenting;
              if (_childLineIndenting <= _parentIndenting) {
                _lineNo--;
                break;
              } else {
                _result.push (_childLine.slice (_childrenIndenting));
              }
            } else {
              _result.push ('');
            }
          }
          _result.length && !_result [_result.length - 1] && _result.pop (); // remove last line, if empty

          return _result;
        }

        for (_lineNo = -1; ++_lineNo < _linesLength;) {
          if ((_line = _lines [_lineNo]) && (!_ignoreWhitespaceLines || Uize.Str.Trim.trim (_line))) {
            if (_commentBlockRegExp.test (_line)) {
              /* support for multi-line comment blocks, as in the example...
                #[
                This is a commented section
                  This is the guts of a commented section
                  More guts of a commented section
                  and yet some more guts
                  just no end to the guts
                #]

                - the comment open marker line must start with a "#" character, followed by one or more of one of the valid opener characters - "(", "[", "{", "<"

                - the comment close marker line must start with a "#" character, followed by one or more of one of the valid closer characters - ")", "]", "}", ">"

                - the comment closer character used must be matched to the comment opener character (i.e. "(" matches ")", "[" matches "]", "{" matches "}", and "<" matches ">")

                - the number of opener characters used must match the number of closer characters used (i.e. the comment opener "#<<<" matches the comment closed "#>>>", but does not match the comment closer "#>>")
              */
              var _openerChar = _line.charAt (_line.indexOf ('#') + 1);
              while (_lineNo < _linesLengthMinus1) {
                _lineNo++;
                var _commentedLine = _lines [_lineNo];
                if (
                  _commentedLine.length == _line.length && _commentBlockRegExp.test (_commentedLine) &&
                  _openerToCloser [_openerChar] == _commentedLine.charAt (_commentedLine.indexOf ('#') + 1)
                )
                  break
                ;
              }
            } else if (/^\s*###/.test (_line)) {
              /* support for commenting out a section and all children, as in...
                ### People
                  Person
                    First Name : James
                    Last Name  : Crutchley
                  Person
                    First Name : Paul
                    Last Name  : Wellstone
                  Person
                    First Name : Azemumdon
                    Last Name  : Buchakahli

                in this example, the entire key "People" is commented out using the "###" prefix
              */
              _scoopUpIndentedLines ();
            } else {
              var
                _lineUnindented = _line.replace (/^\s*/,''),
                _indenting = _line.length - _lineUnindented.length
              ;
              if (_indenting != _currentLevel._indenting) {
                if (_indenting > _currentLevel._indenting) {
                  var _children = _currentLevel._parent.children;
                  _currentLevelNo++;
                  _currentLevel = _levels [_currentLevelNo] = {
                    _parent:_children [_children.length - 1],
                    _indenting:_indenting
                  };
                } else {
                  while (_currentLevelNo && _currentLevel._indenting > _indenting) {
                    _currentLevelNo--;
                    _currentLevel = _levels [_currentLevelNo];
                  }
                }
              }
              var
                _children = _currentLevel._parent.children,
                _indexOfNewChild = _children.length,
                _name, _value
              ;
              if (_parseName) {
                var _nameValue = _lineUnindented.match (/^([^:]*[^:\s])?(?:\s*:\s*(.*))?$/);
                _name = _nameValue [1] || '';
                _value = _nameValue [2] || '';
              } else {
                _name = '';
                _value = _lineUnindented;
              }
              if (_parseName && !_name && _indexOfNewChild) {
                /*
                  the name can be ommitted on successive entries at a particular indentation level to give you a kind of running state (a convenient shorthand to avoid duplicating the same key)

                  Fruit:
                    name  : Strawberry
                    color : red
                    taste : tart
                  :
                    name  : Orange
                    color : orange
                    taste : tart, acidic, sweet
                  :
                    name  : Grapefruit
                    color : pink
                    taste : bitter

                  In this example, the "Fruit" key does not need to be repeated for successive running entries.
                */
                var _previousChild = _children [_indexOfNewChild - 1];
                _name = _previousChild.name;
                if (_name != _currentLevel._nameRunningState) {
                  _currentLevel._nameRunningState = _name;
                  !_previousChild.value && !_previousChild.children.length && _indexOfNewChild--;
                  /*
                    in the special case where the name running state changes and the first member of a particular name has an emptry string value and has no children, then that first member is essentially considered void, allowing a more elegant notation, as in the following...

                    Fruit
                    :
                      name  : Strawberry
                      color : red
                      taste : tart
                    :
                      name  : Orange
                      color : orange
                      taste : tart, acidic, sweet
                    :
                      name  : Grapefruit
                      color : pink
                      taste : bitter

                    So, the declaration for each "Fruit" member is consistent, and the first voided member serves merely to set up the member name running state. Without this special behavior, the above example would give you one "Fruit" member with no children and an empty string as its value.

                    So, then, if your intention was to have an empty first "Fruit" member, it would have to be specified as...

                    Fruit
                    :
                    :
                      name  : Strawberry
                      color : red
                      taste : tart
                    :
                      name  : Orange
                      color : orange
                      taste : tart, acidic, sweet
                    :
                      name  : Grapefruit
                      color : pink
                      taste : bitter
                  */
                }
              }
              if (_multiLineValueRegExp.test (_value)) {
                /* support for multi-line values, as in the example...
                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                  function addSeries (start,end,step) {
                    num = start;
                    var sum = 0;
                    while (num <= end) {
                      sum += num;
                      num += step;
                    }
                    return sum;
                  }
                  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

                  This syntax is ideal for specifying blocks of sample code, or other blocks of text that should not be parsed according to the simple indentation structure.
                */
                var _openerChar = _value.charAt (0);
                _value = '';
                while (_lineNo < _linesLengthMinus1) {
                  if (
                    _multiLineValueRegExp.test (
                      _lineUnindented = (_line = _lines [++_lineNo]).slice (_indenting)
                    ) &&
                    _lineUnindented.charAt (0) == _openerChar
                  ) {
                    break;
                  } else {
                    _value += _lineUnindented + '\n';
                  }
                }
              } else if (_value == ':') {
                /* support for multi-line values, as in the example...
                  Key::
                    This is an indent-based multi-line value.
                    This is the second line of the multi-line value.
                    And here's a third line.
                */
                _value = _scoopUpIndentedLines ().join ('\n');
              } else if (_value == '>') {
                /* support for simple arrays listed below key and indented, as in the example...
                  Array:>
                    Element 1 Value
                    Element 2 Value
                    Element 3 Value
                */
                _value = _scoopUpIndentedLines ();
              } else {
                if (_parseName && _value.charAt (0) == '|' && _value.slice (-1) == '|')
                  /* support for array value syntax

                    this provides a convenient way to declare simple arrays on a single line for a value, as in the example...

                    My Array: | value 1 | value 2 | value 3 | value 4 |

                  */
                  _value = Uize.Str.Split.split (
                    _value.replace (_arrayStartRegExp,'').replace (_arrayEndRegExp,''),
                    _arrayDelimiterRegExp
                  )
                ;
              }
              if (_parseName && !_name) {
                var _currentLevelParentValue = _currentLevel._parent.value;
                if (typeof _currentLevelParentValue == _string)
                  _currentLevel._parent.value = _currentLevelParentValue ? [_currentLevelParentValue] : []
                ;
                _currentLevel._parent.value.push (_value);
                /*
                  if there is no non-empty running state member name, then add this child as an element of the value array for the parent. A special case which allows us to easily create an array using the syntax...

                  My Array : value 1
                        : value 2
                        : value 3
                        : value 4

                  In this example, this special case behavior is applying to the "value 2", "value 3", and "value 4" members

                  Another form that is also permitted is...

                  My Array
                    : value 1
                    : value 2
                    : value 3
                    : value 4

                  Here, the special case behavior recognizes that the initial value of "My Array" is an empty string and omits it from the value array

                  A value array is also augmented, as in the following example...

                  My Array: | value 1 | value 2 |
                    : value 3
                    : value 4
                */
              } else {
                _children [_indexOfNewChild] = {
                  name:_name,
                  value:_value,
                  children:[]
                };
              }
            }
          }
        }
        _params.collapseChildren && _collapseChildren (_data);
        return _data;
      },

      /*
        collapseChildren
          Collapses a tree of nodes (objects with "name", "value", and "children" properties, as built by the parse method) into a plain data structure, modifying the node objects in place.

          PARAMS
          - _data : the node to collapse (its name, value, and children properties are deleted, and a property is added for each distinct child name)
          - _defaultForUndefinedValues : optional, the value to use for a child whose value is undefined

          RETURNS
          The collapsed item. This is the _data object itself, unless all of the item's children have an empty string name, in which case it is an array of the children's values.

          NOTES
          - a child that has children of its own contributes its collapsed form (the child's own value is discarded), while a child without children contributes its value
          - when a name occurs more than once among the children, the property for that name becomes an array of the values, in order of occurrence
          - the "all children unnamed" array conversion is applied at every depth of the tree, because the result returned by the recursive collapse is what is stored for a child that has children
      */
      collapseChildren:_collapseChildren = function (_data,_defaultForUndefinedValues) {
        var _dataChildren = _data.children;

        /* strip the node's own bookkeeping properties, so that only the collapsed child properties remain */
        delete _data.name;
        delete _data.children;
        delete _data.value;

        if (_dataChildren) {
          var
            _properties = Uize.lookup (_undefined,0,true), // safe empty lookup object (tracks occurrence count per child name)
            _totalProperties = 0
          ;
          for (var _childNo = -1, _dataChildrenLength = _dataChildren.length; ++_childNo < _dataChildrenLength;) {
            var
              _child = _dataChildren [_childNo],
              _childName = _child.name || '',
              _childHasChildren = !!(_child.children && _child.children.length),
              _childOwnValue = _child.value, // must be read before collapsing the child, since collapsing deletes its value property
              _collapsedChild = _collapseChildren (_child,_defaultForUndefinedValues),
              /* use the result of collapsing the child (not the child object itself), so that a child whose children are all unnamed becomes an array */
              _childValue = _childHasChildren ? _collapsedChild : _childOwnValue
            ;
            if (_childValue === _undefined) _childValue = _defaultForUndefinedValues;
            if (_properties [_childName]) {
              /* on the second occurrence of a name, the existing single value is wrapped in an array */
              if (_properties [_childName] == 1) _data [_childName] = [_data [_childName]];
              _data [_childName].push (_childValue);
              _properties [_childName]++;
            } else {
              _totalProperties++;
              _properties [_childName] = 1;
              _data [_childName] = _childValue;
            }
          }
          if (_totalProperties == 1 && _properties [''])
            /* NOTE:
              if all of the children of an item have an empty string for the name, then the item becomes an array of the values of its children
            */
            _data = _properties [''] == 1 ? [_data ['']] : _data ['']
          ;
        }
        return _data;
      }
    });
  }
});