index.js

/**
 * @file CSV Types.
 * @author nozalr <nozalr@group4layers.com> (Group4Layers®).
 * @copyright 2017 nozalr (Group4Layers®).
 * @license MIT
 * @version 0.3.1
 * @description
 *
 * This docs are generated from the source code, and therefore, they are concise and simple.
 *
 * **For a complete documentation full of examples, use cases, general overview and the most
 * important points of the API, please, see the README.md in [csv-types-js](https://github.com/Group4Layers/csv-types-js)**.
 */

/**
 * @typedef {Object} Options - Valid options to be configured.
 * @prop {function} fail - function to fail (error is capturable).
 * @prop {boolean} trim - trim space in value (headers are always trimmed).
 * @prop {boolean} trimEscaped - trim s
 * @prop {boolean} trimEscaped - trim space in those escaped values (eg. " a " to "a").
 * @prop {boolean} types - use types (allows multiple definitions per string).
 * @prop {boolean} headers - you can omit headers when used with no types (flexible values).
 * @prop {boolean} firstLineHeader - headers are in the first not empty line (and not commented).
 * @prop {char} delimiter - column character delimiter.
 * @prop {char} escape - column escape character.
 * @prop {char} comment - comment char (omits the line).
 * @prop {(boolean|function)} cast - cast function for every value (by default false: no casting).
 * @prop {(boolean|function)} row - row function for every row values.
 * @description
 *
 * Options to be configured in CSV.
 *
 * By default:
 *
 * ```JavaScript
 * const opts = {
 *   fail: function(m){
 *     console.log(m);
 *     return {
 *       fail: m,
 *     };
 *   },
 *   trim: true,
 *   trimEscaped: false,
 *   types: false,
 *   headers: true,
 *   firstLineHeader: false,
 *   delimiter: ',',
 *   escape: '"',
 *   comment: '#',
 *   cast: false,
 *   row: false,
 * };
 * ```
 *
 * If the cast function receives `true` it casts values that match the regexp `/^[-+]?[\d.]+$/` to numbers. Those that do not match are not casted, so, they are considered strings.
 *
 * The option `firstLineHeader` only works if `headers` is true.
 *
 * The option `headers` only works if `types` is false (because types needs headers always).
 *
 * The cast function receives this parameters:
 * - `value` (`any`): the value (after the trimming, if applicable)
 * - `isHeader` (`bool`): true if it is a header or not
 * - `type` (`string`): type of the row (receives an empty string `''` if types are not used)
 * - `column` (`int`): the column index starting from 0 (the first)
 * - `row` (`int`): the row index starting from 0 (the first).
 *
 * And the value returned is inserted as the column value.
 *
 * ```js
 * function cast(value, isHeader, type, column, row){
 *   // the return value is used for this column
 * }
 * ```
 *
 * The row function is not called for the headers and it receives this parameters:
 * - `value` (`any[]`): array of values
 * - `type` (`string`): type of the row (receives `''` if no types)
 * - `definition` (`Definition{}`): the global object with definitions (headers) and values so far
 * - `row` (`int`): the row index starting from 0 (the first)
 *
 * And if `false` is returned, the row is not inserted in `values`.
 *
 * ```js
 * function row(value, type, definition, row){
 *   // if false is returned, the row is omitted
 * }
 * ```
 */

/**
 * @private
 * @type Options
 */
const defOpts = {
  /**
   * @param {string} m - message that shows the failing.
   * @returns {any} Returned value from the capturable failing.
   */
  fail: function(m){
    console.log(m);
    return {
      fail: m,
    };
  },
  trim: true,
  trimEscaped: false,
  types: false,
  headers: true,
  firstLineHeader: false,
  delimiter: ',',
  escape: '"',
  comment: '#',
  cast: false,
  row: false,
};


/**
 * Data structure produced by the parser (per values-headers found).
 * @typedef {Object} Definition - Data structure.
 * @prop {string[]} headers - can be empty if no headers.
 * @prop {int} hlength - number of headers.
 * @prop {any[]} values - can be empty if no values.
 * @prop {int} vlength - number of values (rows).
 */

/**
 * @class
 * @see Options
 * @see Definition
 */
class CSV {
  /**
   * Constructor. It accepts an Options object.
   * @param {(Options|null)} cfg - Options to be configured.
   * @see Options
   */
  constructor(cfg){
    this.configure(cfg);
  }
  /**
   * Configure the options.
   * @param {(Options|null)} cfg - Options to be configured.
   * @see Options
   */
  configure(cfg){
    if (cfg == null){
      cfg = {};
    }
    for (let opt in defOpts){
      let value = cfg[opt];
      this['_' + opt] = value != null ? value : defOpts[opt];
    }
    if (typeof this._fail !== 'function'){
      this._fail = defOpts.fail;
    }
    if (typeof this._trim !== 'boolean'){
      this._trim = defOpts.trim;
    }
    if (typeof this._trimEscaped !== 'boolean'){
      this._trimEscaped = defOpts.trimEscaped;
    }
    if (typeof this._types !== 'boolean'){
      this._types = defOpts.types;
    }
    if (typeof this._headers !== 'boolean'){
      this._headers = defOpts.headers;
    }
    if (typeof this._firstLineHeader !== 'boolean'){
      this._firstLineHeader = defOpts.firstLineHeader;
    }
    if (typeof this._delimiter !== 'string' || this._delimiter.length !== 1){
      this._delimiter = defOpts.delimiter;
    }
    if (typeof this._escape !== 'string' || this._escape.length !== 1){
      this._escape = defOpts.escape;
    }
    if (typeof this._comment !== 'string' || this._comment.length !== 1){
      this._comment = defOpts.comment;
    }
    if (this._types){
      this._headers = true;
    }
    if (!this._headers){
      this._firstLineHeader = false;
    }
    if (typeof this._cast !== 'function'){
      if (this._cast === true){
        this._cast = casters.number;
      }else{
        this._cast = false;
      }
    }
    if (typeof this._row !== 'function'){
      this._row = false;
    }
  }
  /**
   * Parse the input string based on the options configured.
   * @param {string} str - input string to be parsed (CSV string).
   * @returns {Definition|any} Data structure produced (or return value from fail function).
   * @see Definition
   * @see Options
   */
  parse(str){
    let types = {};

    let lDefType = null;

    let lTyped = false; // if the row got the type
    let lType = null;
    let lArray = null;
    let lArrayLen = 0;
    let lStr = null;
    let lEsc = false; // if has escape double quote ""
    let lEscOpen = false;
    let lValues = 0; // rows in values

    let line = 1;
    let lineI = 0;

    let lastNL = null; // append a last newline when EOF, to append last locals/tmp

    const optTypes = this._types;
    const optHeaders = this._headers;
    let headerParsed = false; // only for optTypes == false
    const optCast = this._cast;
    const optRow = this._row;
    const optDel = this._delimiter;
    const optEsc = this._escape;
    const optComm = this._comment;

    let optFirstHeader = this._firstLineHeader;

    let i = 0;
    let cont = true;
    let state = 0;
    while(cont){
      let char = str[i];
      if (lastNL) {
        char = lastNL;
        lastNL = false; // finish
      }
      if (char){
        // uncomment for debug:
        // console.log(`${i} ${state}: '${char}'`)
        switch(state){
        case 0: // read
          if (char === optComm){
            if (optTypes && str.substr(i+1, 5) === 'type-'){
              state = 1; // header
              i += 5;
              lType = '';
              lStr = '';
            } else if (!optTypes && !headerParsed) {
              state = 1;
              lType = '';
              lTyped = true;
              lArray = [];
              lArrayLen = 0;
              lStr = '';
            }else{
              state = 3; // omit until EOL
            }
          }else if (char === ' ' || char === '\t'){ // omit

          }else if (char === '\n'){ // omit
            line++;
            lineI = i + 1;
          }else if (optTypes && char === 't'){
            if (str.substr(i, 5) === 'type-'){
              state = 2; // value
              i += 4;
              lType = '';
              lStr = '';
            }else{
              return this._fail(`invalid row value in line ${line}:\n${str.substr(lineI, i - lineI + 1)}`);
            }
          }else{ // previously: else if (!optTypes)
            if (char === optEsc){
              lEsc = true;
              lEscOpen = true;
              // i++;
            }else if (headerParsed){
              state = 2; // value
              lType = '';
              lTyped = true;
              lArray = [];
              lArrayLen = 0;
              lDefType = types[''];
              lStr = char;
            }else if (optFirstHeader){
              state = 1;
              lType = '';
              lTyped = true;
              lArray = [];
              lArrayLen = 0;
              lStr = char;
            }else if (!optHeaders){
              state = 2; // value
              lType = '';
              lTyped = true;
              lArray = [];
              lArrayLen = 0;
              if (lDefType == null){
                lDefType = types[''] = {
                  headers: [],
                  hlength: 0,
                  values: [],
                  vlength: 0,
                };
              }
              lStr = char;
            }else{ // previously: else if (!headerParsed)
              return this._fail(`invalid row (no header definition) in line ${line} col ${i - lineI + 1}:\n${str.substr(lineI, i - lineI + 1)}`);
            }
          }
          break;
        case 1: // header
          if (char === '\n' || char === optDel){
            if (lStr === ''){
              return this._fail(`invalid header '${lStr}' in line ${line} col ${i - lineI + 1}:\n${str.substr(lineI, i - lineI + 1)}`);
            }else{
              if (lTyped){
                let value = lStr.trim();
                lArray.push(optCast ? optCast(value, true, lType, lArrayLen, 0) : value);
                lArrayLen++;
              }else{
                lType = lStr;
                lTyped = true;
                lStr = '';
                lArray = [];
                lArrayLen = 0;
              }
            }
            if (char === optDel){
              lStr = '';
            }else{ // previously: else if (char === '\n'){
              if (types[lType]){
                return this._fail(`previous header definition for '${lType}' in line ${line} col ${i - lineI + 1}:\n${str.substr(lineI, i - lineI + 1)}`);
              }
              if (!optHeaders){
                lArrayLen = 0;
                lArray = [];
              }
              types[lType] = {
                headers: lArray,
                hlength: lArrayLen,
                values: [],
                vlength: 0,
              };
              lValues = 0;
              lType = null;
              lArray = null;
              lArrayLen = 0;
              lStr = null;
              lTyped = false;
              state = 0;
              line++;
              lineI = i + 1;
              if (!optTypes){
                headerParsed = true;
              }
            }
          }else{
            lStr += char;
          }
          break;
        case 2: // value
          if (char === '\n' || char === optDel || char === optEsc){
            if (lEscOpen){
              if (char === optDel){
                lStr += char;
              }else if (char === optEsc){
                lEscOpen = false;
              }else{ // previously: else if (char === '\n')
                return this._fail(`invalid row with open escaped char ${optEsc} and reach EOL in line ${line} col ${i - lineI + 1}:\n${str.substr(lineI, i - lineI + 1)}`);
              }
            }else{
              if (lTyped){
                let value = lStr;
                if (lEsc){
                  if (this._trimEscaped){
                    value = lStr.trim();
                  }
                }else if (this._trim){
                  value = lStr.trim();
                }
                lArray.push(optCast ? optCast(value, false, lType, lArrayLen, lValues) : value);
                lArrayLen++;
                lEsc = false;
                lEscOpen = false;
              }else{
                if (optTypes){ // can !optTypes come here?
                  lDefType = types[lStr];
                  // lDefType = types[''];
                }
                if (!lDefType){
                  return this._fail(`invalid type (no header definition) '${lStr}' in line ${line} col ${i - lineI + 1}:\n${str.substr(lineI, i - lineI + 1)}`);
                }
                lType = lStr;
                lTyped = true;
                lStr = '';
                lArray = [];
                lArrayLen = 0;
                lEsc = false;
                lEscOpen = false;
              }
              if (char === optDel){
                lStr = '';
                if (str[i+1] === optEsc){
                  lEsc = true;
                  lEscOpen = true;
                  i++;
                }
              }else{ // previously: else if (char === '\n'){
                if (optHeaders && lDefType.hlength !== lArrayLen){
                  return this._fail(`invalid row length ${lArrayLen} (header length ${lDefType.hlength}) in line ${line} col ${i - lineI + 1}:\n${str.substr(lineI,  i - lineI + 1)}`);
                }
                let insert = optRow ? optRow(lArray, lType, lDefType, lValues) !== false : true;
                if (insert){
                  lDefType.values.push(lArray);
                  lValues++;
                  lDefType.vlength++;
                }
                lType = null;
                lArray = null;
                lArrayLen = 0;
                lStr = null;
                lTyped = false;
                lEsc = false;
                lEscOpen = false;
                state = 0;
                line++;
                lineI = i + 1;
              }
            }
          }else{
            if (lEscOpen){
              if (char === '\\' && str[i+1] === optEsc){
                char = optEsc;
                i++; // omit next
              }
            }
            lStr += char;
          }
          break;
        case 3: // next until EOL
          if (char === '\n'){
            state = 0;
          }
          break;
        }
      }else{ // EOF
        if (lastNL == null){ // avoid problems when EOF is in the last row (no next line)
          lastNL = '\n';
          i = i - 1;
        }else{ // now can exit
          cont = false;
        }
      }
      i++;
    }

    if (!optTypes){
      types = types[''];
    }

    return types;
  }
}

const casters = {
  /**
   * Number caster function. It returns a string or a number.
   * It casts the string to a number if it matches this regexp: /^[-+]?[\d.]+$/.
   * @param {string} value - value to be casted.
   * @param {boolean} isHeader - the value comes from a header.
   * @returns {(string|number)} Casted value.
   */
  number: function(value, isHeader){
    let ret = value;
    if (!isHeader){
      if (/^[-+]?[\d.]+$/.test(value)){
        ret = Number(value);
      }
    }
    return ret;
  }
};

/**
 * CSV Types module.
 * @module csv-types
 */
module.exports = {
  /**
   * CSV Types version.
   */
  version: { major: 0, minor: 3, patch: 1 },
  /**
   * CSV class.
   */
  CSV,
  /**
   * CSV Types casters (expose global casting functions).
   */
  casters
};