Source: index.js

#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const getopts = require("getopts");

// Synchronous CSV parser from the csv-parse package.
// The `csv-parse/sync` entry point is tried first; if it cannot be
// loaded, fall back to the CommonJS build shipped inside the package.
// NOTE(review): presumably the fallback covers Node versions or package
// layouts where the subpath export does not resolve -- confirm.
var csvparse;
try {
    csvparse = require("csv-parse/sync");
} catch(e) {
    csvparse = require("csv-parse/dist/cjs/sync.cjs");
}

// Synchronous CSV serializer from the csv-stringify package, loaded
// with the same two-step fallback as the parser above.
var csvstringify;
try {
    csvstringify = require("csv-stringify/sync");
} catch(e) {
    csvstringify = require("csv-stringify/dist/cjs/sync.cjs");
}

// Markdown renderer (markdown-it) used to convert summary text to HTML.
// Options, per the markdown-it documentation:
// - html: allow raw HTML tags in the Markdown source
// - xhtmlOut: close single tags XHTML-style (e.g. <br />)
// - linkify: convert URL-like text to links
// - typographer: enable typographic replacements and quote beautification
const markdown = require('markdown-it')({
    html: true,
    xhtmlOut: true,
    linkify: true,
    typographer: true
});

// Name of the script configuration file; the path is relative, so it is
// resolved against the directory from which the script is run.
var configPath = path.join(".", "make-data-config.json");

/** @function handleError
 * @description Report only the human-readable error message
 *   to the console and terminate the script with exit status 1,
 *   so that a calling shell or script can detect the failure.
 * @param {Object} e - instance of the Error constructor
 * @param {string} e.message - error message
 */
const handleError = (e) => {
    // To obtain a stack trace while debugging, rethrow `e` here instead.
    console.log(`ERROR: ${e.message}`);
    // A bare process.exit() would report success (status 0) to the caller.
    process.exit(1);
};

/** @class SetupTool
 * @description Configuration and mapping support for the script.
 *   An instance reads the config file, loads Legal Resource Registry
 *   (LRR) data for the default jurisdiction, and reads or creates the
 *   mapping files used to convert human-readable court and jurisdiction
 *   descriptions into machine-readable LRR codes.
 *
 *   All initialization happens in the constructor, by running the
 *   instance methods listed below strictly in the order given.
 * @param {Object} opts - command-line options set by the `getopts` package
 */
function SetupTool(opts) {
    this.configPath = configPath;
    if (opts) {
        this.opts = opts;
    }
    // Each step relies on properties set by the steps before it.
    const initSteps = [
        "setupConfigFile",
        "loadConfigFile",
        "loadUseDocsOnItems",
        "checkDefaultJurisdictionCode",
        "validateLrrPath",
        "loadJurisObj",
        "extractJurisdictionNames",
        "extractCourtNameToKeyMap",
        "setupCourtMap",
        "loadCourtMap"
    ];
    for (const step of initSteps) {
        this[step]();
    }
}

/**
 * @description Ensure a config file exists at ``this.configPath``.
 *   When the file is missing, a sample configuration with placeholder
 *   values is written; an existing file is left untouched.
 */
SetupTool.prototype.setupConfigFile = function() {
    if (fs.existsSync(this.configPath)) {
        return;
    }
    // Placeholder values, to be edited by the operator.
    const sampleConfig = {
        "jurisdictionCode": "xx",
        "jurisdictionName": "Laputa",
        "jurisdictionDescPath": "/path/to/legal-resource-registry-repo"
    };
    fs.writeFileSync(this.configPath, JSON.stringify(sampleConfig, null, 2));
};

/**
 * @description Read the configuration file and derive from it the
 *   values needed to run the script. The name of the input file
 *   is reported on the console.
 *
 * Keys read from the configuration file:
 * - ``jurisdictionCode``: the short-code of the default jurisdiction
 * - ``jurisdictionName``: the long-form name of the jurisdiction
 * - ``jurisdictionDescPath``: the path to the Legal Resource Registry files on this system
 * @prop {string} defaultJurisdiction - the default jurisdiction (the short-code)
 * @prop {string} inputFileName - the input file name: the long-form name,
 *   lowercased, with each whitespace character replaced by a hyphen
 * @prop {string} defaultJurisdictionPath - the path to the default jurisdiction
 *   file in the LRR (derived from the short-code)
 */
SetupTool.prototype.loadConfigFile = function() {
    const config = JSON.parse(fs.readFileSync(this.configPath));
    this.defaultJurisdiction = config.jurisdictionCode;

    const nameSlug = config.jurisdictionName.replace(/\s/g, "-").toLowerCase();
    this.inputFileName = `data-${nameSlug}.csv`;

    // Drop one trailing slash so the joined path has no doubled separator.
    const lrrRoot = config.jurisdictionDescPath.replace(/\/$/, "");
    this.defaultJurisdictionPath = `${lrrRoot}/juris-${this.defaultJurisdiction}-desc.json`;

    console.log(`Using input data file: ${this.inputFileName}`);
};

/**
 * @description If a file ``useDocsOnItems.txt`` exists in the
 *   current directory, read it as a newline-delimited list of
 *   CultExp IDs. Each entry is trimmed and cut to its first five
 *   characters; the sorted list, without duplicates, is stored
 *   on this instance. Nothing is set when the file is absent.
 * @prop {string[]} useDocsOnItems - the resulting array of CultExp IDs
 */
SetupTool.prototype.loadUseDocsOnItems = function() {
    const listPath = path.join(".", "useDocsOnItems.txt");
    if (!fs.existsSync(listPath)) {
        return;
    }
    console.log("Loading showDocs tag items from useDocsOnItems.txt");
    const rootIDs = fs.readFileSync(listPath)
        .toString()
        .trim()
        .split("\n")
        .map(entry => entry.trim().slice(0, 5));
    // A Set drops repeated IDs; sorting afterwards yields the same
    // ordering as sorting first and then removing adjacent duplicates.
    this.useDocsOnItems = Array.from(new Set(rootIDs)).sort();
};

/**
 * @description Check that the ``jurisdictionCode`` value read
 *   from ``make-data-config.json`` has been changed from the
 *   sample value of "xx".
 * @throws {Error} a helpful error if the sample value is still in place
 */
SetupTool.prototype.checkDefaultJurisdictionCode = function() {
    // The configured code is stored on `defaultJurisdiction` when the
    // config file is loaded; no `jurisdictionCode` property is ever set
    // on the instance, so testing that name could never detect "xx".
    if (this.defaultJurisdiction === "xx") {
        throw new Error("set appropriate values in make-data-config.json");
    }
};

/**
 * @description Confirm that the data file of the default jurisdiction
 *   exists at the path derived from the config file. Only the
 *   existence of the path is checked; the content of the Legal
 *   Resource Registry is not validated here.
 * @throws {Error} if the file does not exist
 */
SetupTool.prototype.validateLrrPath = function() {
    const lrrFile = this.defaultJurisdictionPath;
    if (fs.existsSync(lrrFile)) {
        return;
    }
    throw new Error(`path '${lrrFile}' set from values in make-data-config.json does not exist.\n       Edit make-data-config.json and try again.`);
};

/**
 * @description Create the container for LRR data and load into it
 *   the parsed JSON data file of the default jurisdiction, keyed to
 *   the short-code of the default jurisdiction.
 * @prop {Object} jurisObj - the runtime container for LRR data
 */
SetupTool.prototype.loadJurisObj = function() {
    this.jurisObj = {};
    const rawJson = fs.readFileSync(this.defaultJurisdictionPath).toString();
    const jurisdictionData = JSON.parse(rawJson);
    this.jurisObj[this.defaultJurisdiction] = jurisdictionData;
};

/**
 * @description Compose human-readable descriptive names
 *   for all jurisdictions listed under the default jurisdiction,
 *   and set them as keys mapped to their machine-readable
 *   LRR codes.
 *
 *   A descriptive name is built by walking the colon-delimited
 *   elements of the code from the top level down: the name of the
 *   top-level jurisdiction, its code in upper case, then the name
 *   of each deeper level, all joined with "|". A top-level
 *   jurisdiction is additionally registered under its bare name.
 * @prop {Object} jurisdictionNames - a map of jurisdiction names to machine-readable codes
 */
SetupTool.prototype.extractJurisdictionNames = function() {
    const jurisdictions = this.jurisObj[this.defaultJurisdiction].jurisdictions;
    this.jurisdictionNames = {};
    for (const key in jurisdictions) {
        const parts = key.split(":");
        const names = [];
        for (let i = 0; i < parts.length; i++) {
            // Code of the ancestor at depth i, e.g. "us", then "us:ca".
            const ancestorKey = parts.slice(0, i + 1).join(":");
            names.push(jurisdictions[ancestorKey].name);
            if (i === 0) {
                names.push(parts[0].toUpperCase());
            }
        }
        // Exactly two entries means a top-level jurisdiction
        // (its name plus its upper-cased code).
        if (names.length === 2) {
            this.jurisdictionNames[names[0]] = key;
        }
        this.jurisdictionNames[names.join("|")] = key;
    }
};

/**
 * @description Build a map of human-readable court names to
 *   machine-readable LRR court codes from the ``courts`` segment
 *   of the default jurisdiction. If two courts carry the same
 *   name, the one encountered later takes the slot.
 * @prop {Object} courtNameMap - container for the name-to-code mapping
 */
SetupTool.prototype.extractCourtNameToKeyMap = function() {
    const courts = this.jurisObj[this.defaultJurisdiction].courts;
    const nameToCode = {};
    for (const code in courts) {
        nameToCode[courts[code].name] = code;
    }
    this.courtNameMap = nameToCode;
};

/**
 * @description Set the path of the court code map file, and create
 *   the file with an empty array as content if none is found.
 * @prop {string} courtMapPath - path to the court map file to be edited
 * @prop {boolean} hasCourtMapFile - ``true`` if the file already existed,
 *   ``false`` if it has just been created
 */
SetupTool.prototype.setupCourtMap = function() {
    const mapPath = path.join(".", "court-code-map.json");
    const alreadyExists = fs.existsSync(mapPath);
    this.courtMapPath = mapPath;
    if (!alreadyExists) {
        fs.writeFileSync(mapPath, "[]");
    }
    this.hasCourtMapFile = alreadyExists;
};

/**
 * @description Read the array of name-to-code pairs for courts
 *   from the court map file and sort it in place: longest names
 *   first, so that a longer name is tried before any shorter name
 *   it may contain, then in descending alphabetical order among
 *   names of equal length.
 * @prop {Object[]} courtMap - container for operator-edited
 *   court name-to-code pairings
 */
SetupTool.prototype.loadCourtMap = function() {
    const longestThenReverseAlpha = (a, b) => {
        const left = a[0];
        const right = b[0];
        if (left.length !== right.length) {
            return left.length < right.length ? 1 : -1;
        }
        if (left < right) {
            return 1;
        }
        if (left > right) {
            return -1;
        }
        return 0;
    };
    const pairs = JSON.parse(fs.readFileSync(this.courtMapPath).toString());
    pairs.sort(longestThenReverseAlpha);
    this.courtMap = pairs;
};

/**
 * @description If the spreadsheet court value is not itself a court
 *   code registered in the ``courts`` segment of the default
 *   jurisdiction, and is not yet listed in ``courtMap``, append it
 *   to ``courtMap`` paired with an empty code, for the operator
 *   to fill in.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @prop {Object[]} courtMap - container for court name-to-code pairings
 *   to be edited by the operator
 */
SetupTool.prototype.addCourtMapEntry = function(line) {
    const registeredCourts = this.jurisObj[this.defaultJurisdiction].courts;
    if (registeredCourts[line.court]) {
        return;
    }
    const alreadyListed = this.courtMap.some(pair => pair[0] === line.court);
    if (!alreadyListed) {
        this.courtMap.push([line.court, ""]);
    }
};

/**
 * @description Inspect each name-to-code pair in ``courtMap``:
 * - a pair with a non-empty name and an empty code counts as unmapped;
 * - any other pair whose code is not registered in the ``courts``
 *   segment of the default jurisdiction in the LRR counts as
 *   unrecognized.
 *
 * Unrecognized codes are reported with a warning on the console.
 * Unmapped courts block processing.
 * @throws {Error} listing the names of all unmapped courts
 */
SetupTool.prototype.checkCourtMap = function() {
    const registeredCourts = this.jurisObj[this.defaultJurisdiction].courts;
    const unmapped = [];
    const unrecognized = [];
    for (const pair of this.courtMap) {
        const name = pair[0];
        const code = pair[1];
        const lacksCode = name !== "" && code === "";
        if (lacksCode) {
            unmapped.push(name);
        } else if (!registeredCourts[code]) {
            unrecognized.push(`${code} (${name})`);
        }
    }
    // Warn first, so the warning is seen even when the error follows.
    if (unrecognized.length > 0) {
        console.log(`WARNING: the following courts have unrecognized identifiers in court-code-map.json:\n  ${unrecognized.join("\n  ")}`);
    }
    if (unmapped.length > 0) {
        throw new Error(`the following courts are unmapped in court-code-map.json:\n  ${unmapped.join("\n  ")}`);
    }
};

/**
 * @description Set the path of the court-in-jurisdiction code map
 *   file, and create the file with an empty object as content if
 *   none is found.
 * @prop {string} courtJurisdictionMapPath - path to the court-in-jurisdiction map file to be edited
 * @prop {boolean} hasCourtJurisdictionMapFile - ``true`` if the file already
 *   existed, ``false`` if it has just been created
 */
SetupTool.prototype.setupCourtJurisdictionMap = function() {
    const mapPath = path.join(".", "court-jurisdiction-code-map.json");
    const alreadyExists = fs.existsSync(mapPath);
    this.courtJurisdictionMapPath = mapPath;
    if (!alreadyExists) {
        fs.writeFileSync(mapPath, "{}");
    }
    this.hasCourtJurisdictionMapFile = alreadyExists;
};

/**
 * @description Read and parse the operator-edited
 *   court-in-jurisdiction map file for reference.
 * @prop {Object} courtJurisdictionMap - court and jurisdiction codes, keyed
 *   to the known (but invalid) values of court and jurisdiction derived from
 *   the spreadsheet
 */
SetupTool.prototype.loadCourtJurisdictionMap = function() {
    const rawJson = fs.readFileSync(this.courtJurisdictionMapPath).toString();
    const parsedMap = JSON.parse(rawJson);
    this.courtJurisdictionMap = parsedMap;
};

/**
 * @description Validate the (possibly reprocessed) spreadsheet
 *   jurisdiction value as an LRR jurisdiction code. If validation
 *   fails, register the court/jurisdiction pair under the key
 *   ``<court>::<jurisdiction>`` in ``courtJurisdictionMap``, unless
 *   an entry for that key is already present. A message is logged
 *   only when an entry is actually added.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @prop {Object} courtJurisdictionMap - court and jurisdiction codes, keyed
 *   to the known (but invalid) values of court and jurisdiction derived from
 *   the spreadsheet
 * @see {@link SetupTool#validateJurisdictionCode}
 */
SetupTool.prototype.addCourtJurisdictionMapEntry = function(line) {
    if (this.validateJurisdictionCode(line.jurisdiction)) {
        return;
    }
    const key = `${line.court}::${line.jurisdiction}`;
    if (this.courtJurisdictionMap[key]) {
        return;
    }
    // Report only real additions; logging on every call would announce
    // an entry for lines that add nothing.
    console.log(`Adding entry to this.courtJurisdictionMap!`);
    this.courtJurisdictionMap[key] = {
        court: line.court,
        jurisdiction: line.jurisdiction
    };
};

/**
 * @description Test a string as a jurisdiction code. The string must
 *   consist of colon-delimited elements made up of lowercase letters
 *   and periods; LRR data for its first element must be loaded in
 *   ``jurisObj``; and the full string must be present in the
 *   ``jurisdictions`` segment of that data.
 * @param {string} jurisdiction - the candidate jurisdiction code
 * @returns {boolean} ``true`` if all conditions hold, otherwise ``false``
 */
SetupTool.prototype.validateJurisdictionCode = function(jurisdiction) {
    const parsed = jurisdiction.match(/^([.a-z]+)(:[.a-z]+)*$/);
    if (!parsed) {
        return false;
    }
    // parsed[1] is the top-level element, which keys the LRR data.
    const topLevelData = this.jurisObj[parsed[1]];
    return Boolean(topLevelData && topLevelData.jurisdictions[jurisdiction]);
};

/**
 * @description A spreadsheet entry may declare a jurisdiction other
 *   than the default. If the given string has the syntax of a
 *   jurisdiction code, return its top-level (first) element;
 *   otherwise return the default jurisdiction.
 * @param {string} jurisdiction - jurisdiction value of a spreadsheet line
 * @returns {string}
 */
SetupTool.prototype.getCurrentJurisdictionCode = function(jurisdiction) {
    const parsed = jurisdiction.match(/^([.a-z]+)(:[.a-z]+)*$/);
    return parsed ? parsed[1] : this.defaultJurisdiction;
};

/**
 * @description Make sure LRR data for the jurisdiction of a line is
 *   available. A line without a jurisdiction value is first given
 *   the default jurisdiction. If data for the top-level jurisdiction
 *   of the line is not yet set on ``jurisObj``, its data file is
 *   looked for beside that of the default jurisdiction and loaded
 *   if it exists; a missing file is silently ignored.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 */
SetupTool.prototype.loadDataForCurrentJurisdiction = function(line) {
    if (!line.jurisdiction) {
        line.jurisdiction = this.defaultJurisdiction;
    }
    const topLevelCode = this.getCurrentJurisdictionCode(line.jurisdiction);
    if (this.jurisObj[topLevelCode]) {
        return;
    }
    // Swap the file name of the default jurisdiction for that of the
    // current one, keeping the directory part of the path.
    const defaultFileName = `juris-${this.defaultJurisdiction}-desc.json`;
    const currentFileName = `juris-${topLevelCode}-desc.json`;
    const descPath = this.defaultJurisdictionPath.replace(defaultFileName, currentFileName);
    if (!fs.existsSync(descPath)) {
        return;
    }
    this.jurisObj[topLevelCode] = JSON.parse(fs.readFileSync(descPath).toString());
};

/**
 * @description If the spreadsheet ``jurisdiction`` value is not a
 *   code found in the ``jurisdictions`` segment of the given
 *   jurisdiction, attempt to match it with a descriptive jurisdiction
 *   name. On a match, the spreadsheet value is replaced with the
 *   corresponding LRR machine-readable code. (This match attempt will
 *   succeed only in the rare event that the data collector has
 *   provided Jurism-style jurisdiction values in the Jurisdiction
 *   column of the spreadsheet.)
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @param {string=} currentJurisdictionCode - jurisdiction code to use
 *   for the match attempt; the default jurisdiction if omitted
 */
SetupTool.prototype.extractJurisdiction = function(line, currentJurisdictionCode) {
    const code = currentJurisdictionCode || this.defaultJurisdiction;
    const isRegisteredCode = this.jurisObj[code].jurisdictions[line.jurisdiction];
    if (isRegisteredCode) {
        return;
    }
    const codeForName = this.jurisdictionNames[line.jurisdiction];
    if (codeForName) {
        line.jurisdiction = codeForName;
    }
};

/**
 * @description Replace the spreadsheet court value with a court code.
 *   An exact match of the trimmed value against an LRR court name
 *   wins. Otherwise the first entry of the operator-edited court map
 *   whose name occurs (case-insensitively) within the value is
 *   applied: its code replaces the court value, and its optional
 *   third, fourth and fifth elements, when set, replace the
 *   division, type and jurisdiction of the line.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @see {@link https://github.com/Juris-M/citeproc-cite-service/tree/master/cultexp#preparing-a-court-map}
 */
SetupTool.prototype.resetCourtDetails = function(line) {
    const courtText = line.court.trim();
    const exactCode = this.courtNameMap[courtText];
    if (exactCode) {
        line.court = exactCode;
        return;
    }
    const haystack = courtText.toLowerCase();
    const hit = this.courtMap.find(entry => haystack.indexOf(entry[0].toLowerCase()) > -1);
    if (!hit) {
        return;
    }
    line.court = hit[1];
    // Positions of the optional elements of a map entry, with the
    // line property each one overrides.
    const optionalFields = [
        [2, "division"],
        [3, "type"],
        [4, "jurisdiction"]
    ];
    for (const [position, field] of optionalFields) {
        if (hit[position]) {
            line[field] = hit[position];
        }
    }
};

/**
 * @description Replace spreadsheet court and jurisdiction values
 *   with the values from the operator-edited map file, when the map
 *   holds an entry for the key ``<court>::<jurisdiction>`` of the line.
 *   The key is computed once, from the values the line holds on
 *   entry, so that a replacement is never itself looked up and
 *   replaced again.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @see {@link https://github.com/Juris-M/citeproc-cite-service/tree/master/cultexp#preparing-a-court-jurisdiction-map}
 */
SetupTool.prototype.resetCourtJurisdictionDetails = function(line) {
    const map = this.courtJurisdictionMap;
    const lineKey = `${line.court}::${line.jurisdiction}`;
    // Direct own-key lookup instead of scanning every key of the map.
    if (!map || !Object.prototype.hasOwnProperty.call(map, lineKey)) {
        return;
    }
    const info = map[lineKey];
    line.court = info.court;
    line.jurisdiction = info.jurisdiction;
};

/**
 * @description Check the jurisdiction value of every entry in
 *   ``courtJurisdictionMap`` for validity of syntax and presence in
 *   the LRR. The court value is not tested, as invalid court values
 *   are covered by a warning, and will not block processing.
 * @throws {Error} an error listing all invalid jurisdictions
 * @see {@link SetupTool#validateJurisdictionCode}
 */
SetupTool.prototype.checkCourtJurisdictionMap = function() {
    const invalidCodes = [];
    for (const key in this.courtJurisdictionMap) {
        const { jurisdiction } = this.courtJurisdictionMap[key];
        if (this.validateJurisdictionCode(jurisdiction)) {
            continue;
        }
        invalidCodes.push(String(jurisdiction));
    }
    if (invalidCodes.length === 0) {
        return;
    }
    throw new Error(`the following jurisdiction codes in court-jurisdiction-code-map.json\nare invalid:\n  ${invalidCodes.join("\n  ")}`);
};

/** @class ColumnTool
 * @description Methods for detecting which spreadsheet column holds
 *   which kind of content, and for reading column content by column
 *   nickname, so that columns may appear in any order within the
 *   spreadsheet.
 * @param {Object} opts - command-line options set by the `getopts` package
 * @prop {Object} opts - the command-line options as received
 * @prop {Object[]} colMap - column nicknames by column position, initially empty
 */
function ColumnTool(opts) {
    Object.assign(this, {
        opts: opts,
        colMap: []
    });
}

/** @instance checkCsvFilenameSanity
 * @description Check that there is one and only one
 *   CSV file in the current directory with a name of the form
 *   "data-\<country name\>.csv", where the country name consists
 *   of ASCII letters and hyphens only.
 * @throws {Error} if more than one such file exists
 * @throws {Error} if no such file exists (with a different message)
 */
ColumnTool.prototype.checkCsvFilenameSanity = () => {
    const dataFilePattern = /^data-[-a-zA-Z]+\.csv$/;
    const dataFiles = fs.readdirSync(".").filter(name => dataFilePattern.test(name));
    if (dataFiles.length > 1) {
        throw new Error("Multiple data files with name 'data-<country_name>.csv' found in this directory. Aborting.");
    }
    if (dataFiles.length === 0) {
        throw new Error("No data file with name 'data-<country_name>.csv' found. Aborting.");
    }
};

/**
 * @description A mapping of column nicknames to strings
 *   likely to be found in the header of each. Each value is an
 *   object with the search string as its ``str`` property.
 *
 *   The order of entries is significant: headers are tested against
 *   the hints in this order, and the first hint contained in a
 *   header claims the column.
 */
ColumnTool.prototype.colMapHints = [
    // [nickname, search string]
    ["id", "doc"],
    ["date", "date"],
    ["jurisdiction", "jurisdiction"],
    ["court", "court"],
    ["division", "division"],
    ["type", "type"],
    ["docketno", "number"],
    ["name", "name"],
    ["year-as-volume", "year"],
    ["volume", "volume"],
    ["reporter", "reporter"],
    ["page", "page"],
    ["expert-presence", "presence"],
    ["instructedby", "instructed"],
    ["link", "link"],
    ["keywords", "keywords"],
    ["area", "area"],
    ["summary", "summary"],
    ["lang", "language"]
].reduce((hints, pair) => {
    hints[pair[0]] = { str: pair[1] };
    return hints;
}, {});

/**
 * @description For each column label in the spreadsheet
 *   headline, look for the first column nickname, not yet assigned
 *   to a column, whose search string occurs in the lowercased label.
 *   If one is found, append that nickname to ``colMap``; otherwise
 *   append ``null`` to flag the column as irrelevant.
 *
 *   Nicknames already assigned are tracked per instance. The shared
 *   ``colMapHints`` table lives on the prototype and is left
 *   unmodified, so that other instances can still match their columns.
 * @param {Object[]} headline - an array representing one spreadsheet line
 * @prop {Object[]} colMap - an array of column nicknames, in the
 *   order of the corresponding columns in the spreadsheet
 * @prop {Object[]} headline - the original headline values in array
 *   format, for use in dumping normalized content as CSV
 */
ColumnTool.prototype.setColMap = function(headline) {
    this.headline = headline;
    // Each nickname may claim one column only; seed with anything
    // this instance has already mapped.
    const claimed = new Set(this.colMap.filter(Boolean));
    for (let i = 0; i < headline.length; i++) {
        const label = headline[i] ? headline[i].toLowerCase() : "";
        let nickname = null;
        for (const key in this.colMapHints) {
            if (claimed.has(key)) {
                continue;
            }
            if (label.indexOf(this.colMapHints[key].str) > -1) {
                nickname = key;
                break;
            }
        }
        if (nickname !== null) {
            claimed.add(nickname);
        }
        this.colMap.push(nickname);
    }
};

/**
 * @description Check that every column nickname listed in
 *   ``colMapHints`` has been assigned to a column of the
 *   spreadsheet. The check is skipped entirely when either the
 *   ``quiet`` or the ``Quiet`` option is set.
 * @throws {Error} a helpful error on the first column identified as missing
 */
ColumnTool.prototype.checkColMap = function() {
    const checkDisabled = this.opts.quiet || this.opts.Quiet;
    if (checkDisabled) {
        return;
    }
    for (const nickname in this.colMapHints) {
        if (!this.colMap.includes(nickname)) {
            throw new Error(`No column found for: ${nickname}`);
        }
    }
};

/**
 * @description Copy each labeled cell of a spreadsheet line
 *   to an object with the column nickname as its key,
 *   and return the object. Cell values are trimmed; a cell that
 *   is empty, or missing because the record is shorter than the
 *   headline, is represented by an empty string. Columns without
 *   a nickname are skipped.
 * @param {Object[]} record - an array representing one spreadsheet line
 * @returns {Object}
 */
ColumnTool.prototype.loadLine = function(record) {
    const ret = {};
    this.colMap.forEach((key, i) => {
        if (!key) {
            return;
        }
        const cell = record[i];
        // Calling trim() on a missing cell would throw a TypeError.
        ret[key] = cell ? cell.trim() : "";
    });
    return ret;
};

/**
 * @description The reverse of ``loadLine``: arrange the values of a
 *   line object in an array, in the order of the spreadsheet columns
 *   recorded in ``colMap``. A column without a nickname, or one for
 *   which the line holds no value, is represented by an empty string.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @returns {Object[]} an array with one element per spreadsheet column
 */
ColumnTool.prototype.unLoadLine = function(line) {
    return this.colMap.map(key => {
        if (!key) {
            return "";
        }
        const value = line[key];
        return (value === undefined || value === null) ? "" : value;
    });
};

/**
 * @description Read the spreadsheet file, split it on runs of line
 *   break characters, drop blank lines, and parse the remaining
 *   content to an array of arrays. The first record with content in
 *   either of its first two cells is taken as the headline: it is
 *   used to match columns to nicknames and is then discarded, along
 *   with any records before it. The records after it are returned.
 * @param {string} csvFilePath - path to CSV file to use as input
 * @returns {Object[]} the data records, one array per spreadsheet line
 * @see {@link ColumnTool#setColMap}
 * @see {@link ColumnTool#checkColMap}
 */
ColumnTool.prototype.getSpreadsheetArrays = function(csvFilePath) {
    const rawText = fs.readFileSync(csvFilePath).toString();
    const normalized = rawText
        .split(/[\n\r]+/)
        .filter(row => row.trim() !== "")
        .join("\n");
    const records = csvparse.parse(normalized);

    const headlineIndex = records.findIndex(record => record[0] || record[1]);
    if (headlineIndex === -1) {
        // No headline found, hence no data records either.
        return [];
    }
    this.setColMap(records[headlineIndex]);
    this.checkColMap();
    return records.slice(headlineIndex + 1);
};

/** @class Compositor
 * @description Methods for composing CSL-M JSON items
 *   based on spreadsheet content. The constructor only stores
 *   its three arguments on the instance.
 * @param {Object} opts - command-line options set by the `getopts` package
 * @param {string} defaultJurisdiction - short-code of the default jurisdiction
 * @param {Object[]} useDocsOnItems - CultExp IDs of items to be tagged with ``showDocs``
 * @prop {Object} opts - command-line options set by the `getopts` package
 * @prop {string} defaultJurisdiction - short-code of the default jurisdiction
 * @prop {Object[]} useDocsOnItems - CultExp IDs of items to be tagged with ``showDocs``
 */
function Compositor(opts, defaultJurisdiction, useDocsOnItems) {
    Object.assign(this, {
        opts: opts,
        defaultJurisdiction: defaultJurisdiction,
        useDocsOnItems: useDocsOnItems
    });
}
    
/**
 * @description Resolve a path within the ``files`` subdirectory of
 *   the current directory, which holds the attachment files. Each
 *   call:
 * - throws an error if the ``files`` subdirectory does not exist;
 * - copies ``empty.pdf`` from the ``test/test-files`` directory
 *   beside this script into ``files`` if it is not yet there, for
 *   possible use as a placeholder;
 * - returns the path to the named file within ``files``, or to the
 *   ``files`` subdirectory itself when no name is given.
 * @param {string=} filename - name of a specific attachment file
 * @returns {string}
 * @throws {Error} if the ``files`` subdirectory does not exist
 */
Compositor.prototype.filesPath = function(filename) {
    const filesDir = path.join(".", "files");
    if (!fs.existsSync(filesDir)) {
        throw new Error(`Required subdirectory ./files does not exist. Create subdirectory and populate with attachment PDF files.`);
    }
    const placeholderPath = path.join(filesDir, "empty.pdf");
    if (!fs.existsSync(placeholderPath)) {
        const bundledPlaceholder = path.join(__dirname, "test", "test-files", "empty.pdf");
        fs.copyFileSync(bundledPlaceholder, placeholderPath);
    }
    return filename ? path.join(filesDir, filename) : filesDir;
};

/**
 * @description The spreadsheet columns from which tags are drawn,
 *   identified by column nickname, each with the prefix to be set
 *   on its tags. Tags are composed in the order listed here.
 */
Compositor.prototype.tagsMap = [
    // [prefix, column nickname]
    ["EP", "expert-presence"],
    ["IB", "instructedby"],
    ["KW", "keywords"],
    ["AL", "area"]
].map(pair => ({
    prefix: pair[0],
    nickname: pair[1]
}));

/**
 * @description Compose tags as a comma-delimited list,
 *   setting the appropriate prefix on tags drawn from
 *   spreadsheet columns. Columns containing a comma-
 *   or semicolon-delimited list of tags are split
 *   before processing, so that prefixes are applied
 *   to all such tags. Empty list elements (as left by a
 *   trailing or doubled delimiter) are ignored.
 *
 *   The ``extraTags`` array is copied, not modified.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @param {Object[]=} extraTags - additional tags not included in the
 *   spreadsheet; these come first in the result
 * @returns {string}
 */
Compositor.prototype.getTags = function(line, extraTags) {
    // Work on a copy, so the array owned by the caller is not extended.
    const tags = extraTags ? extraTags.slice() : [];
    for (const info of this.tagsMap) {
        const raw = line[info.nickname];
        const cell = raw ? raw.trim() : "";
        if (!cell) {
            continue;
        }
        for (const tag of cell.split(/\s*[,;]\s*/)) {
            // An empty element would yield a bare prefix such as "KW:".
            if (tag) {
                tags.push(`${info.prefix}:${tag}`);
            }
        }
    }
    return tags.filter(tag => tag).join(",");
};

/**
 * @description Attempt to convert the content of the spreadsheet "date" column
 *   to a valid date in CSL JSON array format. Spreadsheet dates should be set in
 *   YYYY-MM-DD format to avoid ambiguity. Elements may be delimited by a
 *   hyphen, slash, period or comma, and surrounding space is ignored.
 *   A date with the year as its last element is read in reverse
 *   (as day, month, year).
 *
 *   An empty value, "undated" or "no date" yields ``null``.
 * @param {string} docID - the "id" value from the spreadsheet
 * @param {string} str - a string to parse as a date
 * @returns {Object} an object with a ``date-parts`` property holding
 *   one array of year, month and day strings without leading zeros,
 *   or ``null``
 * @throws {Error} "invalid date" if there are more than three elements,
 *   or any element does not consist of digits only
 * @throws {Error} "impossible date" if the year does not have four
 *   digits, the month exceeds 12, or the day exceeds 31
 * @throws {Error} "ambiguous date" if a year-last date has a day below 13,
 *   so that day and month cannot be told apart
 */
Compositor.prototype.getDate = function(docID, str) {
    if (!str || ["undated", "no date"].indexOf(str.toString().toLowerCase()) > -1) {
        return null;
    }
    const parts = str.toString().split(/[-\/\.\,]/).map(part => part.trim());
    // Anchored test: every element must consist of digits only.
    const allNumeric = parts.every(part => /^[0-9]+$/.test(part));
    if (parts.length > 3 || !allNumeric) {
        throw new Error(`invalid date "${str}" at ${docID}`);
    }
    // Sniff pattern: if the first element does not look like a year,
    // assume the year comes last and reverse the elements.
    let yearLast = false;
    if (!/[0-9]{4}/.test(parts[0])) {
        parts.reverse();
        yearLast = true;
    }
    // Absent elements parse to NaN, which passes both comparisons.
    const month = parseInt(parts[1], 10);
    const day = parseInt(parts[2], 10);
    if (parts[0].length !== 4 || month > 12 || day > 31) {
        throw new Error(`impossible date "${str}" at ${docID}`);
    }
    if (yearLast && day < 13) {
        throw new Error(`ambiguous date "${str}" at ${docID}`);
    }
    const dateParts = [];
    for (const part of parts) {
        if (part) {
            dateParts.push(part.replace(/^0+/, ""));
        }
    }
    return {
        "date-parts": [dateParts]
    };
};

/**
 * @description Trim leading and trailing space from the abstract
 *   value and return it. A missing value (``undefined`` or ``null``,
 *   as when the spreadsheet has no summary column) yields an empty
 *   string.
 * @param {string} str - abstract value from spreadsheet
 * @returns {string}
 */
Compositor.prototype.getAbstract = function(str) {
    if (str === undefined || str === null) {
        return "";
    }
    return String(str).trim();
};

/**
 * @description Return the first five characters of the trimmed
 *   docID as the item identifier. Any suffix is dropped, including
 *   a suffix (A to D) marking an appellate judgment: in practice,
 *   documents with suffixed IDs appear in the spreadsheets only as
 *   child attachments of an item without suffix representing a
 *   trial judgment. If that is not always the case, adjustments of
 *   some sort might be required.
 * @param {string} str - a CultExp docID
 * @returns {string}
 */
Compositor.prototype.getRootID = function(str) {
    return str.trim().slice(0, 5);
};

/**
 * @description Compose CSL JSON attachment metadata for insertion
 *   into an item object. Files named after the line ID with the
 *   extensions ``pdf``, ``rtf`` and ``txt`` are looked for in the
 *   ``files`` subdirectory, and one attachment is composed for each
 *   file found.
 * - The ``empty.pdf`` placeholder is not added.
 * - The part of the ID after its first five characters may carry
 *   A-D to indicate a level of appeal, ER to indicate an expert
 *   report, and a-z after ER to disambiguate multiple expert
 *   reports; these may be used solo or in combination.
 * - An ID ending in ER (plus optional letter) yields attachments
 *   tagged as reports, carrying the line summary rendered from
 *   Markdown to HTML as a note.
 * - Any other ID yields attachments tagged as judgments, provided
 *   that its suffix is empty or one of A-D.
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @returns {Object} an object with the composed ``attachments`` array
 *   and an empty ``tags`` array
 * @throws {Error} if a file exists for an ID that is not a report
 *   and has a suffix other than A-D
 * @see {@link Compositor#getAbstract}
 * @see {@link Compositor#filesPath}
 */
Compositor.prototype.composeAttachment = function(line) {
    const fileCode = line.id;
    let note = this.getAbstract(line.summary);
    if (note) {
        note = markdown.render(note);
    }

    // Attachment files that actually exist, in fixed extension order.
    const fns = [];
    for (const ext of ["pdf", "rtf", "txt"]) {
        const candidate = `${fileCode}.${ext}`;
        if (fs.existsSync(this.filesPath(candidate))) {
            fns.push(candidate);
        }
    }

    // Classify the ID once. A match() is used to detect the report flag:
    // replace() returns the unchanged string when nothing matches, which
    // would make every non-empty suffix look like a report flag.
    const tail = fileCode.slice(5);
    const isReport = /ER[a-z]?$/.test(tail);
    const suffix = tail.replace(/ER[a-z]?$/, "");
    const isJudgment = !suffix || ["A", "B", "C", "D"].indexOf(suffix) > -1;

    const attachments = [];
    for (const fn of fns) {
        if (isReport) {
            attachments.push({
                path: this.filesPath(fn),
                title: fn,
                note: note,
                tags: [`LN:${line.lang}`, "TY:report"]
            });
        } else if (isJudgment) {
            attachments.push({
                path: this.filesPath(fn),
                title: fn,
                tags: [`LN:${line.lang}`, "TY:judgment"]
            });
        } else {
            throw new Error(`Oops on suffix="${suffix}" from line.id="${line.id}"`);
        }
    }
    return {
        attachments: attachments,
        tags: []
    };
};

/**
 * @description Compose a CSL-M JSON item of type ``legal_case``
 *   from a spreadsheet line.
 * - The item ``id`` is the first five characters of the trimmed
 *   line ID; the full trimmed ID is set as ``call-number``.
 * - If the ``lstripto`` option is set and occurs in the docket
 *   number, everything up to and including its first occurrence is
 *   stripped from the number.
 * - The item is tagged with ``cn:`` plus the upper-cased default
 *   jurisdiction, and with ``showDocs`` if its ``id`` is listed in
 *   ``useDocsOnItems``. (The listed IDs are five-character root IDs,
 *   so the comparison uses the item ``id`` rather than the
 *   ``call-number``, which may carry a suffix.)
 * @param {Object} line - the content of a spreadsheet line, keyed to
 *   column nicknames
 * @param {boolean} suppressAbstract - omit the Abstract value from the composed item
 * @returns {Object} the composed item
 * @see {@link Compositor#getDate}
 * @see {@link Compositor#composeAttachment}
 * @see {@link Compositor#getTags}
 * @see {@link https://docs.citationstyles.org/en/stable/specification.html}
 * @see {@link https://citeproc-js.readthedocs.io/en/latest/csl-m/index.html}
 */
Compositor.prototype.composeItem = function(line, suppressAbstract) {
    const callNumber = line.id.trim();
    const item = {
        type: "legal_case",
        multi: {
            main: {},
            _keys: {}
        }
    };
    item["id"] = callNumber.slice(0, 5);
    item["call-number"] = callNumber;
    if (line.docketno) {
        const marker = this.opts.lstripto;
        const markerAt = marker ? line.docketno.indexOf(marker) : -1;
        item["number"] = markerAt > -1
            ? line.docketno.slice(markerAt + marker.length)
            : line.docketno;
    }
    if (line.type) {
        item["genre"] = line.type;
    }
    if (line.division) {
        item["division"] = line.division;
    }
    if (line["year-as-volume"]) {
        item["collection-number"] = line["year-as-volume"];
    }
    if (line.volume) {
        item.volume = line.volume;
    }
    if (line["reporter"]) {
        item["container-title"] = line["reporter"];
    }
    if (line.page) {
        item.page = line.page;
    }
    const date = this.getDate(callNumber, line.date);
    if (date) {
        item["issued"] = date;
    }
    if (line.court) {
        item["authority"] = line.court;
    }
    if (line.jurisdiction) {
        item["jurisdiction"] = line.jurisdiction;
    }
    if (!suppressAbstract) {
        item["abstract"] = this.getAbstract(line.summary);
    }
    item["language"] = line.lang;

    const info = this.composeAttachment(line);
    info.tags.push(`cn:${this.defaultJurisdiction.toUpperCase()}`);
    if (this.useDocsOnItems && this.useDocsOnItems.indexOf(item["id"]) > -1) {
        info.tags.push("showDocs");
    }
    item["attachments"] = info.attachments;
    item["tags"] = this.getTags(line, info.tags);
    return item;
};

/**
 * @description Single entry point that drives the whole conversion.
 *   Loads the spreadsheet rows through {@link ColumnTool}, lets
 *   {@link SetupTool} build or check the court map and the
 *   court-jurisdiction map, uses {@link Compositor} to compose one
 *   CSL-M JSON item per root ID (rows sharing a root ID contribute
 *   attachments to the same item), and writes the result to
 *   `import-me.json` in the current directory for import into Jurism.
 *   When `opts.c` is set, the processed rows are additionally dumped
 *   as CSV to that file.
 * @param {Object} opts - command-line options set by the `getopts` package
 * @throws {Error} when `opts.c` does not end in `.csv` / `.CSV`, or
 *   names a file that already exists
 * @see {@link SetupTool}
 * @see {@link ColumnTool}
 * @see {@link Compositor}
 */
function run(opts) {
    const setupTool = new SetupTool(opts);
    const columnTool = new ColumnTool(opts);
    const compositor = new Compositor(opts, setupTool.defaultJurisdiction, setupTool.useDocsOnItems);
    columnTool.checkCsvFilenameSanity();

    // Turn every spreadsheet record into a line object.
    const sourcePath = `${setupTool.inputFileName}`;
    const rows = [];
    for (const rawRecord of columnTool.getSpreadsheetArrays(sourcePath)) {
        rows.push(columnTool.loadLine(rawRecord));
    }

    // Court map: entries are collected, and the file written, only
    // while no court map file is flagged as present.
    for (const row of rows) {
        if (setupTool.hasCourtMapFile) {
            continue;
        }
        setupTool.addCourtMapEntry(row);
    }
    if (!setupTool.hasCourtMapFile) {
        fs.writeFileSync(setupTool.courtMapPath, JSON.stringify(setupTool.courtMap, null, 2));
    }
    setupTool.checkCourtMap();

    // Court-jurisdiction map. The loader is handed the final row
    // (undefined when the spreadsheet produced no rows).
    setupTool.setupCourtJurisdictionMap();
    setupTool.loadCourtJurisdictionMap(rows[rows.length - 1]);
    for (const row of rows) {
        setupTool.resetCourtDetails(row);
        setupTool.resetCourtJurisdictionDetails(row);
        setupTool.extractJurisdiction(row);
        setupTool.loadDataForCurrentJurisdiction(row);
        // Called a second time, after the jurisdiction data is loaded.
        setupTool.extractJurisdiction(row);
        if (setupTool.hasCourtJurisdictionMapFile) {
            continue;
        }
        setupTool.addCourtJurisdictionMapEntry(row);
    }
    if (!setupTool.hasCourtJurisdictionMapFile) {
        fs.writeFileSync(setupTool.courtJurisdictionMapPath, JSON.stringify(setupTool.courtJurisdictionMap, null, 2));
    }
    setupTool.checkCourtJurisdictionMap();

    // Group rows by root ID: the first row seen for a root ID becomes
    // the item, later rows only append their attachments to it.
    const itemsByRoot = {};
    for (const row of rows) {
        const rootID = compositor.getRootID(row.id);
        const isHeaderRow = row.court === "Court" && row.jurisdiction === "Jurisdiction";
        if (isHeaderRow) {
            continue;
        }
        if (itemsByRoot[rootID]) {
            const extraAttachments = compositor.composeAttachment(row).attachments;
            itemsByRoot[rootID].attachments = itemsByRoot[rootID].attachments.concat(extraAttachments);
            continue;
        }
        // No abstract when the two characters following the
        // five-character ID prefix are "ER".
        const suppressAbstract = row.id.slice(5, 7) === "ER";
        itemsByRoot[rootID] = compositor.composeItem(row, suppressAbstract);
    }
    const items = Object.values(itemsByRoot);
    fs.writeFileSync(path.join(".", "import-me.json"), JSON.stringify(items, null, 2));

    // Optional CSV dump of the processed rows.
    if (opts.c) {
        const table = [columnTool.headline].concat(rows.map((row) => columnTool.unLoadLine(row)));
        const csvText = csvstringify.stringify(table);
        const extension = opts.c.slice(-4);
        if (![".csv", ".CSV"].includes(extension)) {
            throw new Error("Argument to -c (--csv) must be a filename ending in .csv");
        }
        if (fs.existsSync(opts.c)) {
            throw new Error(`CSV output file ${opts.c} already exists. Aborting.`);
        }
        fs.writeFileSync(opts.c, csvText);
    }
    console.log("END");
}

// Command-line entry point. When this file is executed directly, parse
// the options and run the conversion; when it is loaded with require(),
// export `run` instead.
if (require.main === module) {

    // Option specification handed to getopts.
    const optParams = {
        alias: {
            c : "csv",
            L : "lstripto",
            q : "quiet",
            Q : "Quiet",
            v : "version",
            h: "help"
        },
        // The trailing "=" on each name is stripped again by the
        // string-argument check further down.
        string: ["L=", "c="],
        boolean: ["q", "Q", "h"],
        unknown: option => {
            console.log("unknown option \"" +option + "\"");
            process.exit();
        }
    };

    // Help text. Every fragment must be joined with "+": any other
    // arithmetic operator coerces its operands to numbers and puts
    // "NaN" into the output.
    const usage = "Usage: " + path.basename(process.argv[1]) + " [options]\n"
      + "  -c, --csv FILENAME\n"
      + "    Dump normalized data to the specified file, which must not exist\n"
      + "  -L, --lstripto STR\n"
      + "    Remove text from left of number field to designated string.\n"
      + "  -q, --quiet\n"
      + "    Suppress only empty-court warnings.\n"
      + "  -Q, --Quiet\n"
      + "    Suppress all warnings, show only errors.\n"
      + "  -v, --version\n"
      + "    Show script version..\n"
      + "  -h, --help\n"
      + "    This help.\n"
      + "To add a \"showDocs\" tag to selected items, place a file\n"
      + "\"useDocsOnItems.txt\" file in the directory with the spreadsheet\n"
      + "for this jurisdiction. The file should contain a newline-delimited\n"
      + "list of CultExp document codes. The tag will be applied to the\n"
      + "parent item of the relevant document attachments.\n";

    const opts = getopts(process.argv.slice(2), optParams);

    // -v: print the version recorded in package.json and stop.
    if (opts.v) {
        var pkg = JSON.parse(fs.readFileSync(path.join(__dirname, "package.json")).toString());
        console.log(pkg.version);
        process.exit();
    }

    // A string option whose parsed value is a boolean was given
    // without its argument.
    for (var opt of optParams.string) {
        opt = opt.slice(0, opt.length-1);
        if (typeof opts[opt] === "boolean") {
            console.log(`Option -${opt} (--${optParams.alias[opt]}) requires a string argument`);
            process.exit();
        }
    }

    if (opts.h) {
        console.log(usage);
        process.exit();
    }
    // Errors thrown during the run are reported by handleError.
    try {
        run(opts);
    } catch (e) {
        handleError(e);
    }
} else {
    module.exports = run;
}