//  enigma-bbs/core/files_bbs_file.js

/* jslint node: true */
'use strict';

const { Errors }    = require('./enig_error.js');

//  deps
const fs            = require('graceful-fs');
const iconv         = require('iconv-lite');
const moment        = require('moment');

//
//  Placeholder texts seen in real-world listings that actually mean
//  "this file has no description". Matched as a prefix of the description.
//
const IgnoredDescriptions = [ 'No description available', 'No ID File Found For This Archive File.' ];

module.exports = class FilesBBSFile {
    constructor() {
        this.entries = new Map();
    }

    get(fileName) {
        return this.entries.get(fileName);
    }

    getDescription(fileName) {
        const entry = this.get(fileName);
        if(entry) {
            return entry.desc;
        }
    }

    static createFromFile(path, cb) {
        fs.readFile(path, (err, descData) => {
            if(err) {
                return cb(err);
            }

            //  :TODO: encoding should be default to CP437, but allowed to change - ie for Amiga/etc.
            const lines = iconv.decode(descData, 'cp437').split(/\r?\n/g);
            const filesBbs = new FilesBBSFile();

            const isBadDescription = (desc) => {
                return IgnoredDescriptions.find(d => desc.startsWith(d)) ? true : false;
            };

            //
            //  Contrary to popular belief, there is not a FILES.BBS standard. Instead,
            //  many formats have been used over the years. We'll try to support as much
            //  as we can within reason.
            //
            //  Resources:
            //  - Great info from Mystic @ http://wiki.mysticbbs.com/doku.php?id=mutil_import_files.bbs
            //  - https://alt.bbs.synchronet.narkive.com/I6Vrxq6q/format-of-files-bbs
            //
            //  Example files:
            //  - https://github.com/NuSkooler/ansi-bbs/tree/master/ancient_formats/files_bbs
            //
            const detectDecoder = () => {
                //  helpers
                const regExpTestUpTo = (n, re) => {
                    return lines
                        .slice(0, n)
                        .some(l => re.test(l));
                };

                //
                //  Try to figure out which decoder to use
                //
                const decoders = [
                    {
                        //  I've been told this is what Syncrhonet uses
                        lineRegExp : /^([^ ]{1,12})\s{1,11}([0-3][0-9]\/[0-3][0-9]\/[1789][0-9]) ([^\r\n]+)$/,
                        detect : function() {
                            return regExpTestUpTo(10, this.lineRegExp);
                        },
                        extract : function() {
                            for(let i = 0; i < lines.length; ++i) {
                                let line = lines[i];
                                const hdr = line.match(this.lineRegExp);
                                if(!hdr) {
                                    continue;
                                }
                                const long = [];
                                for(let j = i + 1; j < lines.length; ++j) {
                                    line = lines[j];
                                    if(!line.startsWith(' ')) {
                                        break;
                                    }
                                    long.push(line.trim());
                                    ++i;
                                }
                                const desc      = long.join('\r\n') || hdr[3] || '';
                                const fileName  = hdr[1];
                                const timestamp = moment(hdr[2], 'MM/DD/YY');

                                if(isBadDescription(desc) || !timestamp.isValid()) {
                                    continue;
                                }
                                filesBbs.entries.set(fileName, { timestamp, desc } );
                            }
                        }
                    },

                    {
                        //
                        //  Examples:
                        //  - Night Owl CD #7, 1992
                        //
                        lineRegExp  : /^([^\s]{1,12})\s{2,14}\[0\]\s\s([^\r\n]+)$/,
                        detect : function() {
                            return regExpTestUpTo(10, this.lineRegExp);
                        },
                        extract : function() {
                            for(let i = 0; i < lines.length; ++i) {
                                let line = lines[i];
                                const hdr = line.match(this.lineRegExp);
                                if(!hdr) {
                                    continue;
                                }
                                const long = [ hdr[2].trim() ];
                                for(let j = i + 1; j < lines.length; ++j) {
                                    line = lines[j];
                                    // -------------------------------------------------v 32
                                    if(!line.startsWith('                               | ')) {
                                        break;
                                    }
                                    long.push(line.substr(33));
                                    ++i;
                                }
                                const desc      = long.join('\r\n');
                                const fileName  = hdr[1];

                                if(isBadDescription(desc)) {
                                    continue;
                                }

                                filesBbs.entries.set(fileName, { desc } );
                            }
                        }
                    },

                    {
                        //
                        //  Simple first line with partial description,
                        //  secondary description lines tabbed out.
                        //
                        //  Examples
                        //  - GUS archive @ dk.toastednet.org
                        //
                        lineRegExp : /^([^\s]{1,12})\s+\[00\]\s([^\r\n]+)$/,
                        detect  : function() {
                            return regExpTestUpTo(10, this.lineRegExp);
                        },
                        extract : function() {
                            for(let i = 0; i < lines.length; ++i) {
                                let line = lines[i];
                                const hdr = line.match(this.lineRegExp);
                                if(!hdr) {
                                    continue;
                                }
                                const long = [ hdr[2].trimRight() ];
                                for(let j = i + 1; j < lines.length; ++j) {
                                    line = lines[j];
                                    if(!line.startsWith('\t\t  ')) {
                                        break;
                                    }
                                    long.push(line.substr(4));
                                    ++i;
                                }
                                const desc      = long.join('\r\n');
                                const fileName  = hdr[1];

                                if(isBadDescription(desc)) {
                                    continue;
                                }

                                filesBbs.entries.set(fileName, { desc } );
                            }
                        }
                    },

                    {
                        //
                        //  <8.3FileName> <size> <MM-DD-YY> <desc first line>
                        //                                  <desc...>
                        //  Examples:
                        //  - Expanding Your BBS CD by David Wolfe, 1995
                        //
                        lineRegExp : /^([^ ]{1,12})\s{1,20}([0-9]+)\s\s([0-3][0-9]-[0-3][0-9]-[1789][0-9])\s\s([^\r\n]+)$/,
                        detect  : function() {
                            return regExpTestUpTo(10, this.lineRegExp);
                        },
                        extract : function() {
                            for(let i = 0; i < lines.length; ++i) {
                                let line = lines[i];
                                const hdr = line.match(this.lineRegExp);
                                if(!hdr) {
                                    continue;
                                }

                                const firstDescLine = hdr[4].trimRight();
                                const long = [ firstDescLine ];
                                for(let j = i + 1; j < lines.length; ++j) {
                                    line = lines[j];
                                    if(!line.startsWith(' '.repeat(34))) {
                                        break;
                                    }
                                    long.push(line.substr(34).trimRight());
                                    ++i;
                                }

                                const desc      = long.join('\r\n');
                                const fileName  = hdr[1];
                                const size      = parseInt(hdr[2]);
                                const timestamp = moment(hdr[3], 'MM-DD-YY');

                                if(isBadDescription(desc) || isNaN(size) || !timestamp.isValid()) {
                                    continue;
                                }

                                filesBbs.entries.set(fileName, { desc, size, timestamp });
                            }
                        }
                    },

                    {
                        //
                        //  Examples:
                        //  - Aminet Amiga CDROM, March 1994.  Walnut Creek CDROM.
                        //  - CP/M CDROM, Sep. 1994.  Walnut Creek CDROM.
                        //  - ...and many others.
                        //
                        //  Basically: <8.3 filename> <description>
                        //
                        //  May contain headers, but we'll just skip 'em.
                        //
                        lineRegExp : /^([^ ]{1,12})\s{1,11}([^\r\n]+)$/,
                        detect : function() {
                            return regExpTestUpTo(10, this.lineRegExp);
                        },
                        extract : function() {
                            lines.forEach(line => {
                                const hdr = line.match(this.lineRegExp);
                                if(!hdr) {
                                    return; //  forEach
                                }

                                const fileName  = hdr[1].trim();
                                const desc      = hdr[2].trim();

                                if(desc && !isBadDescription(desc)) {
                                    filesBbs.entries.set(fileName, { desc } );
                                }
                            });
                        }
                    },

                    {
                        //
                        //  Examples:
                        //  - AMINET CD's & similar
                        //
                        lineRegExp : /^(.{1,22}) ([0-9]+)K ([^\r\n]+)$/,
                        detect : function() {
                            return regExpTestUpTo(10, this.lineRegExp);
                        },
                        extract : function() {
                            lines.forEach(line => {
                                const hdr = line.match(this.tester);
                                if(!hdr) {
                                    return; //  forEach
                                }

                                const fileName  = hdr[1].trim();
                                let size        = parseInt(hdr[2]);
                                const desc      = hdr[3].trim();

                                if(isNaN(size)) {
                                    return; //  forEach
                                }
                                size *= 1024;   //  K->bytes.

                                if(desc) {  //  omit empty entries
                                    filesBbs.entries.set(fileName, { size, desc } );
                                }
                            });
                        }
                    },
                ];

                const decoder = decoders.find(d => d.detect());
                return decoder;
            };

            const decoder = detectDecoder();
            if(!decoder) {
                return cb(Errors.Invalid('Invalid or unrecognized FILES.BBS format'));
            }

            decoder.extract(decoder);

            return cb(
                filesBbs.entries.size > 0 ? null : Errors.Invalid('Invalid or unrecognized FILES.BBS format'),
                filesBbs
            );
        });
    }


};