From 8817113364322f5a96f6f42c756b7fa4d14a36aa Mon Sep 17 00:00:00 2001 From: Bryan Ashby Date: Sat, 2 May 2020 13:34:28 -0600 Subject: [PATCH] * Create bundle filenames to spec * Better cp437 vs utf8 vs other encoding support * Add some CP437 and related utils --- core/cp437util.js | 55 +++++++++++++ core/message.js | 17 ++++ core/oputil/oputil_message_base.js | 8 +- core/qwk_mail_packet.js | 121 ++++++++++++++++++++++++----- core/string_util.js | 15 ++++ 5 files changed, 191 insertions(+), 25 deletions(-) create mode 100644 core/cp437util.js diff --git a/core/cp437util.js b/core/cp437util.js new file mode 100644 index 00000000..32425d3a --- /dev/null +++ b/core/cp437util.js @@ -0,0 +1,55 @@ + + +const CP437UnicodeTable = [ + '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', + '\u0007', '\u0008', '\u0009', '\u000A', '\u000B', '\u000C', '\u000D', + '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', '\u0014', + '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', + '\u001C', '\u001D', '\u001E', '\u001F', '\u0020', '\u0021', '\u0022', + '\u0023', '\u0024', '\u0025', '\u0026', '\u0027', '\u0028', '\u0029', + '\u002A', '\u002B', '\u002C', '\u002D', '\u002E', '\u002F', '\u0030', + '\u0031', '\u0032', '\u0033', '\u0034', '\u0035', '\u0036', '\u0037', + '\u0038', '\u0039', '\u003A', '\u003B', '\u003C', '\u003D', '\u003E', + '\u003F', '\u0040', '\u0041', '\u0042', '\u0043', '\u0044', '\u0045', + '\u0046', '\u0047', '\u0048', '\u0049', '\u004A', '\u004B', '\u004C', + '\u004D', '\u004E', '\u004F', '\u0050', '\u0051', '\u0052', '\u0053', + '\u0054', '\u0055', '\u0056', '\u0057', '\u0058', '\u0059', '\u005A', + '\u005B', '\u005C', '\u005D', '\u005E', '\u005F', '\u0060', '\u0061', + '\u0062', '\u0063', '\u0064', '\u0065', '\u0066', '\u0067', '\u0068', + '\u0069', '\u006A', '\u006B', '\u006C', '\u006D', '\u006E', '\u006F', + '\u0070', '\u0071', '\u0072', '\u0073', '\u0074', '\u0075', '\u0076', + '\u0077', '\u0078', '\u0079', '\u007A', 
'\u007B', '\u007C', '\u007D', + '\u007E', '\u007F', '\u00C7', '\u00FC', '\u00E9', '\u00E2', '\u00E4', + '\u00E0', '\u00E5', '\u00E7', '\u00EA', '\u00EB', '\u00E8', '\u00EF', + '\u00EE', '\u00EC', '\u00C4', '\u00C5', '\u00C9', '\u00E6', '\u00C6', + '\u00F4', '\u00F6', '\u00F2', '\u00FB', '\u00F9', '\u00FF', '\u00D6', + '\u00DC', '\u00A2', '\u00A3', '\u00A5', '\u20A7', '\u0192', '\u00E1', + '\u00ED', '\u00F3', '\u00FA', '\u00F1', '\u00D1', '\u00AA', '\u00BA', + '\u00BF', '\u2310', '\u00AC', '\u00BD', '\u00BC', '\u00A1', '\u00AB', + '\u00BB', '\u2591', '\u2592', '\u2593', '\u2502', '\u2524', '\u2561', + '\u2562', '\u2556', '\u2555', '\u2563', '\u2551', '\u2557', '\u255D', + '\u255C', '\u255B', '\u2510', '\u2514', '\u2534', '\u252C', '\u251C', + '\u2500', '\u253C', '\u255E', '\u255F', '\u255A', '\u2554', '\u2569', + '\u2566', '\u2560', '\u2550', '\u256C', '\u2567', '\u2568', '\u2564', + '\u2565', '\u2559', '\u2558', '\u2552', '\u2553', '\u256B', '\u256A', + '\u2518', '\u250C', '\u2588', '\u2584', '\u258C', '\u2590', '\u2580', + '\u03B1', '\u00DF', '\u0393', '\u03C0', '\u03A3', '\u03C3', '\u00B5', + '\u03C4', '\u03A6', '\u0398', '\u03A9', '\u03B4', '\u221E', '\u03C6', + '\u03B5', '\u2229', '\u2261', '\u00B1', '\u2265', '\u2264', '\u2320', + '\u2321', '\u00F7', '\u2248', '\u00B0', '\u2219', '\u00B7', '\u221A', + '\u207F', '\u00B2', '\u25A0', '\u00A0' +]; + +const NonCP437EncodableRegExp = 
/[^\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000A\u000B\u000C\u000D\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u0020\u0021\u0022\u0023\u0024\u0025\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u002D\u002E\u002F\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039\u003A\u003B\u003C\u003D\u003E\u003F\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004A\u004B\u004C\u004D\u004E\u004F\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005A\u005B\u005C\u005D\u005E\u005F\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006A\u006B\u006C\u006D\u006E\u006F\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007A\u007B\u007C\u007D\u007E\u007F\u00C7\u00FC\u00E9\u00E2\u00E4\u00E0\u00E5\u00E7\u00EA\u00EB\u00E8\u00EF\u00EE\u00EC\u00C4\u00C5\u00C9\u00E6\u00C6\u00F4\u00F6\u00F2\u00FB\u00F9\u00FF\u00D6\u00DC\u00A2\u00A3\u00A5\u20A7\u0192\u00E1\u00ED\u00F3\u00FA\u00F1\u00D1\u00AA\u00BA\u00BF\u2310\u00AC\u00BD\u00BC\u00A1\u00AB\u00BB\u2591\u2592\u2593\u2502\u2524\u2561\u2562\u2556\u2555\u2563\u2551\u2557\u255D\u255C\u255B\u2510\u2514\u2534\u252C\u251C\u2500\u253C\u255E\u255F\u255A\u2554\u2569\u2566\u2560\u2550\u256C\u2567\u2568\u2564\u2565\u2559\u2558\u2552\u2553\u256B\u256A\u2518\u250C\u2588\u2584\u258C\u2590\u2580\u03B1\u00DF\u0393\u03C0\u03A3\u03C3\u00B5\u03C4\u03A6\u0398\u03A9\u03B4\u221E\u03C6\u03B5\u2229\u2261\u00B1\u2265\u2264\u2320\u2321\u00F7\u2248\u00B0\u2219\u00B7\u221A\u207F\u00B2\u25A0\u00A0]/; +const isCP437Encodable = (s) => { + if (!s.length) { + return true; + } + + return !NonCP437EncodableRegExp.test(s); +} + +module.exports = { + CP437UnicodeTable, + isCP437Encodable, +} \ No newline at end of file diff --git a/core/message.js b/core/message.js index 3649a6f6..c763bbd2 100644 --- a/core/message.js +++ b/core/message.js @@ -11,6 +11,9 @@ const { sanitizeString, getISOTimestampString } = require('./database.js'); +const { 
isCP437Encodable } = require('./cp437util'); +const { containsNonLatinCodepoints } = require('./string_util'); + const { isAnsi, isFormattedLine, splitTextAtTerms, @@ -145,6 +148,20 @@ module.exports = class Message { return null !== _.get(this, 'meta.System.remote_from_user', null); } + isCP437Encodable() { + return isCP437Encodable(this.toUserName) && + isCP437Encodable(this.fromUserName) && + isCP437Encodable(this.subject) && + isCP437Encodable(this.message); + } + + containsNonLatinCodepoints() { + return containsNonLatinCodepoints(this.toUserName) || + containsNonLatinCodepoints(this.fromUserName) || + containsNonLatinCodepoints(this.subject) || + containsNonLatinCodepoints(this.message); + } + /* :TODO: finish me static checkUserHasDeleteRights(user, messageIdOrUuid, cb) { diff --git a/core/oputil/oputil_message_base.js b/core/oputil/oputil_message_base.js index 7d914ab2..6f677c52 100644 --- a/core/oputil/oputil_message_base.js +++ b/core/oputil/oputil_message_base.js @@ -464,8 +464,6 @@ function dumpQWKPacket(packetPath) { const { QWKPacketWriter } = require('../qwk_mail_packet'); const writer = new QWKPacketWriter({ bbsID : 'XIBALBA', - toUser : 'NuSkooler', - encoding : 'cp437', }); const { QWKPacketReader } = require('../qwk_mail_packet'); @@ -547,6 +545,8 @@ function exportQWKPacket(packetPath) { // oputil mb qwk export SPEC PATH [--user USER] // [areaTag1[@dateTime]],[...] 
PATH --user USER + // :TODO: bbsID from PATH filename else 'ENIGMA' + const posArgLen = argv._.length; if (posArgLen < 4) { @@ -619,8 +619,6 @@ function exportQWKPacket(packetPath) { const writer = new QWKPacketWriter({ // :TODO: export needs these options bbsID : 'XIBALBA', - toUser : 'NuSkooler', - encoding : 'cp437', user : user, }); @@ -635,7 +633,7 @@ function exportQWKPacket(packetPath) { }); }, (err) => { - writer.finish('/home/nuskooler/Downloads/qwk2/TEST1.QWK'); + writer.finish('/home/nuskooler/Downloads/qwk2/'); if (err) { console.error(`Failed to write one or more messages: ${err.message}`); } diff --git a/core/qwk_mail_packet.js b/core/qwk_mail_packet.js index 761b71f7..b7f53ebc 100644 --- a/core/qwk_mail_packet.js +++ b/core/qwk_mail_packet.js @@ -11,6 +11,7 @@ const Config = require('./config').get; const SysProps = require('./system_property'); const UserProps = require('./user_property'); const { numToMbf32 } = require('./mbf'); +const { getEncodingFromCharacterSetIdentifier } = require('./ftn_util'); const { EventEmitter } = require('events'); const temptmp = require('temptmp'); @@ -823,11 +824,11 @@ class QWKPacketWriter extends EventEmitter { enableQWKE = true, enableHeadersExtension = true, enableAtKludges = true, - encoding = 'cp437', systemDomain = 'enigma-bbs', bbsID = '', user = null, archiveFormat = 'application/zip', + forceEncoding = null, } = QWKPacketWriter.DefaultOptions) { super(); @@ -840,7 +841,7 @@ class QWKPacketWriter extends EventEmitter { bbsID, user, archiveFormat, - encoding : encoding.toLowerCase(), + forceEncoding : forceEncoding ? 
forceEncoding.toLowerCase() : null, }; this.temptmp = temptmp.createTrackedSession('qwkpacketwriter'); @@ -851,11 +852,11 @@ class QWKPacketWriter extends EventEmitter { enableQWKE : true, enableHeadersExtension : true, enableAtKludges : true, - encoding : 'cp437', systemDomain : 'enigma-bbs', bbsID : '', user : null, archiveFormat :'application/zip', + forceEncoding : null, }; } @@ -945,6 +946,8 @@ class QWKPacketWriter extends EventEmitter { } } else { fullMessageBody += `@MSGID: ${this.makeMessageIdentifier(message)}\n`; + fullMessageBody += `@TZ: ${UTCOffsetToSMBTZ[moment().format('Z')]}\n`; + // :TODO: REPLY and REPLYTO } } @@ -953,14 +956,16 @@ class QWKPacketWriter extends EventEmitter { fullMessageBody += `${line}\n`; }); - const encodedMessage = iconv.encode(fullMessageBody, this.options.encoding); + const encoding = this._getEncoding(message); + + const encodedMessage = iconv.encode(fullMessageBody, encoding); // // QWK spec wants line feeds as 0xe3 for some reason, so we'll have // to replace the \n's. If we're going against the spec and using UTF-8 // we can just leave them be. // - if ('utf8' !== this.options.encoding) { + if ('utf8' !== encoding) { replaceCharInBuffer(encodedMessage, 0x0a, QWKLF); } @@ -989,7 +994,7 @@ class QWKPacketWriter extends EventEmitter { this._updateIndexTracking(message); if (this.options.enableHeadersExtension) { - this._appendHeadersExtensionData(message); + this._appendHeadersExtensionData(message, encoding); } // next message starts at this block @@ -999,6 +1004,38 @@ class QWKPacketWriter extends EventEmitter { this.areaTagsSeen.add(message.areaTag); } + _getEncoding(message) { + if (this.options.forceEncoding) { + return this.options.forceEncoding; + } + + // If the system has stored an explicit encoding, use that. 
+ let encoding = _.get(message.meta, 'System.explicit_encoding'); + if (encoding) { + return encoding; + } + + // If the message is already tagged with a supported encoding + // indicator such as FTN-style CHRS, try to use that. + encoding = _.get(message.meta, 'FtnKludge.CHRS'); + if (encoding) { + // convert from CHRS to something standard + encoding = getEncodingFromCharacterSetIdentifier(encoding); + if (encoding) { + return encoding; + } + } + + // The to-spec default is CP437/ASCII. If it can be encoded as + // such then do so. + if (message.isCP437Encodable()) { + return 'cp437'; + } + + // Something more modern... + return 'utf8'; + } + _messageAddressedToUser(message) { if (_.isUndefined(this.cachedCompareNames)) { if (this.options.user) { @@ -1041,7 +1078,7 @@ class QWKPacketWriter extends EventEmitter { } - finish(packetPath) { + finish(packetDirectory) { async.series( [ (callback) => { @@ -1066,7 +1103,7 @@ class QWKPacketWriter extends EventEmitter { return this._createIndexes(callback); }, (callback) => { - return this._producePacketArchive(packetPath, callback); + return this._producePacketArchive(packetDirectory, callback); } ], err => { @@ -1081,7 +1118,44 @@ class QWKPacketWriter extends EventEmitter { ) } - _producePacketArchive(packetPath, cb) { + _getNextAvailPacketFileName(packetDirectory, cb) { + // + // According to http://wiki.synchro.net/ref:qwk filenames should + // start with .QWK -> .QW1 ... .QW9 -> .Q10 ... 
.Q99 + // + let digits = 0; + async.doWhilst( callback => { + let ext; + if (0 === digits) { + ext = 'QWK'; + } else if (digits < 10) { + ext = `QW${digits}`; + } else if (digits < 100) { + ext = `Q${digits}`; + } else { + return callback(Errors.UnexpectedState(`Unable to choose a valid QWK output filename`)); + } + + ++digits; + + const filename = `${this.options.bbsID}.${ext}`; + fs.stat(paths.join(packetDirectory, filename), (err, stats) => { + if (err && 'ENOENT' === err.code) { + return callback(null, filename); + } else { + return callback(null, null); + } + }); + }, + (filename, callback) => { + return callback(null, filename ? false : true); + }, + (err, filename) => { + return cb(err, filename); + }); + } + + _producePacketArchive(packetDirectory, cb) { const archiveUtil = ArchiveUtil.getInstance(); fs.readdir(this.workDir, (err, files) => { @@ -1089,15 +1163,22 @@ class QWKPacketWriter extends EventEmitter { return cb(err); } - archiveUtil.compressTo( - this.options.archiveFormat, - packetPath, - files, - this.workDir, - err => { + this._getNextAvailPacketFileName(packetDirectory, (err, filename) => { + if (err) { return cb(err); } - ); + + const packetPath = paths.join(packetDirectory, filename); + archiveUtil.compressTo( + this.options.archiveFormat, + packetPath, + files, + this.workDir, + err => { + return cb(err); + } + ); + }); }); } @@ -1306,10 +1387,10 @@ class QWKPacketWriter extends EventEmitter { return `${syncTimestamp} ${syncTZ}`; } - _appendHeadersExtensionData(message) { + _appendHeadersExtensionData(message, encoding) { const messageData = { // Synchronet style - Utf8 : ('utf8' === this.options.encoding ? 'true' : 'false'), + Utf8 : ('utf8' === encoding ? 
'true' : 'false'), 'Message-ID' : this.makeMessageIdentifier(message), WhenWritten : this._makeSynchronetTimestamp(message.modTimestamp), @@ -1367,11 +1448,11 @@ class QWKPacketWriter extends EventEmitter { messageData.Editor = `ENiGMA 1/2 BBS FSE v${enigmaVersion}`; } - this.headersDatStream.write(iconv.encode(`[${this.currentMessageOffset.toString(16)}]\r\n`, this.options.encoding)); + this.headersDatStream.write(iconv.encode(`[${this.currentMessageOffset.toString(16)}]\r\n`, encoding)); for (let [name, value] of Object.entries(messageData)) { if (value) { - this.headersDatStream.write(iconv.encode(`${name}: ${value}\r\n`, this.options.encoding)); + this.headersDatStream.write(iconv.encode(`${name}: ${value}\r\n`, encoding)); } } diff --git a/core/string_util.js b/core/string_util.js index fa9a9097..cde7ac3e 100644 --- a/core/string_util.js +++ b/core/string_util.js @@ -13,6 +13,7 @@ exports.pad = pad; exports.insert = insert; exports.replaceAt = replaceAt; exports.isPrintable = isPrintable; +exports.containsNonLatinCodepoints = containsNonLatinCodepoints; exports.stripAllLineFeeds = stripAllLineFeeds; exports.debugEscapedString = debugEscapedString; exports.stringFromNullTermBuffer = stringFromNullTermBuffer; @@ -196,6 +197,20 @@ function isPrintable(s) { return !RE_NON_PRINTABLE.test(s); } +const NonLatinCodePointsRegExp = /[^\u0000-\u00ff]/; + +function containsNonLatinCodepoints(s) { + if (!s.length) { + return false; + } + + if (s.charCodeAt(0) > 255) { + return true; + } + + return NonLatinCodePointsRegExp.test(s); +} + function stripAllLineFeeds(s) { return s.replace(/\r?\n|[\r\u2028\u2029]/g, ''); }