From 29947611f617374e5c9db06109c6e85155d21ef2 Mon Sep 17 00:00:00 2001 From: Bryan Ashby Date: Wed, 5 Oct 2016 23:22:59 -0600 Subject: [PATCH] * New archives/archiver format - more flexible, more formats, etc. * Add scanning inside archives ability, extract file_id.diz, etc. * Initial year est (WIP) work --- core/archive_util.js | 95 +++++++++++++++++++++++++++--------- core/art.js | 2 +- core/config.js | 56 ++++++++------------- core/file_area.js | 113 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 205 insertions(+), 61 deletions(-) diff --git a/core/archive_util.js b/core/archive_util.js index c21bf4ce..81daf9c1 100644 --- a/core/archive_util.js +++ b/core/archive_util.js @@ -4,6 +4,7 @@ // ENiGMA½ const Config = require('./config.js').config; const stringFormat = require('./string_format.js'); +const Errors = require('./enig_error.js').Errors; // base/modules const fs = require('fs'); @@ -19,8 +20,8 @@ class Archiver { this.list = config.list; this.extract = config.extract; - this.sig = new Buffer(config.sig, 'hex'); - this.offset = config.offset || 0; + /*this.sig = new Buffer(config.sig, 'hex'); + this.offset = config.offset || 0;*/ } ok() { @@ -61,10 +62,10 @@ module.exports = class ArchiveUtil { // // Load configuration // - if(_.has(Config, 'archivers')) { - Object.keys(Config.archivers).forEach(archKey => { + if(_.has(Config, 'archives.archivers')) { + Object.keys(Config.archives.archivers).forEach(archKey => { - const archConfig = Config.archivers[archKey]; + const archConfig = Config.archives.archivers[archKey]; const archiver = new Archiver(archConfig); if(!archiver.ok()) { @@ -72,21 +73,43 @@ module.exports = class ArchiveUtil { } this.archivers[archKey] = archiver; + }); + } - if(archiver.offset + archiver.sig.length > this.longestSignature) { - this.longestSignature = archiver.offset + archiver.sig.length; - } + if(_.has(Config, 'archives.formats')) { + Object.keys(Config.archives.formats).forEach(fmtKey => { + + Config.archives.formats[fmtKey].sig = new Buffer(Config.archives.formats[fmtKey].sig, 'hex'); + Config.archives.formats[fmtKey].offset = Config.archives.formats[fmtKey].offset || 0; + + const sigLen = Config.archives.formats[fmtKey].offset + Config.archives.formats[fmtKey].sig.length; + if(sigLen > this.longestSignature) { + this.longestSignature = sigLen; + } }); } } + /* getArchiver(archType) { - if(!archType) { + if(!archType || 0 === archType.length) { return; } archType = archType.toLowerCase(); return this.archivers[archType]; + }*/ + + getArchiver(archType) { + if(!archType || 0 === archType.length) { + return; + } + + if(_.has(Config, [ 'archives', 'formats', archType, 'handler' ] ) && + _.has(Config, [ 'archives', 'archivers', Config.archives.formats[archType].handler ] )) + { + return Config.archives.archivers[ Config.archives.formats[archType].handler ]; + } } haveArchiver(archType) { @@ -98,31 +121,33 @@ module.exports = class ArchiveUtil { } detectType(path, cb) { + if(!_.has(Config, 'archives.formats')) { + return cb(Errors.DoesNotExist('No formats configured')); + } + fs.open(path, 'r', (err, fd) => { if(err) { - cb(err); - return; + return cb(err); } - let buf = new Buffer(this.longestSignature); + const buf = new Buffer(this.longestSignature); fs.read(fd, buf, 0, buf.length, 0, (err, bytesRead) => { if(err) { return cb(err); } - // return first match - const detected = _.findKey(this.archivers, arch => { - const lenNeeded = arch.offset + arch.sig.length; - + const archFormat = _.findKey(Config.archives.formats, archFormat => { + const lenNeeded = archFormat.offset + archFormat.sig.length; + if(bytesRead < lenNeeded) { return false; } - const comp = buf.slice(arch.offset, arch.offset + arch.sig.length); - return (arch.sig.equals(comp)); + const comp = buf.slice(archFormat.offset, archFormat.offset + archFormat.sig.length); + return (archFormat.sig.equals(comp)); }); - cb(detected ? null : new Error('Unknown type'), detected); + return cb(archFormat ? null : Errors.General('Unknown type'), archFormat); }); }); } @@ -157,7 +182,7 @@ module.exports = class ArchiveUtil { const fmtObj = { archivePath : archivePath, - fileList : files.join(' '), + fileList : files.join(' '), // :TODO: probably need same hack as extractTo here! }; const args = archiver.compress.args.map( arg => stringFormat(arg, fmtObj) ); @@ -166,7 +191,17 @@ module.exports = class ArchiveUtil { return this.spawnHandler(proc, 'Compression', cb); } - extractTo(archivePath, extractPath, archType, cb) { + extractTo(archivePath, extractPath, archType, fileList, cb) { + let haveFileList; + + if(!cb && _.isFunction(fileList)) { + cb = fileList; + fileList = []; + haveFileList = false; + } else { + haveFileList = true; + } + const archiver = this.getArchiver(archType); if(!archiver) { @@ -178,10 +213,22 @@ module.exports = class ArchiveUtil { extractPath : extractPath, }; - const args = archiver.decompress.args.map( arg => stringFormat(arg, fmtObj) ); - const proc = pty.spawn(archiver.decompress.cmd, args, this.getPtyOpts()); + const action = haveFileList ? 'extract' : 'decompress'; - return this.spawnHandler(proc, 'Decompression', cb); + // we need to treat {fileList} special in that it should be broken up to 0:n args + const args = archiver[action].args.map( arg => { + return '{fileList}' === arg ? arg : stringFormat(arg, fmtObj); + }); + + const fileListPos = args.indexOf('{fileList}'); + if(fileListPos > -1) { + // replace {fileList} with 0:n sep file list arguments + args.splice.apply(args, [fileListPos, 1].concat(fileList)); + } + + const proc = pty.spawn(archiver[action].cmd, args, this.getPtyOpts()); + + return this.spawnHandler(proc, (haveFileList ? 'Extraction' : 'Decompression'), cb); } listEntries(archivePath, archType, cb) { diff --git a/core/art.js b/core/art.js index 0e283218..4b870cde 100644 --- a/core/art.js +++ b/core/art.js @@ -7,7 +7,6 @@ const miscUtil = require('./misc_util.js'); const ansi = require('./ansi_term.js'); const aep = require('./ansi_escape_parser.js'); const sauce = require('./sauce.js'); -const farmhash = require('farmhash'); // deps const fs = require('fs'); @@ -15,6 +14,7 @@ const paths = require('path'); const assert = require('assert'); const iconv = require('iconv-lite'); const _ = require('lodash'); +const farmhash = require('farmhash'); exports.getArt = getArt; exports.getArtFromPath = getArtFromPath; diff --git a/core/config.js b/core/config.js index 96427624..c94470bb 100644 --- a/core/config.js +++ b/core/config.js @@ -211,49 +211,25 @@ function getDefaultConfig() { } }, - archivers : { - zip : { - sig : '504b0304', - offset : 0, - compress : { - cmd : '7z', - args : [ 'a', '-tzip', '{archivePath}', '{fileList}' ], - }, - decompress : { - cmd : '7z', - args : [ 'e', '-o{extractPath}', '{archivePath}' ] - }, - list : { - cmd : '7z', - args : [ 'l', '{archivePath}' ], - entryMatch : '^[0-9]{4}-[0-9]{2}-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2}\\s[A-Za-z\\.]{5}\\s+([0-9]+)\\s+[0-9]+\\s+([^\\r\\n]+)$', - }, - extract : { - cmd : '7z', - args : [ 'x', '-o{extractPath}', '{archivePath}', '{fileList}' ], - }, - }, - }, - - archivers2 : { - tools : { + archives : { + archivers : { '7Zip' : { compress : { - cmd : '7z', + cmd : '7za', args : [ 'a', '-tzip', '{archivePath}', '{fileList}' ], }, decompress : { - cmd : '7z', + cmd : '7za', args : [ 'e', '-o{extractPath}', '{archivePath}' ] }, list : { - cmd : '7z', + cmd : '7za', args : [ 'l', '{archivePath}' ], entryMatch : '^[0-9]{4}-[0-9]{2}-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2}\\s[A-Za-z\\.]{5}\\s+([0-9]+)\\s+[0-9]+\\s+([^\\r\\n]+)$', }, extract : { - cmd : '7z', - args : [ 'x', '-o{extractPath}', '{archivePath}', '{fileList}' ], + cmd : '7za', + args : [ 'e', '-o{extractPath}', '{archivePath}', '{fileList}' ], }, } }, @@ -262,25 +238,25 @@ function getDefaultConfig() { sig : '504b0304', offset : 0, exts : [ 'zip' ], - tool : '7Zip', + handler : '7Zip', }, '7z' : { sig : '377abcaf271c', offset : 0, exts : [ '7z' ], - tool : '7Zip', + handler : '7Zip', }, arj : { sig : '60ea', offset : 0, exts : [ 'arj' ], - tool : '7Zip', + handler : '7Zip', }, rar : { sig : '526172211a0700', offset : 0, exts : [ 'rar' ], - tool : '7Zip', + handler : '7Zip', } } }, @@ -367,6 +343,16 @@ function getDefaultConfig() { longDesc : [ '^.*\.NFO$', '^README\.1ST$', '^README\.TXT$' ], }, + yearEstPatterns: [ + // + // Patterns should produce the year in the first submatch + // The year may be YY or YYYY + // + '[0-3]?[0-9][\\-\\/\\.][0-3]?[0-9][\\-\\/\\.]((?:[0-9]{2})?[0-9]{2})', // m/d/yyyy, mm-dd-yyyy, etc. + "\\B('[1789][0-9])\\b", + // :TODO: DD/MMM/YY, DD/MMMM/YY, DD/MMM/YYYY, etc. + ], + areas: { message_attachment : { name : 'Message attachments', diff --git a/core/file_area.js b/core/file_area.js index c6ae2f0e..af5c3075 100644 --- a/core/file_area.js +++ b/core/file_area.js @@ -15,6 +15,8 @@ const async = require('async'); const fs = require('fs'); const crypto = require('crypto'); const paths = require('path'); +const temp = require('temp').track(); // track() cleans up temp dir/files for us +const iconv = require('iconv-lite'); exports.getAvailableFileAreas = getAvailableFileAreas; exports.getSortedAvailableFileAreas = getSortedAvailableFileAreas; @@ -140,6 +142,41 @@ function getExistingFileEntriesBySha1(sha1, cb) { ); } +// :TODO: This is bascially sliceAtEOF() from art.js .... DRY! +function sliceAtSauceMarker(data) { + let eof = data.length; + const stopPos = Math.max(data.length - (256), 0); // 256 = 2 * sizeof(SAUCE) + + for(let i = eof - 1; i > stopPos; i--) { + if(0x1a === data[i]) { + eof = i; + break; + } + } + return data.slice(0, eof); +} + +function getEstYear(input) { + // :TODO: yearEstPatterns RegExp's should be cached - we can do this @ Config (re)load time + const patterns = Config.fileBase.yearEstPatterns.map( p => new RegExp(p, 'gmi')); + + let match; + for(let i = 0; i < patterns.length; ++i) { + match = patterns[i].exec(input); + if(match) { + break; + } + } + + if(match) { + if(2 == match[1].length) { + return parseInt('19' + match[1]); + } else { + return parseInt(match[1]); + } + } +} + function addNewArchiveFileEnty(fileEntry, filePath, archiveType, cb) { const archiveUtil = ArchiveUtil.getInstance(); @@ -153,15 +190,78 @@ function addNewArchiveFileEnty(fileEntry, filePath, archiveType, cb) { function extractDescFiles(entries, callback) { // :TODO: would be nice if these RegExp's were cached + // :TODO: this is long winded... + + const extractList = []; + const shortDescFile = entries.find( e => { return Config.fileBase.fileNamePatterns.shortDesc.find( pat => new RegExp(pat, 'i').test(e.fileName) ); }); + if(shortDescFile) { + extractList.push(shortDescFile.fileName); + } + const longDescFile = entries.find( e => { return Config.fileBase.fileNamePatterns.longDesc.find( pat => new RegExp(pat, 'i').test(e.fileName) ); }); - return callback(null); + if(longDescFile) { + extractList.push(longDescFile.fileName); + } + + temp.mkdir('enigextract-', (err, tempDir) => { + if(err) { + return callback(err); + } + + archiveUtil.extractTo(filePath, tempDir, archiveType, extractList, err => { + if(err) { + return callback(err); + } + + const descFiles = { + desc : shortDescFile ? paths.join(tempDir, shortDescFile.fileName) : null, + descLong : longDescFile ? paths.join(tempDir, longDescFile.fileName) : null, + }; + + return callback(null, descFiles); + }); + }); + }, + function readDescFiles(descFiles, callback) { + // :TODO: we shoudl probably omit files that are too large + async.each(Object.keys(descFiles), (descType, next) => { + const path = descFiles[descType]; + if(!path) { + return next(null); + } + + fs.readFile(path, (err, data) => { + if(err || !data) { + return next(null); + } + + // + // Assume FILE_ID.DIZ, NFO files, etc. are CP437. + // + // :TODO: This isn't really always the case - how to handle this? We could do a quick detection... + fileEntry[descType] = iconv.decode(sliceAtSauceMarker(data, 0x1a), 'cp437'); + return next(null); + }); + }, () => { + // cleanup, but don't wait... + temp.cleanup( err => { + // :TODO: Log me! + }); + return callback(null); + }); + }, + function attemptReleaseYearEstimation(callback) { + let estYear; + if(fileEntry.descLong) { + estYear = getEstYear(fileEntry.descLong); + } } ], err => { @@ -240,6 +340,17 @@ function addOrUpdateFileEntry(areaInfo, fileName, options, cb) { if(existingEntries.length > 0) { } else { + // + // Some basics for new entries + // + fileEntry.meta.user_rating = 0; + if(options.uploadByUserName) { + fileEntry.meta.upload_by_username = options.uploadByUserName; + } + if(options.uploadByUserId) { + fileEntry.meta.upload_by_user_id = options.uploadByUserId; + } + return addNewFileEntry(fileEntry, filePath, callback); } },