* New archives/archiver format - more flexible, more formats, etc.

* Add scanning inside archives ability, extract file_id.diz, etc.
* Initial year est (WIP) work
This commit is contained in:
Bryan Ashby 2016-10-05 23:22:59 -06:00
parent 61b0658743
commit 29947611f6
4 changed files with 205 additions and 61 deletions

View File

@ -4,6 +4,7 @@
// ENiGMA½ // ENiGMA½
const Config = require('./config.js').config; const Config = require('./config.js').config;
const stringFormat = require('./string_format.js'); const stringFormat = require('./string_format.js');
const Errors = require('./enig_error.js').Errors;
// base/modules // base/modules
const fs = require('fs'); const fs = require('fs');
@ -19,8 +20,8 @@ class Archiver {
this.list = config.list; this.list = config.list;
this.extract = config.extract; this.extract = config.extract;
this.sig = new Buffer(config.sig, 'hex'); /*this.sig = new Buffer(config.sig, 'hex');
this.offset = config.offset || 0; this.offset = config.offset || 0;*/
} }
ok() { ok() {
@ -61,10 +62,10 @@ module.exports = class ArchiveUtil {
// //
// Load configuration // Load configuration
// //
if(_.has(Config, 'archivers')) { if(_.has(Config, 'archives.archivers')) {
Object.keys(Config.archivers).forEach(archKey => { Object.keys(Config.archives.archivers).forEach(archKey => {
const archConfig = Config.archivers[archKey]; const archConfig = Config.archives.archivers[archKey];
const archiver = new Archiver(archConfig); const archiver = new Archiver(archConfig);
if(!archiver.ok()) { if(!archiver.ok()) {
@ -72,21 +73,43 @@ module.exports = class ArchiveUtil {
} }
this.archivers[archKey] = archiver; this.archivers[archKey] = archiver;
});
}
if(archiver.offset + archiver.sig.length > this.longestSignature) { if(_.has(Config, 'archives.formats')) {
this.longestSignature = archiver.offset + archiver.sig.length; Object.keys(Config.archives.formats).forEach(fmtKey => {
Config.archives.formats[fmtKey].sig = new Buffer(Config.archives.formats[fmtKey].sig, 'hex');
Config.archives.formats[fmtKey].offset = Config.archives.formats[fmtKey].offset || 0;
const sigLen = Config.archives.formats[fmtKey].offset + Config.archives.formats[fmtKey].sig.length;
if(sigLen > this.longestSignature) {
this.longestSignature = sigLen;
} }
}); });
} }
} }
/*
getArchiver(archType) { getArchiver(archType) {
if(!archType) { if(!archType || 0 === archType.length) {
return; return;
} }
archType = archType.toLowerCase(); archType = archType.toLowerCase();
return this.archivers[archType]; return this.archivers[archType];
}*/
getArchiver(archType) {
if(!archType || 0 === archType.length) {
return;
}
if(_.has(Config, [ 'archives', 'formats', archType, 'handler' ] ) &&
_.has(Config, [ 'archives', 'archivers', Config.archives.formats[archType].handler ] ))
{
return Config.archives.archivers[ Config.archives.formats[archType].handler ];
}
} }
haveArchiver(archType) { haveArchiver(archType) {
@ -98,31 +121,33 @@ module.exports = class ArchiveUtil {
} }
detectType(path, cb) { detectType(path, cb) {
if(!_.has(Config, 'archives.formats')) {
return cb(Errors.DoesNotExist('No formats configured'));
}
fs.open(path, 'r', (err, fd) => { fs.open(path, 'r', (err, fd) => {
if(err) { if(err) {
cb(err); return cb(err);
return;
} }
let buf = new Buffer(this.longestSignature); const buf = new Buffer(this.longestSignature);
fs.read(fd, buf, 0, buf.length, 0, (err, bytesRead) => { fs.read(fd, buf, 0, buf.length, 0, (err, bytesRead) => {
if(err) { if(err) {
return cb(err); return cb(err);
} }
// return first match const archFormat = _.findKey(Config.archives.formats, archFormat => {
const detected = _.findKey(this.archivers, arch => { const lenNeeded = archFormat.offset + archFormat.sig.length;
const lenNeeded = arch.offset + arch.sig.length;
if(bytesRead < lenNeeded) { if(bytesRead < lenNeeded) {
return false; return false;
} }
const comp = buf.slice(arch.offset, arch.offset + arch.sig.length); const comp = buf.slice(archFormat.offset, archFormat.offset + archFormat.sig.length);
return (arch.sig.equals(comp)); return (archFormat.sig.equals(comp));
}); });
cb(detected ? null : new Error('Unknown type'), detected); return cb(archFormat ? null : Errors.General('Unknown type'), archFormat);
}); });
}); });
} }
@ -157,7 +182,7 @@ module.exports = class ArchiveUtil {
const fmtObj = { const fmtObj = {
archivePath : archivePath, archivePath : archivePath,
fileList : files.join(' '), fileList : files.join(' '), // :TODO: probably need same hack as extractTo here!
}; };
const args = archiver.compress.args.map( arg => stringFormat(arg, fmtObj) ); const args = archiver.compress.args.map( arg => stringFormat(arg, fmtObj) );
@ -166,7 +191,17 @@ module.exports = class ArchiveUtil {
return this.spawnHandler(proc, 'Compression', cb); return this.spawnHandler(proc, 'Compression', cb);
} }
extractTo(archivePath, extractPath, archType, cb) { extractTo(archivePath, extractPath, archType, fileList, cb) {
let haveFileList;
if(!cb && _.isFunction(fileList)) {
cb = fileList;
fileList = [];
haveFileList = false;
} else {
haveFileList = true;
}
const archiver = this.getArchiver(archType); const archiver = this.getArchiver(archType);
if(!archiver) { if(!archiver) {
@ -178,10 +213,22 @@ module.exports = class ArchiveUtil {
extractPath : extractPath, extractPath : extractPath,
}; };
const args = archiver.decompress.args.map( arg => stringFormat(arg, fmtObj) ); const action = haveFileList ? 'extract' : 'decompress';
const proc = pty.spawn(archiver.decompress.cmd, args, this.getPtyOpts());
return this.spawnHandler(proc, 'Decompression', cb); // we need to treat {fileList} special in that it should be broken up to 0:n args
const args = archiver[action].args.map( arg => {
return '{fileList}' === arg ? arg : stringFormat(arg, fmtObj);
});
const fileListPos = args.indexOf('{fileList}');
if(fileListPos > -1) {
// replace {fileList} with 0:n sep file list arguments
args.splice.apply(args, [fileListPos, 1].concat(fileList));
}
const proc = pty.spawn(archiver[action].cmd, args, this.getPtyOpts());
return this.spawnHandler(proc, (haveFileList ? 'Extraction' : 'Decompression'), cb);
} }
listEntries(archivePath, archType, cb) { listEntries(archivePath, archType, cb) {

View File

@ -7,7 +7,6 @@ const miscUtil = require('./misc_util.js');
const ansi = require('./ansi_term.js'); const ansi = require('./ansi_term.js');
const aep = require('./ansi_escape_parser.js'); const aep = require('./ansi_escape_parser.js');
const sauce = require('./sauce.js'); const sauce = require('./sauce.js');
const farmhash = require('farmhash');
// deps // deps
const fs = require('fs'); const fs = require('fs');
@ -15,6 +14,7 @@ const paths = require('path');
const assert = require('assert'); const assert = require('assert');
const iconv = require('iconv-lite'); const iconv = require('iconv-lite');
const _ = require('lodash'); const _ = require('lodash');
const farmhash = require('farmhash');
exports.getArt = getArt; exports.getArt = getArt;
exports.getArtFromPath = getArtFromPath; exports.getArtFromPath = getArtFromPath;

View File

@ -211,49 +211,25 @@ function getDefaultConfig() {
} }
}, },
archivers : { archives : {
zip : { archivers : {
sig : '504b0304',
offset : 0,
compress : {
cmd : '7z',
args : [ 'a', '-tzip', '{archivePath}', '{fileList}' ],
},
decompress : {
cmd : '7z',
args : [ 'e', '-o{extractPath}', '{archivePath}' ]
},
list : {
cmd : '7z',
args : [ 'l', '{archivePath}' ],
entryMatch : '^[0-9]{4}-[0-9]{2}-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2}\\s[A-Za-z\\.]{5}\\s+([0-9]+)\\s+[0-9]+\\s+([^\\r\\n]+)$',
},
extract : {
cmd : '7z',
args : [ 'x', '-o{extractPath}', '{archivePath}', '{fileList}' ],
},
},
},
archivers2 : {
tools : {
'7Zip' : { '7Zip' : {
compress : { compress : {
cmd : '7z', cmd : '7za',
args : [ 'a', '-tzip', '{archivePath}', '{fileList}' ], args : [ 'a', '-tzip', '{archivePath}', '{fileList}' ],
}, },
decompress : { decompress : {
cmd : '7z', cmd : '7za',
args : [ 'e', '-o{extractPath}', '{archivePath}' ] args : [ 'e', '-o{extractPath}', '{archivePath}' ]
}, },
list : { list : {
cmd : '7z', cmd : '7za',
args : [ 'l', '{archivePath}' ], args : [ 'l', '{archivePath}' ],
entryMatch : '^[0-9]{4}-[0-9]{2}-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2}\\s[A-Za-z\\.]{5}\\s+([0-9]+)\\s+[0-9]+\\s+([^\\r\\n]+)$', entryMatch : '^[0-9]{4}-[0-9]{2}-[0-9]{2}\\s[0-9]{2}:[0-9]{2}:[0-9]{2}\\s[A-Za-z\\.]{5}\\s+([0-9]+)\\s+[0-9]+\\s+([^\\r\\n]+)$',
}, },
extract : { extract : {
cmd : '7z', cmd : '7za',
args : [ 'x', '-o{extractPath}', '{archivePath}', '{fileList}' ], args : [ 'e', '-o{extractPath}', '{archivePath}', '{fileList}' ],
}, },
} }
}, },
@ -262,25 +238,25 @@ function getDefaultConfig() {
sig : '504b0304', sig : '504b0304',
offset : 0, offset : 0,
exts : [ 'zip' ], exts : [ 'zip' ],
tool : '7Zip', handler : '7Zip',
}, },
'7z' : { '7z' : {
sig : '377abcaf271c', sig : '377abcaf271c',
offset : 0, offset : 0,
exts : [ '7z' ], exts : [ '7z' ],
tool : '7Zip', handler : '7Zip',
}, },
arj : { arj : {
sig : '60ea', sig : '60ea',
offset : 0, offset : 0,
exts : [ 'arj' ], exts : [ 'arj' ],
tool : '7Zip', handler : '7Zip',
}, },
rar : { rar : {
sig : '526172211a0700', sig : '526172211a0700',
offset : 0, offset : 0,
exts : [ 'rar' ], exts : [ 'rar' ],
tool : '7Zip', handler : '7Zip',
} }
} }
}, },
@ -367,6 +343,16 @@ function getDefaultConfig() {
longDesc : [ '^.*\.NFO$', '^README\.1ST$', '^README\.TXT$' ], longDesc : [ '^.*\.NFO$', '^README\.1ST$', '^README\.TXT$' ],
}, },
yearEstPatterns: [
//
// Patterns should produce the year in the first submatch
// The year may be YY or YYYY
//
'[0-3]?[0-9][\\-\\/\\.][0-3]?[0-9][\\-\\/\\.]((?:[0-9]{2})?[0-9]{2})', // m/d/yyyy, mm-dd-yyyy, etc.
"\\B('[1789][0-9])\\b",
// :TODO: DD/MMM/YY, DD/MMMM/YY, DD/MMM/YYYY, etc.
],
areas: { areas: {
message_attachment : { message_attachment : {
name : 'Message attachments', name : 'Message attachments',

View File

@ -15,6 +15,8 @@ const async = require('async');
const fs = require('fs'); const fs = require('fs');
const crypto = require('crypto'); const crypto = require('crypto');
const paths = require('path'); const paths = require('path');
const temp = require('temp').track(); // track() cleans up temp dir/files for us
const iconv = require('iconv-lite');
exports.getAvailableFileAreas = getAvailableFileAreas; exports.getAvailableFileAreas = getAvailableFileAreas;
exports.getSortedAvailableFileAreas = getSortedAvailableFileAreas; exports.getSortedAvailableFileAreas = getSortedAvailableFileAreas;
@ -140,6 +142,41 @@ function getExistingFileEntriesBySha1(sha1, cb) {
); );
} }
//	:TODO: This is basically sliceAtEOF() from art.js .... DRY!
//
//	Return |data| sliced at the last EOF/SUB marker byte (|marker|,
//	default 0x1a) found within the trailing 256 bytes — 256 = 2 * sizeof(SAUCE),
//	i.e. enough to cover a SAUCE record plus comment block header.
//	If no marker is found, |data| is returned unmodified.
function sliceAtSauceMarker(data, marker) {
	//	callers may pass the marker explicitly (see addNewArchiveFileEnty);
	//	default to 0x1a (SUB), which precedes a SAUCE record
	marker = marker || 0x1a;

	let eof = data.length;
	const stopPos = Math.max(data.length - 256, 0);	//	256 = 2 * sizeof(SAUCE)

	//	scan backwards; >= so the full 256 byte window (incl. stopPos) is inspected
	for(let i = eof - 1; i >= stopPos; i--) {
		if(marker === data[i]) {
			eof = i;
			break;
		}
	}

	return data.slice(0, eof);
}
//
//	Attempt to estimate a release year from free form |input| text
//	(e.g. FILE_ID.DIZ / NFO contents) using Config.fileBase.yearEstPatterns.
//	Patterns produce the year in their first submatch, as YY or YYYY.
//	Returns a Number year, or undefined if nothing matched.
//
function getEstYear(input) {
	//	:TODO: yearEstPatterns RegExp's should be cached - we can do this @ Config (re)load time
	const patterns = Config.fileBase.yearEstPatterns.map( p => new RegExp(p, 'gmi'));

	let match;
	for(let i = 0; i < patterns.length; ++i) {
		match = patterns[i].exec(input);
		if(match) {
			break;
		}
	}

	if(match) {
		//	some patterns capture a leading apostrophe (e.g. "'95") which
		//	would make parseInt() return NaN - keep digits only
		const yearDigits = match[1].replace(/[^0-9]/g, '');

		if(2 === yearDigits.length) {
			//	:TODO: century pivot - a YY of e.g. "05" may mean 2005, not 1905
			return parseInt('19' + yearDigits, 10);
		}
		return parseInt(yearDigits, 10);
	}
}
function addNewArchiveFileEnty(fileEntry, filePath, archiveType, cb) { function addNewArchiveFileEnty(fileEntry, filePath, archiveType, cb) {
const archiveUtil = ArchiveUtil.getInstance(); const archiveUtil = ArchiveUtil.getInstance();
@ -153,15 +190,78 @@ function addNewArchiveFileEnty(fileEntry, filePath, archiveType, cb) {
function extractDescFiles(entries, callback) { function extractDescFiles(entries, callback) {
// :TODO: would be nice if these RegExp's were cached // :TODO: would be nice if these RegExp's were cached
// :TODO: this is long winded...
const extractList = [];
const shortDescFile = entries.find( e => { const shortDescFile = entries.find( e => {
return Config.fileBase.fileNamePatterns.shortDesc.find( pat => new RegExp(pat, 'i').test(e.fileName) ); return Config.fileBase.fileNamePatterns.shortDesc.find( pat => new RegExp(pat, 'i').test(e.fileName) );
}); });
if(shortDescFile) {
extractList.push(shortDescFile.fileName);
}
const longDescFile = entries.find( e => { const longDescFile = entries.find( e => {
return Config.fileBase.fileNamePatterns.longDesc.find( pat => new RegExp(pat, 'i').test(e.fileName) ); return Config.fileBase.fileNamePatterns.longDesc.find( pat => new RegExp(pat, 'i').test(e.fileName) );
}); });
return callback(null); if(longDescFile) {
extractList.push(longDescFile.fileName);
}
temp.mkdir('enigextract-', (err, tempDir) => {
if(err) {
return callback(err);
}
archiveUtil.extractTo(filePath, tempDir, archiveType, extractList, err => {
if(err) {
return callback(err);
}
const descFiles = {
desc : shortDescFile ? paths.join(tempDir, shortDescFile.fileName) : null,
descLong : longDescFile ? paths.join(tempDir, longDescFile.fileName) : null,
};
return callback(null, descFiles);
});
});
},
function readDescFiles(descFiles, callback) {
// :TODO: we should probably omit files that are too large
async.each(Object.keys(descFiles), (descType, next) => {
const path = descFiles[descType];
if(!path) {
return next(null);
}
fs.readFile(path, (err, data) => {
if(err || !data) {
return next(null);
}
//
// Assume FILE_ID.DIZ, NFO files, etc. are CP437.
//
// :TODO: This isn't really always the case - how to handle this? We could do a quick detection...
fileEntry[descType] = iconv.decode(sliceAtSauceMarker(data, 0x1a), 'cp437');
return next(null);
});
}, () => {
// cleanup, but don't wait...
temp.cleanup( err => {
// :TODO: Log me!
});
return callback(null);
});
},
function attemptReleaseYearEstimation(callback) {
let estYear;
if(fileEntry.descLong) {
estYear = getEstYear(fileEntry.descLong);
}
} }
], ],
err => { err => {
@ -240,6 +340,17 @@ function addOrUpdateFileEntry(areaInfo, fileName, options, cb) {
if(existingEntries.length > 0) { if(existingEntries.length > 0) {
} else { } else {
//
// Some basics for new entries
//
fileEntry.meta.user_rating = 0;
if(options.uploadByUserName) {
fileEntry.meta.upload_by_username = options.uploadByUserName;
}
if(options.uploadByUserId) {
fileEntry.meta.upload_by_user_id = options.uploadByUserId;
}
return addNewFileEntry(fileEntry, filePath, callback); return addNewFileEntry(fileEntry, filePath, callback);
} }
}, },