* Lots of improvements to info extraction system

* MUCH faster browsing in file base
* Better release year estimation
* New tool: exiftool2desc
This commit is contained in:
Bryan Ashby 2017-05-19 18:41:13 -06:00
parent b8595f667d
commit 54a7f4f774
8 changed files with 226 additions and 80 deletions

View File

@ -223,6 +223,14 @@ function getDefaultConfig() {
privateKeyPem : paths.join(__dirname, './../misc/ssh_private_key.pem'),
firstMenu : 'sshConnected',
firstMenuNewUser : 'sshConnectedNewUser',
},
webSocket : {
port : 8810,
enabled : true, // :TODO: default to false
},
secureWebSocket : {
port : 8811,
enabled : false,
}
},
@ -264,6 +272,19 @@ function getDefaultConfig() {
}
},
infoExtractUtils : {
Exiftool2Desc : {
cmd : `${__dirname}/../util/exiftool2desc.js`, // ensure chmod +x
},
Exiftool : {
cmd : 'exiftool',
args : [
'-charset', 'utf8', '{filePath}',
'--directory', '--filepermissions', '--exiftoolversion', '--filename', '--filesize', '--filemodifydate', '--fileaccessdate', '--fileinodechangedate'
]
}
},
fileTypes : {
//
// File types explicitly known to the system. Here we can configure
@ -284,16 +305,36 @@ function getDefaultConfig() {
//
'audio/mpeg' : {
desc : 'MP3 Audio',
shortDescUtil : {
cmd : `${__dirname}/../util/exiftool2desc.js`, // ensure chmod +x
args : [ '{filePath}' ],
},
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'application/pdf' : {
desc : 'Adobe PDF',
shortDescUtil : {
cmd : `${__dirname}/../util/exiftool2desc.js`
}
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
//
// Images
//
'image/jpeg' : {
desc : 'JPEG Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'image/png' : {
desc : 'Portable Network Graphic Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'image/gif' : {
desc : 'Graphics Interchange Format Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'image/webp' : {
desc : 'WebP Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
//
// Archives
@ -579,8 +620,9 @@ function getDefaultConfig() {
"\\b('[1789][0-9])\\b", // eslint-disable-line quotes
'\\b[0-3]?[0-9][\\-\\/\\.](?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december)[\\-\\/\\.]((?:[0-9]{2})?[0-9]{2})\\b',
'\\b(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december),?\\s[0-9]+(?:st|nd|rd|th)?,?\\s((?:[0-9]{2})?[0-9]{2})\\b', // November 29th, 1997
'\\(((?:19|20)[0-9]{2})\\)', // (19xx) or (20xx) -- do this before 19xx 20xx such that this has priority
'\\b((?:19|20)[0-9]{2})\\b', // simple 19xx or 20xx with word boundaries
// :TODO: DD/MMM/YY, DD/MMMM/YY, DD/MMM/YYYY, etc.
// :TODO: "Copyright YYYY someone"
],
web : {

View File

@ -23,6 +23,7 @@ const paths = require('path');
const temptmp = require('temptmp').createTrackedSession('file_area');
const iconv = require('iconv-lite');
const exec = require('child_process').exec;
const moment = require('moment');
exports.isInternalArea = isInternalArea;
exports.getAvailableFileAreas = getAvailableFileAreas;
@ -228,6 +229,10 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
//
// We attempt detection in short -> long order
//
// Throw out anything later than current_year + 2 (we give some leeway)
// with the assumption that it must be wrong.
//
const maxYear = moment().add(2, 'year').year();
const match = getMatch(fileEntry.desc) || getMatch(fileEntry.descLong);
if(match && match[1]) {
let year;
@ -244,7 +249,7 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
year = parseInt(match[1]);
}
if(year) {
if(year && year <= maxYear) {
fileEntry.meta.est_release_year = year;
}
}
@ -394,6 +399,20 @@ function populateFileEntryWithArchive(fileEntry, filePath, stepInfo, iterator, c
);
}
/**
 * Resolves the info extraction utility configured for a MIME type.
 *
 * The file type entry names a utility via its `<descType>DescUtil` key
 * (e.g. `shortDescUtil`); that name is then looked up under
 * `Config.infoExtractUtils`.
 *
 * @param {string} mimeType - Key into `Config.fileTypes`.
 * @param {string} descType - Prefix of the `...DescUtil` key, e.g. 'short' or 'long'.
 * @returns {Object|undefined} The utility entry, or undefined when the file
 *  type names no utility (as a string) or the named utility has no string `cmd`.
 */
function getInfoExtractUtilForDesc(mimeType, descType) {
    const utilName = _.get(Config, [ 'fileTypes', mimeType, `${descType}DescUtil` ]);

    if(_.isString(utilName)) {
        const utilConfig = _.get(Config, [ 'infoExtractUtils', utilName ]);

        // only hand back entries that can actually be executed
        if(utilConfig && _.isString(utilConfig.cmd)) {
            return utilConfig;
        }
    }
}
function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb) {
async.series(
@ -408,27 +427,42 @@ function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb
return callback(null);
}
const shortDescUtil = _.get(Config, [ 'fileTypes', mimeType, 'shortDescUtil' ]);
if(!shortDescUtil || !shortDescUtil.cmd) {
return callback(null);
}
const args = (shortDescUtil.args || [ '{filePath} '] ).map( arg => stringFormat(arg, { filePath : filePath } ) );
exec(`${shortDescUtil.cmd} ${args.join(' ')}`, (err, stdout) => {
if(err) {
logDebug(
{ error : err.message, cmd : shortDescUtil.cmd, args : args },
'Short description command failed'
);
} else {
//
// "...no more than 45 characters long" -- FILE_ID.DIZ v1.9 spec:
// http://www.textfiles.com/computers/fileid.txt
//
fileEntry.desc = (wordWrapText( (stdout || '').trim(), { width : 45 } ).wrapped || []).join('\n');
async.eachSeries( [ 'short', 'long' ], (descType, nextDesc) => {
const util = getInfoExtractUtilForDesc(mimeType, descType);
if(!util) {
return nextDesc(null);
}
const args = (util.args || [ '{filePath} '] ).map( arg => stringFormat(arg, { filePath : filePath } ) );
exec(`${util.cmd} ${args.join(' ')}`, (err, stdout) => {
if(err) {
logDebug(
{ error : err.message, cmd : util.cmd, args : args },
`${_.upperFirst(descType)} description command failed`
);
} else {
stdout = (stdout || '').trim();
if(stdout.length > 0) {
const key = 'short' === descType ? 'desc' : 'descLong';
if('desc' === key) {
//
// Word wrap short descriptions to FILE_ID.DIZ spec
//
// "...no more than 45 characters long"
//
// See http://www.textfiles.com/computers/fileid.txt
//
stdout = (wordWrapText( stdout, { width : 45 } ).wrapped || []).join('\n');
}
fileEntry[key] = stdout;
}
}
return nextDesc(null);
});
}, () => {
return callback(null);
});
},

View File

@ -396,13 +396,13 @@ module.exports = class FileEntry {
FROM file_user_rating
WHERE file_id = f.file_id)
AS avg_rating
FROM file f, file_meta m`;
FROM file f`;
sqlOrderBy = `ORDER BY avg_rating ${sqlOrderDir}`;
} else {
sql =
`SELECT DISTINCT f.file_id, f.${filter.sort}
FROM file f, file_meta m`;
FROM file f`;
sqlOrderBy = getOrderByWithCast(`f.${filter.sort}`) + ' ' + sqlOrderDir;
}
@ -410,7 +410,7 @@ module.exports = class FileEntry {
} else {
sql =
`SELECT DISTINCT f.file_id
FROM file f, file_meta m`;
FROM file f`;
sqlOrderBy = `${getOrderByWithCast('f.file_id')} ${sqlOrderDir}`;
}

View File

@ -6,5 +6,9 @@ const mimeTypes = require('mime-types');
exports.resolveMimeType = resolveMimeType;
function resolveMimeType(query) {
return mimeTypes.extension(query) || mimeTypes.lookup(query) || undefined; // lookup() returns false; we want undefined
if(mimeTypes.extensions[query]) {
return query; // already a mime-type
}
return mimeTypes.lookup(query) || undefined; // lookup() returns false; we want undefined
}

View File

@ -378,10 +378,11 @@ function getThemeArt(options, cb) {
options.random = _.isBoolean(options.random) ? options.random : true; // FILENAME<n>.EXT support
//
// We look for themed art in the following manor:
// * Supplied theme via |themeId|
// * Fallback 1: Default theme (if different than |themeId|)
// * General art directory
// We look for themed art in the following order:
// 1) Direct/relative path
// 2) Via theme supplied by |themeId|
// 3) Via default theme
// 4) General art directory
//
async.waterfall(
[
@ -389,7 +390,7 @@ function getThemeArt(options, cb) {
//
// We allow relative (to enigma-bbs) or full paths
//
if('/' === options.name[0]) {
if('/' === options.name.charAt(0)) {
// just take the path as-is
options.basePath = paths.dirname(options.name);
} else if(options.name.indexOf('/') > -1) {
@ -409,41 +410,35 @@ function getThemeArt(options, cb) {
}
options.basePath = paths.join(Config.paths.themes, options.themeId);
art.getArt(options.name, options, function artLoaded(err, artInfo) {
callback(null, artInfo);
art.getArt(options.name, options, (err, artInfo) => {
return callback(null, artInfo);
});
},
function fromDefaultTheme(artInfo, callback) {
if(artInfo || Config.defaults.theme === options.themeId) {
callback(null, artInfo);
} else {
options.basePath = paths.join(Config.paths.themes, Config.defaults.theme);
art.getArt(options.name, options, function artLoaded(err, artInfo) {
callback(null, artInfo);
});
return callback(null, artInfo);
}
options.basePath = paths.join(Config.paths.themes, Config.defaults.theme);
art.getArt(options.name, options, (err, artInfo) => {
return callback(null, artInfo);
});
},
function fromGeneralArtDir(artInfo, callback) {
if(artInfo) {
callback(null, artInfo);
} else {
options.basePath = Config.paths.art;
art.getArt(options.name, options, function artLoaded(err, artInfo) {
callback(err, artInfo);
});
return callback(null, artInfo);
}
options.basePath = Config.paths.art;
art.getArt(options.name, options, (err, artInfo) => {
return callback(err, artInfo);
});
}
],
function complete(err, artInfo) {
if(err) {
if(options.client) {
options.client.log.debug( { error : err.message }, 'Cannot find theme art' );
} else {
Log.debug( { error : err.message }, 'Cannot find theme art' );
}
const logger = _.get(options, 'client.log') || Log;
logger.debug( { reason : err.message }, 'Cannot find theme art');
}
return cb(err, artInfo);
}

View File

@ -2372,6 +2372,17 @@
]
focusItemIndex: 1
}
// :TODO: these can be removed once the hack is not required:
TL10: {}
TL11: {}
TL12: {}
TL13: {}
TL14: {}
TL15: {}
TL16: {}
TL17: {}
TL18: {}
}
submit: {
@ -2453,6 +2464,17 @@
"general", "nfo/readme", "file listing"
]
}
// :TODO: these can be removed once the hack is not required:
TL10: {}
TL11: {}
TL12: {}
TL13: {}
TL14: {}
TL15: {}
TL16: {}
TL17: {}
TL18: {}
}
actionKeys: [

View File

@ -42,7 +42,8 @@
"ssh2": "^0.5.1",
"temptmp": "^1.0.0",
"uuid": "^3.0.1",
"uuid-parse": "^1.0.0"
"uuid-parse": "^1.0.0",
"ws" : "^2.3.1"
},
"devDependencies": {},
"engines": {

View File

@ -8,9 +8,68 @@
const exiftool = require('exiftool');
const fs = require('fs');
const moment = require('moment');
const TOOL_VERSION = '1.0.0.0';
//
// Map of fileTypes -> handlers
//
// Keys are file type identifiers that main() matches against |metadata.fileType|;
// values are functions that take the metadata object and return a description string.
// A file type with no entry here has no handler.
//
const FILETYPE_HANDLERS = {};
// audio formats
[ 'AIFF', 'APE', 'FLAC', 'OGG', 'MP3' ].forEach(ext => FILETYPE_HANDLERS[ext] = audioFile);
// documents: PDF, Word and OpenDocument variants
[ 'PDF', 'DOC', 'DOCX', 'DOCM', 'ODB', 'ODC', 'ODF', 'ODG', 'ODI', 'ODP', 'ODS', 'ODT' ].forEach(ext => FILETYPE_HANDLERS[ext] = documentFile);
// images
[ 'PNG', 'JPEG', 'GIF', 'WEBP', 'XCF' ].forEach(ext => FILETYPE_HANDLERS[ext] = imageFile);
/**
 * Builds a one line description for an audio file.
 *
 * Format: `<artist> - <title> (<year>, <bitrate>)`. Year and bitrate are each
 * included only when present; the parenthesized section is dropped entirely
 * when neither is available, rather than emitting the literal "undefined".
 *
 * @param {Object} metadata - Reads `artist`, `title`, `year` and `audioBitrate`.
 * @returns {string} The description.
 */
function audioFile(metadata) {
    const artist = metadata.artist || 'Unknown Artist';
    const title = metadata.title || 'Unknown';

    // keep only the details this file actually carries
    const details = [ metadata.year, metadata.audioBitrate ].filter(detail => detail);

    const desc = `${artist} - ${title}`;
    return details.length > 0 ? `${desc} (${details.join(', ')})` : desc;
}
/**
 * Builds a one line description for a document.
 *
 * Format: `<author> - <title> (<YYYY>)`. The year is appended only when
 * `createdate` is present and parses to a valid date.
 *
 * @param {Object} metadata - Reads `author`, `title` and `createdate`.
 * @returns {string} The description.
 */
function documentFile(metadata) {
    let desc = `${metadata.author || 'Unknown Author'} - ${metadata.title || 'Unknown'}`;

    //
    // Only consult moment when a creation date exists: moment(undefined)
    // behaves like moment() ("now") and is valid, which would stamp
    // documents lacking the tag with the current year.
    //
    // NOTE(review): the exact string format of |createdate| is not visible
    // here -- confirm moment can parse what the exiftool module supplies.
    //
    if(metadata.createdate) {
        const created = moment(metadata.createdate);
        if(created.isValid()) {
            desc += ` (${created.format('YYYY')})`;
        }
    }

    return desc;
}
/**
 * Builds a one line description for an image.
 *
 * Format: `<fileType> image (Animated, <imageSize>px, <YYYY>)`. Each detail
 * is included only when available; the parenthesized section is dropped when
 * there are no details at all.
 *
 * @param {Object} metadata - Reads `fileType`, `animationIterations`,
 *  `imageSize` and `createdate`.
 * @returns {string} The description.
 */
function imageFile(metadata) {
    const details = [];

    if(metadata.animationIterations) {
        details.push('Animated');
    }

    // avoid emitting "undefinedpx" when no dimensions were extracted
    if(metadata.imageSize) {
        details.push(`${metadata.imageSize}px`);
    }

    //
    // Only consult moment when a creation date exists: moment(undefined)
    // behaves like moment() ("now") and is valid, which would stamp
    // images lacking the tag with the current year.
    //
    if(metadata.createdate) {
        const created = moment(metadata.createdate);
        if(created.isValid()) {
            details.push(created.format('YYYY'));
        }
    }

    const desc = `${metadata.fileType} image`;
    return details.length > 0 ? `${desc} (${details.join(', ')})` : desc;
}
function main() {
const path = process.argv[2];
const argv = exports.argv = require('minimist')(process.argv.slice(2), {
alias : {
h : 'help',
v : 'version',
}
});
if(argv.version) {
console.info(TOOL_VERSION);
return 0;
}
if(0 === argv._.length || argv.help) {
console.info('usage: exiftool2desc.js [--version] [--help] PATH');
return 0;
}
const path = argv._[0];
fs.readFile(path, (err, data) => {
if(err) {
@ -22,23 +81,12 @@ function main() {
return -1;
}
switch(metadata.fileType) {
case 'AIFF' :
case 'APE' :
case 'FLAC' :
case 'OGG' :
case 'MP3' :
console.log(`${metadata.artist||'Unknown Artist'} - ${metadata.title||'Unknown'} (${metadata.audioBitrate})`);
break;
case 'PDF' :
console.log(`${metadata.author||'Unknown Author'} - ${metadata.title||'Unknown'}`);
break;
default :
return -1;
const handler = FILETYPE_HANDLERS[metadata.fileType];
if(!handler) {
return -1;
}
console.info(handler(metadata));
return 0;
});
});