* Lots of improvements to info extraction system

* MUCH faster browsing in file base
* Better release year estimation
* New tool: exiftool2desc
This commit is contained in:
Bryan Ashby 2017-05-19 18:41:13 -06:00
parent b8595f667d
commit 54a7f4f774
8 changed files with 226 additions and 80 deletions

View File

@ -223,6 +223,14 @@ function getDefaultConfig() {
privateKeyPem : paths.join(__dirname, './../misc/ssh_private_key.pem'), privateKeyPem : paths.join(__dirname, './../misc/ssh_private_key.pem'),
firstMenu : 'sshConnected', firstMenu : 'sshConnected',
firstMenuNewUser : 'sshConnectedNewUser', firstMenuNewUser : 'sshConnectedNewUser',
},
webSocket : {
port : 8810,
enabled : true, // :TODO: default to false
},
secureWebSocket : {
port : 8811,
enabled : false,
} }
}, },
@ -264,6 +272,19 @@ function getDefaultConfig() {
} }
}, },
infoExtractUtils : {
Exiftool2Desc : {
cmd : `${__dirname}/../util/exiftool2desc.js`, // ensure chmod +x
},
Exiftool : {
cmd : 'exiftool',
args : [
'-charset', 'utf8', '{filePath}',
'--directory', '--filepermissions', '--exiftoolversion', '--filename', '--filesize', '--filemodifydate', '--fileaccessdate', '--fileinodechangedate'
]
}
},
fileTypes : { fileTypes : {
// //
// File types explicitly known to the system. Here we can configure // File types explicitly known to the system. Here we can configure
@ -284,16 +305,36 @@ function getDefaultConfig() {
// //
'audio/mpeg' : { 'audio/mpeg' : {
desc : 'MP3 Audio', desc : 'MP3 Audio',
shortDescUtil : { shortDescUtil : 'Exiftool2Desc',
cmd : `${__dirname}/../util/exiftool2desc.js`, // ensure chmod +x longDescUtil : 'Exiftool',
args : [ '{filePath}' ],
},
}, },
'application/pdf' : { 'application/pdf' : {
desc : 'Adobe PDF', desc : 'Adobe PDF',
shortDescUtil : { shortDescUtil : 'Exiftool2Desc',
cmd : `${__dirname}/../util/exiftool2desc.js` longDescUtil : 'Exiftool',
} },
//
// Images
//
'image/jpeg' : {
desc : 'JPEG Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'image/png' : {
desc : 'Portable Network Graphic Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'image/gif' : {
desc : 'Graphics Interchange Format Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
},
'image/webp' : {
desc : 'WebP Image',
shortDescUtil : 'Exiftool2Desc',
longDescUtil : 'Exiftool',
}, },
// //
// Archives // Archives
@ -579,8 +620,9 @@ function getDefaultConfig() {
"\\b('[1789][0-9])\\b", // eslint-disable-line quotes "\\b('[1789][0-9])\\b", // eslint-disable-line quotes
'\\b[0-3]?[0-9][\\-\\/\\.](?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december)[\\-\\/\\.]((?:[0-9]{2})?[0-9]{2})\\b', '\\b[0-3]?[0-9][\\-\\/\\.](?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december)[\\-\\/\\.]((?:[0-9]{2})?[0-9]{2})\\b',
'\\b(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december),?\\s[0-9]+(?:st|nd|rd|th)?,?\\s((?:[0-9]{2})?[0-9]{2})\\b', // November 29th, 1997 '\\b(?:jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|january|february|march|april|may|june|july|august|september|october|november|december),?\\s[0-9]+(?:st|nd|rd|th)?,?\\s((?:[0-9]{2})?[0-9]{2})\\b', // November 29th, 1997
'\\(((?:19|20)[0-9]{2})\\)', // (19xx) or (20xx) -- do this before 19xx 20xx such that this has priority
'\\b((?:19|20)[0-9]{2})\\b', // simple 19xx or 20xx with word boundaries
// :TODO: DD/MMM/YY, DD/MMMM/YY, DD/MMM/YYYY, etc. // :TODO: DD/MMM/YY, DD/MMMM/YY, DD/MMM/YYYY, etc.
// :TODO: "Copyright YYYY someone"
], ],
web : { web : {

View File

@ -23,6 +23,7 @@ const paths = require('path');
const temptmp = require('temptmp').createTrackedSession('file_area'); const temptmp = require('temptmp').createTrackedSession('file_area');
const iconv = require('iconv-lite'); const iconv = require('iconv-lite');
const exec = require('child_process').exec; const exec = require('child_process').exec;
const moment = require('moment');
exports.isInternalArea = isInternalArea; exports.isInternalArea = isInternalArea;
exports.getAvailableFileAreas = getAvailableFileAreas; exports.getAvailableFileAreas = getAvailableFileAreas;
@ -214,7 +215,7 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
const patterns = Config.fileBase.yearEstPatterns.map( p => new RegExp(p, 'gmi')); const patterns = Config.fileBase.yearEstPatterns.map( p => new RegExp(p, 'gmi'));
function getMatch(input) { function getMatch(input) {
if(input) { if(input) {
let m; let m;
for(let i = 0; i < patterns.length; ++i) { for(let i = 0; i < patterns.length; ++i) {
m = patterns[i].exec(input); m = patterns[i].exec(input);
@ -228,6 +229,10 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
// //
// We attempt detection in short -> long order // We attempt detection in short -> long order
// //
// Throw out anything later than current_year + 2 (we give some leeway)
// with the assumption that it must be wrong.
//
const maxYear = moment().add(2, 'year').year();
const match = getMatch(fileEntry.desc) || getMatch(fileEntry.descLong); const match = getMatch(fileEntry.desc) || getMatch(fileEntry.descLong);
if(match && match[1]) { if(match && match[1]) {
let year; let year;
@ -244,7 +249,7 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
year = parseInt(match[1]); year = parseInt(match[1]);
} }
if(year) { if(year && year <= maxYear) {
fileEntry.meta.est_release_year = year; fileEntry.meta.est_release_year = year;
} }
} }
@ -394,6 +399,20 @@ function populateFileEntryWithArchive(fileEntry, filePath, stepInfo, iterator, c
); );
} }
//
//  Resolve the info extraction utility configured for |mimeType| and
//  |descType| ('short' or 'long').
//
//  The file type entry names a utility via its `${descType}DescUtil`
//  member; that name is then looked up under Config.infoExtractUtils.
//  Returns the utility entry, or undefined if no name is configured or
//  the named entry lacks a string |cmd|.
//
function getInfoExtractUtilForDesc(mimeType, descType) {
    const utilName = _.get(Config, [ 'fileTypes', mimeType, `${descType}DescUtil` ]);
    if(_.isString(utilName)) {
        const utilEntry = _.get(Config, [ 'infoExtractUtils', utilName ]);
        if(utilEntry && _.isString(utilEntry.cmd)) {
            return utilEntry;
        }
    }
}
function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb) { function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb) {
async.series( async.series(
@ -408,27 +427,42 @@ function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb
return callback(null); return callback(null);
} }
const shortDescUtil = _.get(Config, [ 'fileTypes', mimeType, 'shortDescUtil' ]); async.eachSeries( [ 'short', 'long' ], (descType, nextDesc) => {
if(!shortDescUtil || !shortDescUtil.cmd) { const util = getInfoExtractUtilForDesc(mimeType, descType);
return callback(null); if(!util) {
} return nextDesc(null);
const args = (shortDescUtil.args || [ '{filePath} '] ).map( arg => stringFormat(arg, { filePath : filePath } ) );
exec(`${shortDescUtil.cmd} ${args.join(' ')}`, (err, stdout) => {
if(err) {
logDebug(
{ error : err.message, cmd : shortDescUtil.cmd, args : args },
'Short description command failed'
);
} else {
//
// "...no more than 45 characters long" -- FILE_ID.DIZ v1.9 spec:
// http://www.textfiles.com/computers/fileid.txt
//
fileEntry.desc = (wordWrapText( (stdout || '').trim(), { width : 45 } ).wrapped || []).join('\n');
} }
const args = (util.args || [ '{filePath} '] ).map( arg => stringFormat(arg, { filePath : filePath } ) );
exec(`${util.cmd} ${args.join(' ')}`, (err, stdout) => {
if(err) {
logDebug(
{ error : err.message, cmd : util.cmd, args : args },
`${_.upperFirst(descType)} description command failed`
);
} else {
stdout = (stdout || '').trim();
if(stdout.length > 0) {
const key = 'short' === descType ? 'desc' : 'descLong';
if('desc' === key) {
//
// Word wrap short descriptions to FILE_ID.DIZ spec
//
// "...no more than 45 characters long"
//
// See http://www.textfiles.com/computers/fileid.txt
//
stdout = (wordWrapText( stdout, { width : 45 } ).wrapped || []).join('\n');
}
fileEntry[key] = stdout;
}
}
return nextDesc(null);
});
}, () => {
return callback(null); return callback(null);
}); });
}, },

View File

@ -396,13 +396,13 @@ module.exports = class FileEntry {
FROM file_user_rating FROM file_user_rating
WHERE file_id = f.file_id) WHERE file_id = f.file_id)
AS avg_rating AS avg_rating
FROM file f, file_meta m`; FROM file f`;
sqlOrderBy = `ORDER BY avg_rating ${sqlOrderDir}`; sqlOrderBy = `ORDER BY avg_rating ${sqlOrderDir}`;
} else { } else {
sql = sql =
`SELECT DISTINCT f.file_id, f.${filter.sort} `SELECT DISTINCT f.file_id, f.${filter.sort}
FROM file f, file_meta m`; FROM file f`;
sqlOrderBy = getOrderByWithCast(`f.${filter.sort}`) + ' ' + sqlOrderDir; sqlOrderBy = getOrderByWithCast(`f.${filter.sort}`) + ' ' + sqlOrderDir;
} }
@ -410,7 +410,7 @@ module.exports = class FileEntry {
} else { } else {
sql = sql =
`SELECT DISTINCT f.file_id `SELECT DISTINCT f.file_id
FROM file f, file_meta m`; FROM file f`;
sqlOrderBy = `${getOrderByWithCast('f.file_id')} ${sqlOrderDir}`; sqlOrderBy = `${getOrderByWithCast('f.file_id')} ${sqlOrderDir}`;
} }

View File

@ -6,5 +6,9 @@ const mimeTypes = require('mime-types');
exports.resolveMimeType = resolveMimeType; exports.resolveMimeType = resolveMimeType;
function resolveMimeType(query) { function resolveMimeType(query) {
return mimeTypes.extension(query) || mimeTypes.lookup(query) || undefined; // lookup() returns false; we want undefined if(mimeTypes.extensions[query]) {
return query; // already a mime-type
}
return mimeTypes.lookup(query) || undefined; // lookup() returns false; we want undefined
} }

View File

@ -378,10 +378,11 @@ function getThemeArt(options, cb) {
options.random = _.isBoolean(options.random) ? options.random : true; // FILENAME<n>.EXT support options.random = _.isBoolean(options.random) ? options.random : true; // FILENAME<n>.EXT support
// //
// We look for themed art in the following manor: // We look for themed art in the following order:
// * Supplied theme via |themeId| // 1) Direct/relative path
// * Fallback 1: Default theme (if different than |themeId|) // 2) Via theme supplied by |themeId|
// * General art directory // 3) Via default theme
// 4) General art directory
// //
async.waterfall( async.waterfall(
[ [
@ -389,7 +390,7 @@ function getThemeArt(options, cb) {
// //
// We allow relative (to enigma-bbs) or full paths // We allow relative (to enigma-bbs) or full paths
// //
if('/' === options.name[0]) { if('/' === options.name.charAt(0)) {
// just take the path as-is // just take the path as-is
options.basePath = paths.dirname(options.name); options.basePath = paths.dirname(options.name);
} else if(options.name.indexOf('/') > -1) { } else if(options.name.indexOf('/') > -1) {
@ -409,41 +410,35 @@ function getThemeArt(options, cb) {
} }
options.basePath = paths.join(Config.paths.themes, options.themeId); options.basePath = paths.join(Config.paths.themes, options.themeId);
art.getArt(options.name, options, (err, artInfo) => {
art.getArt(options.name, options, function artLoaded(err, artInfo) { return callback(null, artInfo);
callback(null, artInfo);
}); });
}, },
function fromDefaultTheme(artInfo, callback) { function fromDefaultTheme(artInfo, callback) {
if(artInfo || Config.defaults.theme === options.themeId) { if(artInfo || Config.defaults.theme === options.themeId) {
callback(null, artInfo); return callback(null, artInfo);
} else {
options.basePath = paths.join(Config.paths.themes, Config.defaults.theme);
art.getArt(options.name, options, function artLoaded(err, artInfo) {
callback(null, artInfo);
});
} }
options.basePath = paths.join(Config.paths.themes, Config.defaults.theme);
art.getArt(options.name, options, (err, artInfo) => {
return callback(null, artInfo);
});
}, },
function fromGeneralArtDir(artInfo, callback) { function fromGeneralArtDir(artInfo, callback) {
if(artInfo) { if(artInfo) {
callback(null, artInfo); return callback(null, artInfo);
} else {
options.basePath = Config.paths.art;
art.getArt(options.name, options, function artLoaded(err, artInfo) {
callback(err, artInfo);
});
} }
options.basePath = Config.paths.art;
art.getArt(options.name, options, (err, artInfo) => {
return callback(err, artInfo);
});
} }
], ],
function complete(err, artInfo) { function complete(err, artInfo) {
if(err) { if(err) {
if(options.client) { const logger = _.get(options, 'client.log') || Log;
options.client.log.debug( { error : err.message }, 'Cannot find theme art' ); logger.debug( { reason : err.message }, 'Cannot find theme art');
} else {
Log.debug( { error : err.message }, 'Cannot find theme art' );
}
} }
return cb(err, artInfo); return cb(err, artInfo);
} }

View File

@ -2372,6 +2372,17 @@
] ]
focusItemIndex: 1 focusItemIndex: 1
} }
// :TODO: these can be removed once the hack is not required:
TL10: {}
TL11: {}
TL12: {}
TL13: {}
TL14: {}
TL15: {}
TL16: {}
TL17: {}
TL18: {}
} }
submit: { submit: {
@ -2453,6 +2464,17 @@
"general", "nfo/readme", "file listing" "general", "nfo/readme", "file listing"
] ]
} }
// :TODO: these can be removed once the hack is not required:
TL10: {}
TL11: {}
TL12: {}
TL13: {}
TL14: {}
TL15: {}
TL16: {}
TL17: {}
TL18: {}
} }
actionKeys: [ actionKeys: [

View File

@ -42,7 +42,8 @@
"ssh2": "^0.5.1", "ssh2": "^0.5.1",
"temptmp": "^1.0.0", "temptmp": "^1.0.0",
"uuid": "^3.0.1", "uuid": "^3.0.1",
"uuid-parse": "^1.0.0" "uuid-parse": "^1.0.0",
"ws" : "^2.3.1"
}, },
"devDependencies": {}, "devDependencies": {},
"engines": { "engines": {

View File

@ -8,9 +8,68 @@
const exiftool = require('exiftool'); const exiftool = require('exiftool');
const fs = require('fs'); const fs = require('fs');
const moment = require('moment');
const TOOL_VERSION = '1.0.0.0';

//  Lookup table keyed by metadata.fileType; the value is the function
//  that builds the description for that type.
const FILETYPE_HANDLERS = {};
[
    [ audioFile, [ 'AIFF', 'APE', 'FLAC', 'OGG', 'MP3' ] ],
    [ documentFile, [ 'PDF', 'DOC', 'DOCX', 'DOCM', 'ODB', 'ODC', 'ODF', 'ODG', 'ODI', 'ODP', 'ODS', 'ODT' ] ],
    [ imageFile, [ 'PNG', 'JPEG', 'GIF', 'WEBP', 'XCF' ] ],
].forEach( ([ handler, fileTypes ]) => {
    fileTypes.forEach(fileType => {
        FILETYPE_HANDLERS[fileType] = handler;
    });
});
/**
 * Builds a one-line description for an audio file.
 *
 * Output has the form "<artist> - <title> (<year>, <bitrate>)". A missing
 * artist or title is replaced by a placeholder. Year and bitrate are only
 * included when present, and the parenthetical is omitted entirely when
 * neither is available, so the text "undefined" never appears.
 *
 * @param {Object} metadata - Metadata object for the file; reads the
 *     artist, title, year and audioBitrate members.
 * @returns {string} The description.
 */
function audioFile(metadata) {
    const details = [];
    if(metadata.year) {
        details.push(metadata.year);
    }
    if(metadata.audioBitrate) {
        details.push(metadata.audioBitrate);
    }

    let desc = `${metadata.artist||'Unknown Artist'} - ${metadata.title||'Unknown'}`;
    if(details.length > 0) {
        desc += ` (${details.join(', ')})`;
    }
    return desc;
}
/**
 * Builds a one-line description for a document file.
 *
 * Output has the form "<author> - <title>", followed by " (<YYYY>)" when
 * the metadata carries a creation date that parses as valid.
 *
 * @param {Object} metadata - Metadata object for the file; reads the
 *     author, title and createdate members.
 * @returns {string} The description.
 */
function documentFile(metadata) {
    let desc = `${metadata.author||'Unknown Author'} - ${metadata.title||'Unknown'}`;

    //  moment(undefined) is the current time, which would stamp the
    //  current year on documents with no creation date; only parse when
    //  the field is actually present.
    if(metadata.createdate) {
        const created = moment(metadata.createdate);
        if(created.isValid()) {
            desc += ` (${created.format('YYYY')})`;
        }
    }
    return desc;
}
/**
 * Builds a one-line description for an image file.
 *
 * Output has the form "<fileType> image (Animated, <size>px, <YYYY>)".
 * Each detail is included only when the corresponding metadata is present
 * (and, for the year, parses as a valid date). The parenthetical is
 * omitted when no detail is available.
 *
 * @param {Object} metadata - Metadata object for the file; reads the
 *     fileType, animationIterations, imageSize and createdate members.
 * @returns {string} The description.
 */
function imageFile(metadata) {
    const details = [];
    if(metadata.animationIterations) {
        details.push('Animated');
    }
    if(metadata.imageSize) {
        details.push(`${metadata.imageSize}px`);
    }

    //  moment(undefined) is the current time; skip parsing when there is
    //  no creation date so the current year is never reported by mistake.
    if(metadata.createdate) {
        const created = moment(metadata.createdate);
        if(created.isValid()) {
            details.push(created.format('YYYY'));
        }
    }

    const desc = `${metadata.fileType} image`;
    return details.length > 0 ? `${desc} (${details.join(', ')})` : desc;
}
function main() { function main() {
const path = process.argv[2]; const argv = exports.argv = require('minimist')(process.argv.slice(2), {
alias : {
h : 'help',
v : 'version',
}
});
if(argv.version) {
console.info(TOOL_VERSION);
return 0;
}
if(0 === argv._.length || argv.help) {
console.info('usage: exiftool2desc.js [--version] [--help] PATH');
return 0;
}
const path = argv._[0];
fs.readFile(path, (err, data) => { fs.readFile(path, (err, data) => {
if(err) { if(err) {
@ -19,26 +78,15 @@ function main() {
exiftool.metadata(data, (err, metadata) => { exiftool.metadata(data, (err, metadata) => {
if(err) { if(err) {
return -1;
}
const handler = FILETYPE_HANDLERS[metadata.fileType];
if(!handler) {
return -1; return -1;
} }
switch(metadata.fileType) { console.info(handler(metadata));
case 'AIFF' :
case 'APE' :
case 'FLAC' :
case 'OGG' :
case 'MP3' :
console.log(`${metadata.artist||'Unknown Artist'} - ${metadata.title||'Unknown'} (${metadata.audioBitrate})`);
break;
case 'PDF' :
console.log(`${metadata.author||'Unknown Author'} - ${metadata.title||'Unknown'}`);
break;
default :
return -1;
}
return 0; return 0;
}); });
}); });