enigma-bbs/core/file_entry.js

764 lines
25 KiB
JavaScript

/* jslint node: true */
'use strict';
const fileDb = require('./database.js').dbs.file;
const Errors = require('./enig_error.js').Errors;
const {
getISOTimestampString,
sanitizeString
} = require('./database.js');
const Config = require('./config.js').get;
// deps
const async = require('async');
const _ = require('lodash');
const paths = require('path');
const fse = require('fs-extra');
const { unlink, readFile } = require('graceful-fs');
const crypto = require('crypto');
const moment = require('moment');
// Columns of the |file| table selected by loadBasicEntry(); each one is copied
// onto the destination object under its camelCased name.
const FILE_TABLE_MEMBERS = [
    'file_id',
    'area_tag',
    'file_sha256',
    'file_name',
    'storage_tag',
    'desc',
    'desc_long',
    'upload_timestamp',
];
// Parse a stored meta value as a base-10 integer (NaN when not numeric).
const parseMetaInt = (value) => Number.parseInt(value, 10);

// Parse a stored meta value as JSON. Malformed, empty, or falsy results yield
// an empty object so that a single bad row cannot throw while meta is loaded.
const parseMetaJson = (value) => {
    try {
        return JSON.parse(value) || {};
    } catch(e) {
        return {};
    }
};

// Well known meta names mapped to their *read* converter, if any. A |null|
// converter means the stored value is used as-is. Key order is significant:
// it is what FileEntry.WellKnownMetaValues exposes.
const FILE_WELL_KNOWN_META = {
    // name -> *read* converter, if any
    upload_by_username  : null,
    upload_by_user_id   : (u) => parseMetaInt(u) || 0,
    file_md5            : null,
    file_sha1           : null,
    file_crc32          : null,
    est_release_year    : (y) => parseMetaInt(y) || new Date().getFullYear(),   // falls back to the current year
    dl_count            : (d) => parseMetaInt(d) || 0,
    byte_size           : (b) => parseMetaInt(b) || 0,
    archive_type        : null,
    short_file_name     : null, // e.g. DOS 8.3 filename, avail in some scenarios such as TIC import
    tic_origin          : null, // TIC "Origin"
    tic_desc            : null, // TIC "Desc"
    tic_ldesc           : null, // TIC "Ldesc" joined by '\n'
    session_temp_dl     : (v) => Boolean(parseMetaInt(v)),
    desc_sauce          : parseMetaJson,
    desc_long_sauce     : parseMetaJson,
};
module.exports = class FileEntry {
constructor(options) {
options = options || {};
this.fileId = options.fileId || 0;
this.areaTag = options.areaTag || '';
this.meta = Object.assign( { dl_count : 0 }, options.meta);
this.hashTags = options.hashTags || new Set();
this.fileName = options.fileName;
this.storageTag = options.storageTag;
this.fileSha256 = options.fileSha256;
}
static loadBasicEntry(fileId, dest, cb) {
dest = dest || {};
fileDb.get(
`SELECT ${FILE_TABLE_MEMBERS.join(', ')}
FROM file
WHERE file_id=?
LIMIT 1;`,
[ fileId ],
(err, file) => {
if(err) {
return cb(err);
}
if(!file) {
return cb(Errors.DoesNotExist('No file is available by that ID'));
}
// assign props from |file|
FILE_TABLE_MEMBERS.forEach(prop => {
dest[_.camelCase(prop)] = file[prop];
});
return cb(null, dest);
}
);
}
load(fileId, cb) {
const self = this;
async.series(
[
function loadBasicEntry(callback) {
FileEntry.loadBasicEntry(fileId, self, callback);
},
function loadMeta(callback) {
return self.loadMeta(callback);
},
function loadHashTags(callback) {
return self.loadHashTags(callback);
},
function loadUserRating(callback) {
return self.loadRating(callback);
}
],
err => {
return cb(err);
}
);
}
// Store this entry in the file database.
//
// - isUpdate: optional; when truthy the existing row identified by |fileId| is
// replaced. May be omitted, in which case the first argument is treated as |cb|
// and a new row is written.
// - cb: receives (err) once the transaction has been committed or rolled back.
//
// Steps run in order: validate arguments, compute the SHA-256 if it is not
// already set, begin a transaction, write the |file| row, then each meta value
// and each hash tag using that same transaction.
persist(isUpdate, cb) {
// support persist(cb)
if(!cb && _.isFunction(isUpdate)) {
cb = isUpdate;
isUpdate = false;
}
const self = this;
async.waterfall(
[
function check(callback) {
// an update must target an existing row
if(isUpdate && !self.fileId) {
return callback(Errors.Invalid('Cannot update file entry without an existing "fileId" member'));
}
return callback(null);
},
function calcSha256IfNeeded(callback) {
if(self.fileSha256) {
return callback(null);
}
// updates never (re)hash the file; the caller must supply the hash
if(isUpdate) {
return callback(Errors.MissingParam('fileSha256 property must be set for updates!'));
}
// the whole file at |filePath| is read into memory and hashed in one pass
readFile(self.filePath, (err, data) => {
if(err) {
return callback(err);
}
const sha256 = crypto.createHash('sha256');
sha256.update(data);
self.fileSha256 = sha256.digest('hex');
return callback(null);
});
},
function startTrans(callback) {
// the transaction object is handed to every following step
return fileDb.beginTransaction(callback);
},
function storeEntry(trans, callback) {
// both paths write getISOTimestampString() (called without arguments) as
// upload_timestamp - presumably "now"; confirm against database.js
if(isUpdate) {
trans.run(
`REPLACE INTO file (file_id, area_tag, file_sha256, file_name, storage_tag, desc, desc_long, upload_timestamp)
VALUES(?, ?, ?, ?, ?, ?, ?, ?);`,
[ self.fileId, self.areaTag, self.fileSha256, self.fileName, self.storageTag, self.desc, self.descLong, getISOTimestampString() ],
err => {
return callback(err, trans);
}
);
} else {
trans.run(
`REPLACE INTO file (area_tag, file_sha256, file_name, storage_tag, desc, desc_long, upload_timestamp)
VALUES(?, ?, ?, ?, ?, ?, ?);`,
[ self.areaTag, self.fileSha256, self.fileName, self.storageTag, self.desc, self.descLong, getISOTimestampString() ],
function inserted(err) { // use non-arrow func for 'this' scope / lastID
if(!err) {
// remember the newly assigned row ID for the meta & hash tag steps
self.fileId = this.lastID;
}
return callback(err, trans);
}
);
}
},
function storeMeta(trans, callback) {
// one file_meta row per key of |meta|
async.each(Object.keys(self.meta), (n, next) => {
const v = self.meta[n];
return FileEntry.persistMetaValue(self.fileId, n, v, trans, next);
},
err => {
return callback(err, trans);
});
},
function storeHashTags(trans, callback) {
const hashTagsArray = Array.from(self.hashTags);
async.each(hashTagsArray, (hashTag, next) => {
return FileEntry.persistHashTag(self.fileId, hashTag, trans, next);
},
err => {
return callback(err, trans);
});
}
],
(err, trans) => {
// :TODO: Log orig err
// |trans| is only set if we got as far as beginning the transaction;
// roll back on error, commit otherwise. An error from the rollback or
// commit itself takes precedence over the original error.
if(trans) {
trans[err ? 'rollback' : 'commit'](transErr => {
return cb(transErr ? transErr : err);
});
} else {
return cb(err);
}
}
);
}
static getAreaStorageDirectoryByTag(storageTag) {
const config = Config();
const storageLocation = (storageTag && config.fileBase.storageTags[storageTag]);
// absolute paths as-is
if(storageLocation && '/' === storageLocation.charAt(0)) {
return storageLocation;
}
// relative to |areaStoragePrefix|
return paths.join(config.fileBase.areaStoragePrefix, storageLocation || '');
}
get filePath() {
const storageDir = FileEntry.getAreaStorageDirectoryByTag(this.storageTag);
return paths.join(storageDir, this.fileName);
}
static quickCheckExistsByPath(fullPath, cb) {
fileDb.get(
`SELECT COUNT() AS count
FROM file
WHERE file_name = ?
LIMIT 1;`,
[ paths.basename(fullPath) ],
(err, rows) => {
return err ? cb(err) : cb(null, rows.count > 0 ? true : false);
}
);
}
static persistUserRating(fileId, userId, rating, cb) {
return fileDb.run(
`REPLACE INTO file_user_rating (file_id, user_id, rating)
VALUES (?, ?, ?);`,
[ fileId, userId, rating ],
cb
);
}
static persistMetaValue(fileId, name, value, transOrDb, cb) {
if(!_.isFunction(cb) && _.isFunction(transOrDb)) {
cb = transOrDb;
transOrDb = fileDb;
}
return transOrDb.run(
`REPLACE INTO file_meta (file_id, meta_name, meta_value)
VALUES (?, ?, ?);`,
[ fileId, name, value ],
cb
);
}
static incrementAndPersistMetaValue(fileId, name, incrementBy, cb) {
incrementBy = incrementBy || 1;
fileDb.run(
`UPDATE file_meta
SET meta_value = meta_value + ?
WHERE file_id = ? AND meta_name = ?;`,
[ incrementBy, fileId, name ],
err => {
if(cb) {
return cb(err);
}
}
);
}
loadMeta(cb) {
fileDb.each(
`SELECT meta_name, meta_value
FROM file_meta
WHERE file_id=?;`,
[ this.fileId ],
(err, meta) => {
if(meta) {
const conv = FILE_WELL_KNOWN_META[meta.meta_name];
this.meta[meta.meta_name] = conv ? conv(meta.meta_value) : meta.meta_value;
}
},
err => {
return cb(err);
}
);
}
static persistHashTag(fileId, hashTag, transOrDb, cb) {
if(!_.isFunction(cb) && _.isFunction(transOrDb)) {
cb = transOrDb;
transOrDb = fileDb;
}
transOrDb.serialize( () => {
transOrDb.run(
`INSERT OR IGNORE INTO hash_tag (hash_tag)
VALUES (?);`,
[ hashTag ]
);
transOrDb.run(
`REPLACE INTO file_hash_tag (hash_tag_id, file_id)
VALUES (
(SELECT hash_tag_id
FROM hash_tag
WHERE hash_tag = ?),
?
);`,
[ hashTag, fileId ],
err => {
return cb(err);
}
);
});
}
loadHashTags(cb) {
fileDb.each(
`SELECT ht.hash_tag_id, ht.hash_tag
FROM hash_tag ht
WHERE ht.hash_tag_id IN (
SELECT hash_tag_id
FROM file_hash_tag
WHERE file_id=?
);`,
[ this.fileId ],
(err, hashTag) => {
if(hashTag) {
this.hashTags.add(hashTag.hash_tag);
}
},
err => {
return cb(err);
}
);
}
loadRating(cb) {
fileDb.get(
`SELECT AVG(fur.rating) AS avg_rating
FROM file_user_rating fur
INNER JOIN file f
ON f.file_id = fur.file_id
AND f.file_id = ?`,
[ this.fileId ],
(err, result) => {
if(result) {
this.userRating = result.avg_rating;
}
return cb(err);
}
);
}
setHashTags(hashTags) {
if(_.isString(hashTags)) {
this.hashTags = new Set(hashTags.split(/[\s,]+/));
} else if(Array.isArray(hashTags)) {
this.hashTags = new Set(hashTags);
} else if(hashTags instanceof Set) {
this.hashTags = hashTags;
}
}
static get WellKnownMetaValues() {
return Object.keys(FILE_WELL_KNOWN_META);
}
static findBySha(sha, cb) {
// full or partial SHA-256
fileDb.all(
`SELECT file_id
FROM file
WHERE file_sha256 LIKE "${sha}%"
LIMIT 2;`, // limit 2 such that we can find if there are dupes
(err, fileIdRows) => {
if(err) {
return cb(err);
}
if(!fileIdRows || 0 === fileIdRows.length) {
return cb(Errors.DoesNotExist('No matches'));
}
if(fileIdRows.length > 1) {
return cb(Errors.Invalid('SHA is ambiguous'));
}
const fileEntry = new FileEntry();
return fileEntry.load(fileIdRows[0].file_id, err => {
return cb(err, fileEntry);
});
}
);
}
// Attempt to fine a file by an *existing* full path.
// Checkums may have changed and are not validated here.
static findByFullPath(fullPath, cb) {
// first, basic by-filename lookup.
FileEntry.findByFileNameWildcard(paths.basename(fullPath), (err, entries) => {
if(err) {
return cb(err);
}
if(!entries || !entries.length || entries.length > 1) {
return cb(Errors.DoesNotExist('No matches'));
}
// ensure the *full* path has not changed
// :TODO: if FS is case-insensitive, we probably want a better check here
const possibleMatch = entries[0];
if(possibleMatch.fullPath === fullPath) {
return cb(null, possibleMatch);
}
return cb(Errors.DoesNotExist('No matches'));
});
}
static findByFileNameWildcard(wc, cb) {
// convert any * -> % and ? -> _ for SQLite syntax - see https://www.sqlite.org/lang_expr.html
wc = wc.replace(/\*/g, '%').replace(/\?/g, '_');
fileDb.all(
`SELECT file_id
FROM file
WHERE file_name LIKE "${wc}"
`,
(err, fileIdRows) => {
if(err) {
return cb(err);
}
if(!fileIdRows || 0 === fileIdRows.length) {
return cb(Errors.DoesNotExist('No matches'));
}
const entries = [];
async.each(fileIdRows, (row, nextRow) => {
const fileEntry = new FileEntry();
fileEntry.load(row.file_id, err => {
if(!err) {
entries.push(fileEntry);
}
return nextRow(err);
});
},
err => {
return cb(err, entries);
});
}
);
}
//
// Find file(s) by |filter|
//
// - sort: sort results by any well known name, file_id, or user_rating
// - terms: one or more search terms to search within filenames as well
// as short and long descriptions. We attempt to use the FTS ability when
// possible, but want to allow users to search for wildcard matches in
// which some cases we'll use multiple LIKE queries.
// See _normalizeFileSearchTerms()
//
static findFiles(filter, cb) {
filter = filter || {};
let sql;
let sqlWhere = '';
let sqlOrderBy;
const sqlOrderDir = 'ascending' === filter.order ? 'ASC' : 'DESC';
if(moment.isMoment(filter.newerThanTimestamp)) {
filter.newerThanTimestamp = getISOTimestampString(filter.newerThanTimestamp);
}
function getOrderByWithCast(ob) {
if( [ 'dl_count', 'est_release_year', 'byte_size' ].indexOf(filter.sort) > -1 ) {
return `ORDER BY CAST(${ob} AS INTEGER)`;
}
return `ORDER BY ${ob}`;
}
function appendWhereClause(clause) {
if(sqlWhere) {
sqlWhere += ' AND ';
} else {
sqlWhere += ' WHERE ';
}
sqlWhere += clause;
}
if(filter.sort && filter.sort.length > 0) {
if(Object.keys(FILE_WELL_KNOWN_META).indexOf(filter.sort) > -1) { // sorting via a meta value?
sql =
`SELECT DISTINCT f.file_id
FROM file f, file_meta m`;
appendWhereClause(`f.file_id = m.file_id AND m.meta_name = "${filter.sort}"`);
sqlOrderBy = `${getOrderByWithCast('m.meta_value')} ${sqlOrderDir}`;
} else {
// additional special treatment for user ratings: we need to average them
if('user_rating' === filter.sort) {
sql =
`SELECT DISTINCT f.file_id,
(SELECT IFNULL(AVG(rating), 0) rating
FROM file_user_rating
WHERE file_id = f.file_id)
AS avg_rating
FROM file f`;
sqlOrderBy = `ORDER BY avg_rating ${sqlOrderDir}`;
} else {
sql =
`SELECT DISTINCT f.file_id
FROM file f`;
sqlOrderBy = getOrderByWithCast(`f.${filter.sort}`) + ' ' + sqlOrderDir;
}
}
} else {
sql =
`SELECT DISTINCT f.file_id
FROM file f`;
sqlOrderBy = `${getOrderByWithCast('f.file_id')} ${sqlOrderDir}`;
}
if(filter.areaTag && filter.areaTag.length > 0) {
if(Array.isArray(filter.areaTag)) {
const areaList = filter.areaTag.map(t => `"${t}"`).join(', ');
appendWhereClause(`f.area_tag IN(${areaList})`);
} else {
appendWhereClause(`f.area_tag = "${filter.areaTag}"`);
}
}
if(filter.metaPairs && filter.metaPairs.length > 0) {
filter.metaPairs.forEach(mp => {
if(mp.wildcards) {
// convert any * -> % and ? -> _ for SQLite syntax - see https://www.sqlite.org/lang_expr.html
mp.value = mp.value.replace(/\*/g, '%').replace(/\?/g, '_');
appendWhereClause(
`f.file_id IN (
SELECT file_id
FROM file_meta
WHERE meta_name = "${mp.name}" AND meta_value LIKE "${mp.value}"
)`
);
} else {
appendWhereClause(
`f.file_id IN (
SELECT file_id
FROM file_meta
WHERE meta_name = "${mp.name}" AND meta_value = "${mp.value}"
)`
);
}
});
}
if(filter.storageTag && filter.storageTag.length > 0) {
appendWhereClause(`f.storage_tag="${filter.storageTag}"`);
}
if(filter.terms && filter.terms.length > 0) {
const [terms, queryType] = FileEntry._normalizeFileSearchTerms(filter.terms);
if ('fts_match' === queryType) {
// note the ':' in MATCH expr., see https://www.sqlite.org/cvstrac/wiki?p=FullTextIndex
appendWhereClause(
`f.file_id IN (
SELECT rowid
FROM file_fts
WHERE file_fts MATCH ":${terms}"
)`
);
} else {
appendWhereClause(
`(f.file_name LIKE "${terms}" OR
f.desc LIKE "${terms}" OR
f.desc_long LIKE "${terms}")`
);
}
}
// handle e.g. 1998 -> "1998"
if (_.isNumber(filter.tags)) {
filter.tags = filter.tags.toString();
}
if(filter.tags && filter.tags.length > 0) {
// build list of quoted tags; filter.tags comes in as a space and/or comma separated values
const tags = filter.tags.replace(/,/g, ' ').replace(/\s{2,}/g, ' ').split(' ').map( tag => `"${sanitizeString(tag)}"` ).join(',');
appendWhereClause(
`f.file_id IN (
SELECT file_id
FROM file_hash_tag
WHERE hash_tag_id IN (
SELECT hash_tag_id
FROM hash_tag
WHERE hash_tag IN (${tags})
)
)`
);
}
if(_.isString(filter.newerThanTimestamp) && filter.newerThanTimestamp.length > 0) {
appendWhereClause(`DATETIME(f.upload_timestamp) > DATETIME("${filter.newerThanTimestamp}", "+1 seconds")`);
}
if(_.isNumber(filter.newerThanFileId)) {
appendWhereClause(`f.file_id > ${filter.newerThanFileId}`);
}
sql += `${sqlWhere} ${sqlOrderBy}`;
if(_.isNumber(filter.limit)) {
sql += ` LIMIT ${filter.limit}`;
}
sql += ';';
fileDb.all(sql, (err, rows) => {
if(err) {
return cb(err);
}
if(!rows || 0 === rows.length) {
return cb(null, []); // no matches
}
return cb(null, rows.map(r => r.file_id));
});
}
static removeEntry(srcFileEntry, options, cb) {
if(!_.isFunction(cb) && _.isFunction(options)) {
cb = options;
options = {};
}
async.series(
[
function removeFromDatabase(callback) {
fileDb.run(
`DELETE FROM file
WHERE file_id = ?;`,
[ srcFileEntry.fileId ],
err => {
return callback(err);
}
);
},
function optionallyRemovePhysicalFile(callback) {
if(true !== options.removePhysFile) {
return callback(null);
}
unlink(srcFileEntry.filePath, err => {
return callback(err);
});
}
],
err => {
return cb(err);
}
);
}
static moveEntry(srcFileEntry, destAreaTag, destStorageTag, destFileName, cb) {
if(!cb && _.isFunction(destFileName)) {
cb = destFileName;
destFileName = srcFileEntry.fileName;
}
const srcPath = srcFileEntry.filePath;
const dstDir = FileEntry.getAreaStorageDirectoryByTag(destStorageTag);
if(!dstDir) {
return cb(Errors.Invalid('Invalid storage tag'));
}
const dstPath = paths.join(dstDir, destFileName);
async.series(
[
function movePhysFile(callback) {
if(srcPath === dstPath) {
return callback(null); // don't need to move file, but may change areas
}
fse.move(srcPath, dstPath, err => {
return callback(err);
});
},
function updateDatabase(callback) {
fileDb.run(
`UPDATE file
SET area_tag = ?, file_name = ?, storage_tag = ?
WHERE file_id = ?;`,
[ destAreaTag, destFileName, destStorageTag, srcFileEntry.fileId ],
err => {
return callback(err);
}
);
}
],
err => {
return cb(err);
}
);
}
static _normalizeFileSearchTerms(terms) {
// ensure we have reasonable input to start with
terms = sanitizeString(terms.toString());
// No wildcards?
const hasSingleCharWC = terms.indexOf('?') > -1;
if (terms.indexOf('*') === -1 && !hasSingleCharWC) {
return [ terms, 'fts_match' ];
}
const prepareLike = () => {
// Convert * and ? to SQL LIKE style
terms = terms.replace(/\*/g, '%').replace(/\?/g, '_');
return terms;
};
// Any ? wildcards?
if (hasSingleCharWC) {
return [ prepareLike(terms), 'like' ];
}
const split = terms.replace(/\s+/g, ' ').split(' ');
const useLike = split.some(term => {
if (term.indexOf('?') > -1) {
return true;
}
const wcPos = term.indexOf('*');
if (wcPos > -1 && wcPos !== term.length - 1) {
return true;
}
return false;
});
if (useLike) {
return [ prepareLike(terms), 'like' ];
}
return [ terms, 'fts_match' ];
}
};