enigma-bbs/core/file_entry.js

857 lines
27 KiB
JavaScript

/* jslint node: true */
'use strict';
const fileDb = require('./database.js').dbs.file;
const Errors = require('./enig_error.js').Errors;
const { getISOTimestampString, sanitizeString } = require('./database.js');
const Config = require('./config.js').get;
// deps
const async = require('async');
const _ = require('lodash');
const paths = require('path');
const fse = require('fs-extra');
const { unlink, readFile } = require('graceful-fs');
const crypto = require('crypto');
const moment = require('moment');
const FILE_TABLE_MEMBERS = [
'file_id',
'area_tag',
'file_sha256',
'file_name',
'storage_tag',
'desc',
'desc_long',
'upload_timestamp',
];
const FILE_WELL_KNOWN_META = {
// name -> *read* converter, if any
upload_by_username: null,
upload_by_user_id: u => parseInt(u) || 0,
file_md5: null,
file_sha1: null,
file_crc32: null,
est_release_year: y => parseInt(y) || new Date().getFullYear(),
dl_count: d => parseInt(d) || 0,
byte_size: b => parseInt(b) || 0,
archive_type: null,
short_file_name: null, // e.g. DOS 8.3 filename, avail in some scenarios such as TIC import
tic_origin: null, // TIC "Origin"
tic_desc: null, // TIC "Desc"
tic_ldesc: null, // TIC "Ldesc" joined by '\n'
session_temp_dl: v => (parseInt(v) ? true : false),
desc_sauce: s => JSON.parse(s) || {},
desc_long_sauce: s => JSON.parse(s) || {},
};
module.exports = class FileEntry {
constructor(options) {
options = options || {};
this.fileId = options.fileId || 0;
this.areaTag = options.areaTag || '';
this.meta = Object.assign({ dl_count: 0 }, options.meta);
this.hashTags = options.hashTags || new Set();
this.fileName = options.fileName;
this.storageTag = options.storageTag;
this.fileSha256 = options.fileSha256;
}
static loadBasicEntry(fileId, dest, cb) {
dest = dest || {};
fileDb.get(
`SELECT ${FILE_TABLE_MEMBERS.join(', ')}
FROM file
WHERE file_id=?
LIMIT 1;`,
[fileId],
(err, file) => {
if (err) {
return cb(err);
}
if (!file) {
return cb(Errors.DoesNotExist('No file is available by that ID'));
}
// assign props from |file|
FILE_TABLE_MEMBERS.forEach(prop => {
dest[_.camelCase(prop)] = file[prop];
});
return cb(null, dest);
}
);
}
load(fileId, cb) {
const self = this;
async.series(
[
function loadBasicEntry(callback) {
FileEntry.loadBasicEntry(fileId, self, callback);
},
function loadMeta(callback) {
return self.loadMeta(callback);
},
function loadHashTags(callback) {
return self.loadHashTags(callback);
},
function loadUserRating(callback) {
return self.loadRating(callback);
},
],
err => {
return cb(err);
}
);
}
persist(isUpdate, cb) {
if (!cb && _.isFunction(isUpdate)) {
cb = isUpdate;
isUpdate = false;
}
const self = this;
async.waterfall(
[
function check(callback) {
if (isUpdate && !self.fileId) {
return callback(
Errors.Invalid(
'Cannot update file entry without an existing "fileId" member'
)
);
}
return callback(null);
},
function calcSha256IfNeeded(callback) {
if (self.fileSha256) {
return callback(null);
}
if (isUpdate) {
return callback(
Errors.MissingParam(
'fileSha256 property must be set for updates!'
)
);
}
readFile(self.filePath, (err, data) => {
if (err) {
return callback(err);
}
const sha256 = crypto.createHash('sha256');
sha256.update(data);
self.fileSha256 = sha256.digest('hex');
return callback(null);
});
},
function startTrans(callback) {
return fileDb.beginTransaction(callback);
},
function storeEntry(trans, callback) {
if (isUpdate) {
trans.run(
`REPLACE INTO file (file_id, area_tag, file_sha256, file_name, storage_tag, desc, desc_long, upload_timestamp)
VALUES(?, ?, ?, ?, ?, ?, ?, ?);`,
[
self.fileId,
self.areaTag,
self.fileSha256,
self.fileName,
self.storageTag,
self.desc,
self.descLong,
getISOTimestampString(),
],
err => {
return callback(err, trans);
}
);
} else {
trans.run(
`REPLACE INTO file (area_tag, file_sha256, file_name, storage_tag, desc, desc_long, upload_timestamp)
VALUES(?, ?, ?, ?, ?, ?, ?);`,
[
self.areaTag,
self.fileSha256,
self.fileName,
self.storageTag,
self.desc,
self.descLong,
getISOTimestampString(),
],
function inserted(err) {
// use non-arrow func for 'this' scope / lastID
if (!err) {
self.fileId = this.lastID;
}
return callback(err, trans);
}
);
}
},
function storeMeta(trans, callback) {
async.each(
Object.keys(self.meta),
(n, next) => {
const v = self.meta[n];
return FileEntry.persistMetaValue(
self.fileId,
n,
v,
trans,
next
);
},
err => {
return callback(err, trans);
}
);
},
function storeHashTags(trans, callback) {
const hashTagsArray = Array.from(self.hashTags);
async.each(
hashTagsArray,
(hashTag, next) => {
return FileEntry.persistHashTag(
self.fileId,
hashTag,
trans,
next
);
},
err => {
return callback(err, trans);
}
);
},
],
(err, trans) => {
// :TODO: Log orig err
if (trans) {
trans[err ? 'rollback' : 'commit'](transErr => {
return cb(transErr ? transErr : err);
});
} else {
return cb(err);
}
}
);
}
static getAreaStorageDirectoryByTag(storageTag) {
const config = Config();
const storageLocation = storageTag && config.fileBase.storageTags[storageTag];
// absolute paths as-is
if (storageLocation && '/' === storageLocation.charAt(0)) {
return storageLocation;
}
// relative to |areaStoragePrefix|
return paths.join(config.fileBase.areaStoragePrefix, storageLocation || '');
}
get filePath() {
const storageDir = FileEntry.getAreaStorageDirectoryByTag(this.storageTag);
return paths.join(storageDir, this.fileName);
}
static quickCheckExistsByPath(fullPath, cb) {
fileDb.get(
`SELECT COUNT() AS count
FROM file
WHERE file_name = ?
LIMIT 1;`,
[paths.basename(fullPath)],
(err, rows) => {
return err ? cb(err) : cb(null, rows.count > 0 ? true : false);
}
);
}
static persistUserRating(fileId, userId, rating, cb) {
return fileDb.run(
`REPLACE INTO file_user_rating (file_id, user_id, rating)
VALUES (?, ?, ?);`,
[fileId, userId, rating],
cb
);
}
static removeUserRatings(userId, cb) {
return fileDb.run(
`DELETE FROM file_user_rating
WHERE user_id = ?;`,
[userId],
cb
);
}
static persistMetaValue(fileId, name, value, transOrDb, cb) {
if (!_.isFunction(cb) && _.isFunction(transOrDb)) {
cb = transOrDb;
transOrDb = fileDb;
}
return transOrDb.run(
`REPLACE INTO file_meta (file_id, meta_name, meta_value)
VALUES (?, ?, ?);`,
[fileId, name, value],
cb
);
}
static incrementAndPersistMetaValue(fileId, name, incrementBy, cb) {
incrementBy = incrementBy || 1;
fileDb.run(
`UPDATE file_meta
SET meta_value = meta_value + ?
WHERE file_id = ? AND meta_name = ?;`,
[incrementBy, fileId, name],
err => {
if (cb) {
return cb(err);
}
}
);
}
loadMeta(cb) {
fileDb.each(
`SELECT meta_name, meta_value
FROM file_meta
WHERE file_id=?;`,
[this.fileId],
(err, meta) => {
if (meta) {
const conv = FILE_WELL_KNOWN_META[meta.meta_name];
this.meta[meta.meta_name] = conv
? conv(meta.meta_value)
: meta.meta_value;
}
},
err => {
return cb(err);
}
);
}
static persistHashTag(fileId, hashTag, transOrDb, cb) {
if (!_.isFunction(cb) && _.isFunction(transOrDb)) {
cb = transOrDb;
transOrDb = fileDb;
}
transOrDb.serialize(() => {
transOrDb.run(
`INSERT OR IGNORE INTO hash_tag (hash_tag)
VALUES (?);`,
[hashTag]
);
transOrDb.run(
`REPLACE INTO file_hash_tag (hash_tag_id, file_id)
VALUES (
(SELECT hash_tag_id
FROM hash_tag
WHERE hash_tag = ?),
?
);`,
[hashTag, fileId],
err => {
return cb(err);
}
);
});
}
loadHashTags(cb) {
fileDb.each(
`SELECT ht.hash_tag_id, ht.hash_tag
FROM hash_tag ht
WHERE ht.hash_tag_id IN (
SELECT hash_tag_id
FROM file_hash_tag
WHERE file_id=?
);`,
[this.fileId],
(err, hashTag) => {
if (hashTag) {
this.hashTags.add(hashTag.hash_tag);
}
},
err => {
return cb(err);
}
);
}
loadRating(cb) {
fileDb.get(
`SELECT AVG(fur.rating) AS avg_rating
FROM file_user_rating fur
INNER JOIN file f
ON f.file_id = fur.file_id
AND f.file_id = ?`,
[this.fileId],
(err, result) => {
if (result) {
this.userRating = result.avg_rating;
}
return cb(err);
}
);
}
setHashTags(hashTags) {
if (_.isString(hashTags)) {
this.hashTags = new Set(hashTags.split(/[\s,]+/));
} else if (Array.isArray(hashTags)) {
this.hashTags = new Set(hashTags);
} else if (hashTags instanceof Set) {
this.hashTags = hashTags;
}
}
static get WellKnownMetaValues() {
return Object.keys(FILE_WELL_KNOWN_META);
}
static getFileIdsBySha(sha, options = {}, cb) {
// full or partial SHA-256
const limit = _.isNumber(options.limit) ? `LIMIT ${options.limit}` : '';
fileDb.all(
`SELECT file_id
FROM file
WHERE file_sha256 LIKE "${sha}%" ${limit};`,
(err, fileIdRows) => {
if (err) {
return cb(err);
}
return cb(
null,
(fileIdRows || []).map(r => r.file_id)
);
}
);
}
static findBySha(sha, cb) {
FileEntry.getFileIdsBySha(sha, { limit: 2 }, (err, fileIds) => {
if (err) {
return cb(err);
}
if (!fileIds || 0 === fileIds.length) {
return cb(Errors.DoesNotExist('No matches'));
}
if (fileIds.length > 1) {
return cb(Errors.Invalid('SHA is ambiguous'));
}
const fileEntry = new FileEntry();
return fileEntry.load(fileIds[0], err => {
return cb(err, fileEntry);
});
});
}
// Attempt to fine a file by an *existing* full path.
// Checkums may have changed and are not validated here.
static findByFullPath(fullPath, cb) {
// first, basic by-filename lookup.
FileEntry.findByFileNameWildcard(paths.basename(fullPath), (err, entries) => {
if (err) {
return cb(err);
}
if (!entries || !entries.length || entries.length > 1) {
return cb(Errors.DoesNotExist('No matches'));
}
// ensure the *full* path has not changed
// :TODO: if FS is case-insensitive, we probably want a better check here
const possibleMatch = entries[0];
if (possibleMatch.fullPath === fullPath) {
return cb(null, possibleMatch);
}
return cb(Errors.DoesNotExist('No matches'));
});
}
static findByFileNameWildcard(wc, cb) {
// convert any * -> % and ? -> _ for SQLite syntax - see https://www.sqlite.org/lang_expr.html
wc = wc.replace(/\*/g, '%').replace(/\?/g, '_');
fileDb.all(
`SELECT file_id
FROM file
WHERE file_name LIKE "${wc}"
`,
(err, fileIdRows) => {
if (err) {
return cb(err);
}
if (!fileIdRows || 0 === fileIdRows.length) {
return cb(Errors.DoesNotExist('No matches'));
}
const entries = [];
async.each(
fileIdRows,
(row, nextRow) => {
const fileEntry = new FileEntry();
fileEntry.load(row.file_id, err => {
if (!err) {
entries.push(fileEntry);
}
return nextRow(err);
});
},
err => {
return cb(err, entries);
}
);
}
);
}
//
// Find file(s) by |filter|
//
// - sort: sort results by any well known name, file_id, or user_rating
// - terms: one or more search terms to search within filenames as well
// as short and long descriptions. We attempt to use the FTS ability when
// possible, but want to allow users to search for wildcard matches in
// which some cases we'll use multiple LIKE queries.
// See _normalizeFileSearchTerms()
//
static findFiles(filter, cb) {
filter = filter || {};
let sql;
let sqlWhere = '';
let sqlOrderBy;
const sqlOrderDir = 'ascending' === filter.order ? 'ASC' : 'DESC';
if (moment.isMoment(filter.newerThanTimestamp)) {
filter.newerThanTimestamp = getISOTimestampString(filter.newerThanTimestamp);
}
function getOrderByWithCast(ob) {
if (['dl_count', 'est_release_year', 'byte_size'].indexOf(filter.sort) > -1) {
return `ORDER BY CAST(${ob} AS INTEGER)`;
}
return `ORDER BY ${ob}`;
}
function appendWhereClause(clause) {
if (sqlWhere) {
sqlWhere += ' AND ';
} else {
sqlWhere += ' WHERE ';
}
sqlWhere += clause;
}
if (filter.sort && filter.sort.length > 0) {
if (Object.keys(FILE_WELL_KNOWN_META).indexOf(filter.sort) > -1) {
// sorting via a meta value?
sql = `SELECT DISTINCT f.file_id
FROM file f, file_meta m`;
appendWhereClause(
`f.file_id = m.file_id AND m.meta_name = "${filter.sort}"`
);
sqlOrderBy = `${getOrderByWithCast('m.meta_value')} ${sqlOrderDir}`;
} else {
// additional special treatment for user ratings: we need to average them
if ('user_rating' === filter.sort) {
sql = `SELECT DISTINCT f.file_id,
(SELECT IFNULL(AVG(rating), 0) rating
FROM file_user_rating
WHERE file_id = f.file_id)
AS avg_rating
FROM file f`;
sqlOrderBy = `ORDER BY avg_rating ${sqlOrderDir}`;
} else {
sql = `SELECT DISTINCT f.file_id
FROM file f`;
sqlOrderBy =
getOrderByWithCast(`f.${filter.sort}`) + ' ' + sqlOrderDir;
}
}
} else {
sql = `SELECT DISTINCT f.file_id
FROM file f`;
sqlOrderBy = `${getOrderByWithCast('f.file_id')} ${sqlOrderDir}`;
}
if (filter.areaTag && filter.areaTag.length > 0) {
if (Array.isArray(filter.areaTag)) {
const areaList = filter.areaTag.map(t => `"${t}"`).join(', ');
appendWhereClause(`f.area_tag IN(${areaList})`);
} else {
appendWhereClause(`f.area_tag = "${filter.areaTag}"`);
}
}
if (filter.metaPairs && filter.metaPairs.length > 0) {
filter.metaPairs.forEach(mp => {
if (mp.wildcards) {
// convert any * -> % and ? -> _ for SQLite syntax - see https://www.sqlite.org/lang_expr.html
mp.value = mp.value.replace(/\*/g, '%').replace(/\?/g, '_');
appendWhereClause(
`f.file_id IN (
SELECT file_id
FROM file_meta
WHERE meta_name = "${mp.name}" AND meta_value LIKE "${mp.value}"
)`
);
} else {
appendWhereClause(
`f.file_id IN (
SELECT file_id
FROM file_meta
WHERE meta_name = "${mp.name}" AND meta_value = "${mp.value}"
)`
);
}
});
}
if (filter.storageTag && filter.storageTag.length > 0) {
appendWhereClause(`f.storage_tag="${filter.storageTag}"`);
}
if (filter.terms && filter.terms.length > 0) {
const [terms, queryType] = FileEntry._normalizeFileSearchTerms(filter.terms);
if ('fts_match' === queryType) {
// note the ':' in MATCH expr., see https://www.sqlite.org/cvstrac/wiki?p=FullTextIndex
appendWhereClause(
`f.file_id IN (
SELECT rowid
FROM file_fts
WHERE file_fts MATCH ":${terms}"
)`
);
} else {
appendWhereClause(
`(f.file_name LIKE "${terms}" OR
f.desc LIKE "${terms}" OR
f.desc_long LIKE "${terms}")`
);
}
}
if (_.isString(filter.fileName) && filter.fileName.length > 0) {
const caseSensitive = _.get(filter, 'filenameCaseSensitive', false);
const collate = caseSensitive ? '' : 'COLLATE NOCASE';
appendWhereClause(
`(f.file_name = "${sanitizeString(filter.fileName)}" ${collate})`
);
}
// handle e.g. 1998 -> "1998"
if (_.isNumber(filter.tags)) {
filter.tags = filter.tags.toString();
}
if (filter.tags && filter.tags.length > 0) {
// build list of quoted tags; filter.tags comes in as a space and/or comma separated values
const tags = filter.tags
.replace(/,/g, ' ')
.replace(/\s{2,}/g, ' ')
.split(' ')
.map(tag => `"${sanitizeString(tag)}"`)
.join(',');
appendWhereClause(
`f.file_id IN (
SELECT file_id
FROM file_hash_tag
WHERE hash_tag_id IN (
SELECT hash_tag_id
FROM hash_tag
WHERE hash_tag IN (${tags})
)
)`
);
}
if (
_.isString(filter.newerThanTimestamp) &&
filter.newerThanTimestamp.length > 0
) {
appendWhereClause(
`DATETIME(f.upload_timestamp) > DATETIME("${filter.newerThanTimestamp}", "+1 seconds")`
);
}
if (_.isNumber(filter.newerThanFileId)) {
appendWhereClause(`f.file_id > ${filter.newerThanFileId}`);
}
sql += `${sqlWhere} ${sqlOrderBy}`;
if (_.isNumber(filter.limit)) {
sql += ` LIMIT ${filter.limit}`;
}
sql += ';';
fileDb.all(sql, (err, rows) => {
if (err) {
return cb(err);
}
if (!rows || 0 === rows.length) {
return cb(null, []); // no matches
}
return cb(
null,
rows.map(r => r.file_id)
);
});
}
static removeEntry(srcFileEntry, options, cb) {
if (!_.isFunction(cb) && _.isFunction(options)) {
cb = options;
options = {};
}
async.series(
[
function removeFromDatabase(callback) {
fileDb.run(
`DELETE FROM file
WHERE file_id = ?;`,
[srcFileEntry.fileId],
err => {
return callback(err);
}
);
},
function optionallyRemovePhysicalFile(callback) {
if (true !== options.removePhysFile) {
return callback(null);
}
unlink(srcFileEntry.filePath, err => {
return callback(err);
});
},
],
err => {
return cb(err);
}
);
}
static moveEntry(srcFileEntry, destAreaTag, destStorageTag, destFileName, cb) {
if (!cb && _.isFunction(destFileName)) {
cb = destFileName;
destFileName = srcFileEntry.fileName;
}
const srcPath = srcFileEntry.filePath;
const dstDir = FileEntry.getAreaStorageDirectoryByTag(destStorageTag);
if (!dstDir) {
return cb(Errors.Invalid('Invalid storage tag'));
}
const dstPath = paths.join(dstDir, destFileName);
async.series(
[
function movePhysFile(callback) {
if (srcPath === dstPath) {
return callback(null); // don't need to move file, but may change areas
}
fse.move(srcPath, dstPath, err => {
return callback(err);
});
},
function updateDatabase(callback) {
fileDb.run(
`UPDATE file
SET area_tag = ?, file_name = ?, storage_tag = ?
WHERE file_id = ?;`,
[destAreaTag, destFileName, destStorageTag, srcFileEntry.fileId],
err => {
return callback(err);
}
);
},
],
err => {
return cb(err);
}
);
}
static _normalizeFileSearchTerms(terms) {
// ensure we have reasonable input to start with
terms = sanitizeString(terms.toString());
// No wildcards?
const hasSingleCharWC = terms.indexOf('?') > -1;
if (terms.indexOf('*') === -1 && !hasSingleCharWC) {
return [terms, 'fts_match'];
}
const prepareLike = () => {
// Convert * and ? to SQL LIKE style
terms = terms.replace(/\*/g, '%').replace(/\?/g, '_');
return terms;
};
// Any ? wildcards?
if (hasSingleCharWC) {
return [prepareLike(terms), 'like'];
}
const split = terms.replace(/\s+/g, ' ').split(' ');
const useLike = split.some(term => {
if (term.indexOf('?') > -1) {
return true;
}
const wcPos = term.indexOf('*');
if (wcPos > -1 && wcPos !== term.length - 1) {
return true;
}
return false;
});
if (useLike) {
return [prepareLike(terms), 'like'];
}
return [terms, 'fts_match'];
}
};