Add/store channel archive metadata document #8

Merged
raucao merged 3 commits from feature/7-archive_metadata into master 2021-11-13 20:56:18 +00:00
3 changed files with 151 additions and 21 deletions

6
package-lock.json generated
View File

@ -2215,6 +2215,12 @@
"regenerate": "^1.4.0" "regenerate": "^1.4.0"
} }
}, },
"regenerator-runtime": {
"version": "0.13.9",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.9.tgz",
"integrity": "sha512-p3VT+cOEgxFsRRA9X4lkI1E+k2/CtnKtU4gcxyaCUreilL/vqI6CdZ3wxVUx3UOUg+gnUOQQcRI7BmSI656MYA==",
"dev": true
},
"resolve": { "resolve": {
"version": "1.20.0", "version": "1.20.0",
"resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz",

View File

@ -22,6 +22,7 @@
"@babel/preset-env": "^7.14.9", "@babel/preset-env": "^7.14.9",
"babel-loader": "^8.2.2", "babel-loader": "^8.2.2",
"webpack": "^5.48.0", "webpack": "^5.48.0",
"webpack-cli": "^4.8.0" "webpack-cli": "^4.8.0",
"regenerator-runtime": "^0.13.9"
} }
} }

View File

@ -1,3 +1,5 @@
import 'regenerator-runtime/runtime';
function pad (num) { function pad (num) {
num = String(num); num = String(num);
if (num.length === 1) { num = "0" + num; } if (num.length === 1) { num = "0" + num; }
@ -12,9 +14,17 @@ function parseDate (date) {
}; };
}; };
/**
 * Find the lowest numeric entry name in a directory listing.
 *
 * Keys are parsed with parseInt, so folder names with a trailing slash or
 * leading zero (e.g. "2021/", "09/") count as numbers, while non-numeric
 * keys (e.g. "meta") are ignored.
 *
 * @param {object} items - Listing object whose keys are item names
 * @returns {number|undefined} Lowest number found, or undefined if the
 *                             listing is missing or has no numeric keys
 */
function lowestNumberInListing(items) {
  const sortedNumbers = Object.keys(items || {})
    .map(key => parseInt(key, 10))
    .filter(num => !Number.isNaN(num))
    // The default sort order compares elements as strings, which would
    // put 10 before 2. A numeric comparator is required here.
    .sort((a, b) => a - b);

  return sortedNumbers[0];
}
const ChatMessages = function (privateClient, publicClient) { const ChatMessages = function (privateClient, publicClient) {
/** /**
* Schema: chat-messages/daily * Schema: chat-messages/daily-archive
* *
* Represents one calendar day of chat messages * Represents one calendar day of chat messages
* *
@ -48,8 +58,7 @@ const ChatMessages = function (privateClient, publicClient) {
"properties": { "properties": {
"@context": { "@context": {
"type": "string", "type": "string",
"default": "https://kosmos.org/ns/v2", "default": "https://kosmos.org/ns/v2/chat-channel"
"enum": ["https://kosmos.org/ns/v2"]
}, },
"@id": { "@id": {
"type": "string", "type": "string",
@ -141,8 +150,53 @@ const ChatMessages = function (privateClient, publicClient) {
"required": [] "required": []
}; };
privateClient.declareType("daily-archive", "https://kosmos.org/ns/v2", archiveSchema); privateClient.declareType("daily-archive", "https://kosmos.org/ns/v2/chat-channel", archiveSchema);
publicClient.declareType("daily-archive", "https://kosmos.org/ns/v2", archiveSchema); publicClient.declareType("daily-archive", "https://kosmos.org/ns/v2/chat-channel", archiveSchema);
/**
 * Schema: chat-messages/daily-archive-meta
 *
 * Describes the metadata document of a channel, which holds the date IDs
 * of the first and last daily archive.
 *
 * @example
 * {
 *   "@context": "https://kosmos.org/ns/v2/chat-channel-meta",
 *   "@id": "chat-messages/irc.libera.chat/channels/kosmos/meta",
 *   "@type": "ChatChannelMeta",
 *   "first": "2009/01/03",
 *   "last": "2021/11/05"
 * }
 */
const archiveMetaContext = "https://kosmos.org/ns/v2/chat-channel-meta";
// Date IDs have the form YYYY/MM/DD
const archiveDatePattern = "^[0-9]{4}\/[0-9]{2}\/[0-9]{2}$";

const archiveMetaSchema = {
  "type": "object",
  "properties": {
    "@context": { "type": "string", "default": archiveMetaContext },
    "@id": { "type": "string" },
    "@type": {
      "type": "string",
      "default": "ChatChannelMeta",
      "enum": ["ChatChannelMeta"]
    },
    "first": { "type": "string", "pattern": archiveDatePattern },
    "last": { "type": "string", "pattern": archiveDatePattern }
  },
  "required": ["@id", "first", "last"]
};

// Register the type on the private client first, then on the public one
[privateClient, publicClient].forEach(client => {
  client.declareType("daily-archive-meta", archiveMetaContext, archiveMetaSchema);
});
/** /**
* A daily archive stores chat messages by calendar day. * A daily archive stores chat messages by calendar day.
@ -257,17 +311,27 @@ const ChatMessages = function (privateClient, publicClient) {
this.dateId = this.parsedDate.year+'/'+this.parsedDate.month+'/'+this.parsedDate.day; this.dateId = this.parsedDate.year+'/'+this.parsedDate.month+'/'+this.parsedDate.day;
/** /**
* @property {string} path - Document path of the archive file * @property {string} channelPath - Base directory path of the channel archives
*/ */
if (this.channelType === "room") { if (this.channelType === "room") {
// Normal chatroom // Normal chatroom
const channelName = this.channelName.replace(/#/,''); const channelName = this.channelName.replace(/#/,'');
this.path = `${this.service.domain}/channels/${channelName}/${this.dateId}`; this.channelPath = `${this.service.domain}/channels/${channelName}`;
} else { } else {
// User direct message // User direct messages
this.path = `${this.service.domain}/users/${this.channelName}/${this.dateId}`; this.channelPath = `${this.service.domain}/users/${this.channelName}`;
} }
/**
* @property {string} path - Path of the archive document
*/
this.path = `${this.channelPath}/${this.dateId}`;
/**
* @property {string} metaPath - Path of the channel's metadata document
*/
this.metaPath = `${this.channelPath}/meta`;
/** /**
* @property {object} client - Public or private remoteStorage.js BaseClient * @property {object} client - Public or private remoteStorage.js BaseClient
*/ */
@ -349,6 +413,8 @@ const ChatMessages = function (privateClient, publicClient) {
* @returns {Promise} * @returns {Promise}
*/ */
remove () { remove () {
// TODO when removing, if previous is set, but not next, it means the
// removed file is the last archive. Thus, set "last" to previous file.
return this.client.remove(this.path); return this.client.remove(this.path);
} }
@ -359,7 +425,7 @@ const ChatMessages = function (privateClient, publicClient) {
* *
* @private * @private
*/ */
_updateDocument (archive, messages) { async _updateDocument (archive, messages) {
console.debug('[chat-messages] Updating archive document'); console.debug('[chat-messages] Updating archive document');
if (Array.isArray(messages)) { if (Array.isArray(messages)) {
@ -380,12 +446,12 @@ const ChatMessages = function (privateClient, publicClient) {
* *
* @private * @private
*/ */
_createDocument (messages) { async _createDocument (messages) {
console.debug('[chat-messages] Creating new archive document'); console.debug('[chat-messages] Creating new archive document');
const archive = this._buildArchiveObject(); const archive = this._buildArchiveObject();
if (Array.isArray(messages)) { if (Array.isArray(messages)) {
messages.forEach((message) => { messages.forEach(message => {
archive.today.messages.push(message); archive.today.messages.push(message);
}); });
} else { } else {
@ -397,16 +463,22 @@ const ChatMessages = function (privateClient, publicClient) {
// That includes setting 'next' in the previous log file // That includes setting 'next' in the previous log file
if (this.previous) { archive.today.previous = this.previous; } if (this.previous) { archive.today.previous = this.previous; }
if (this.next) { archive.today.next = this.next; } if (this.next) { archive.today.next = this.next; }
return this._sync(archive);
} else { } else {
// Find and update previous archive, set 'previous' on this one // Find and update previous archive, set 'previous' on this one
return this._updatePreviousArchive().then((previous) => { const previous = await this._updatePreviousArchive();
if (typeof previous === 'object') { if (typeof previous === 'object') {
archive.today.previous = previous.today['@id']; archive.today.previous = previous.today['@id'];
} }
return this._sync(archive);
});
} }
await this._sync(archive);
// TODO only write meta doc if argument is set on addMessages. This way
// we can avoid race conditions when syncing remote chat messages all at
// once for multiple days
await this._updateArchiveMetaDocument();
return;
} }
/* /*
@ -537,6 +609,57 @@ const ChatMessages = function (privateClient, publicClient) {
}); });
} }
async _updateArchiveMetaDocument () {
const meta = await this.client.getObject(this.metaPath);
if (typeof meta !== 'object') {
return this._createArchiveMetaDocument();
}
// Only update document if current date is newer than known "last"
if (Date.parse(meta.last.replace('/','-')) > Date.parse(this.date)) {
console.debug('[chat-messages]', 'Updating meta document for channel');
meta.last = this.dateId;
await this.client.storeObject('daily-archive-meta', this.metaPath, meta);
}
return;
}
async _createArchiveMetaDocument () {
console.debug('[chat-messages]', 'Creating new meta document for channel');
// When creating a new meta doc, we need to find the oldest archive,
// because older versions of the module did not write a meta doc.
const first = await this._findFirstArchive();
const roomName = this.channelName.replace(/#/,'');
const meta = {
'@id': `chat-messages/${this.service.domain}/channels/${roomName}/meta`,
'@type': 'ChatChannelMeta',
first: first,
last: this.dateId // TODO might have to search for last?
};
return this.client.storeObject('daily-archive-meta', this.metaPath, meta)
.then(() => console.debug('[chat-messages]', 'Meta document written to remote storage'))
.catch(e => {
console.log('[chat-messages]', `Failed to store ${this.metaPath}`);
console.error(e);
});
}
async _findFirstArchive () {
console.debug('[chat-messages]', 'Finding first archive for channel');
const years = await this.client.getListing(`${this.channelPath}/`);
const year = lowestNumberInListing(years);
const months = await this.client.getListing(`${this.channelPath}/${year}/`);
const month = lowestNumberInListing(months);
const days = await this.client.getListing(`${this.channelPath}/${year}/${pad(month)}/`);
const day = lowestNumberInListing(days);
const firstId = `${year}/${pad(month)}/${pad(day)}`;
console.debug('[chat-messages]', 'First is', firstId);
return firstId;
}
/* /*
* Write archive document * Write archive document
* *
@ -544,7 +667,7 @@ const ChatMessages = function (privateClient, publicClient) {
* *
* @private * @private
*/ */
_sync (obj) { async _sync (obj) {
console.debug(`[chat-messages] Writing archive object with ${obj.today.messages.length} messages`); console.debug(`[chat-messages] Writing archive object with ${obj.today.messages.length} messages`);
return this.client.storeObject('daily-archive', this.path, obj).then(function(){ return this.client.storeObject('daily-archive', this.path, obj).then(function(){