From 54352e09e8d070fa5488b19220fb62e6ae598544 Mon Sep 17 00:00:00 2001
From: Sebastian Kippe
Date: Thu, 4 Nov 2021 23:51:07 +0100
Subject: [PATCH] Store meta document with first/last archive ID

---
 package-lock.json    |   6 +++
 package.json         |   3 +-
 src/chat-messages.js | 159 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 140 insertions(+), 28 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 84a6b1e..de0c8b9 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -2215,6 +2215,12 @@
         "regenerate": "^1.4.0"
       }
     },
+    "regenerator-runtime": {
+      "version": "0.13.9",
+      "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.9.tgz",
+      "integrity": "sha512-p3VT+cOEgxFsRRA9X4lkI1E+k2/CtnKtU4gcxyaCUreilL/vqI6CdZ3wxVUx3UOUg+gnUOQQcRI7BmSI656MYA==",
+      "dev": true
+    },
     "resolve": {
       "version": "1.20.0",
       "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz",
diff --git a/package.json b/package.json
index 5547183..fd34f55 100644
--- a/package.json
+++ b/package.json
@@ -22,6 +22,7 @@
     "@babel/preset-env": "^7.14.9",
     "babel-loader": "^8.2.2",
     "webpack": "^5.48.0",
-    "webpack-cli": "^4.8.0"
+    "webpack-cli": "^4.8.0",
+    "regenerator-runtime": "^0.13.9"
   }
 }
diff --git a/src/chat-messages.js b/src/chat-messages.js
index 5e25d8b..6dc925e 100644
--- a/src/chat-messages.js
+++ b/src/chat-messages.js
@@ -1,3 +1,5 @@
+import 'regenerator-runtime/runtime';
+
 function pad (num) {
   num = String(num);
   if (num.length === 1) { num = "0" + num; }
@@ -12,6 +14,21 @@ function parseDate (date) {
   };
 };
 
+/**
+ * Find the lowest number among the item names of a directory listing
+ *
+ * @param {object} items - Listing object, with the item names as keys
+ * @returns {number|undefined} Lowest number, or undefined if there is none
+ */
+function lowestNumberInListing(items) {
+  // Names that don't start with a number (e.g. "meta") are dropped as NaN
+  const sortedNumbers = Object.keys(items || {})
+    .map(i => parseInt(i, 10))
+    .filter(i => !Number.isNaN(i))
+    .sort(function(a, b) { return a - b });
+  return sortedNumbers[0];
+}
+
 const ChatMessages = function (privateClient, publicClient) {
   /**
    * Schema: chat-messages/daily-archive
@@ -48,8 +65,7 @@ const ChatMessages = function (privateClient, publicClient) {
     "properties": {
       "@context": {
         "type": "string",
-        "default": "https://kosmos.org/ns/v2",
-        "enum": ["https://kosmos.org/ns/v2"]
+        "default": "https://kosmos.org/ns/v2/chat-channel"
       },
       "@id": {
         "type": "string",
@@ -141,8 +157,8 @@ const ChatMessages = function (privateClient, publicClient) {
     "required": []
   };
 
-  privateClient.declareType("daily-archive", "https://kosmos.org/ns/v2", archiveSchema);
-  publicClient.declareType("daily-archive", "https://kosmos.org/ns/v2", archiveSchema);
+  privateClient.declareType("daily-archive", "https://kosmos.org/ns/v2/chat-channel", archiveSchema);
+  publicClient.declareType("daily-archive", "https://kosmos.org/ns/v2/chat-channel", archiveSchema);
 
   /**
    * Schema: chat-messages/daily-archive-meta
@@ -153,7 +169,7 @@ const ChatMessages = function (privateClient, publicClient) {
    * {
    *   "@context": "https://kosmos.org/ns/v2",
    *   "@id": "chat-messages/irc.libera.chat/channels/kosmos/meta",
-   *   "@type": "ChatChannelMetadata",
+   *   "@type": "ChatChannelMeta",
    *   "first": "2009/01/03",
    *   "last": "2021/11/05"
    * }
@@ -164,16 +180,15 @@ const ChatMessages = function (privateClient, publicClient) {
     "properties": {
       "@context": {
         "type": "string",
-        "default": "https://kosmos.org/ns/v2",
-        "enum": ["https://kosmos.org/ns/v2"]
+        "default": "https://kosmos.org/ns/v2/chat-channel-meta"
       },
       "@id": {
         "type": "string",
       },
       "@type": {
         "type": "string",
-        "default": "ChatChannel",
-        "enum": ["ChatChannel"]
+        "default": "ChatChannelMeta",
+        "enum": ["ChatChannelMeta"]
       },
       "first": {
         "type": "string",
@@ -187,7 +202,8 @@ const ChatMessages = function (privateClient, publicClient) {
     "required": ["@id", "first", "last"]
   };
 
-  privateClient.declareType("daily-archive-meta", "https://kosmos.org/ns/v2", archiveMetaSchema);
+  privateClient.declareType("daily-archive-meta", "https://kosmos.org/ns/v2/chat-channel-meta", archiveMetaSchema);
+  publicClient.declareType("daily-archive-meta", "https://kosmos.org/ns/v2/chat-channel-meta", archiveMetaSchema);
 
   /**
    * A daily archive stores chat messages by calendar day.
@@ -302,17 +318,27 @@ const ChatMessages = function (privateClient, publicClient) {
       this.dateId = this.parsedDate.year+'/'+this.parsedDate.month+'/'+this.parsedDate.day;
 
       /**
-       * @property {string} path - Document path of the archive file
+       * @property {string} channelPath - Base directory path of the channel archives
        */
       if (this.channelType === "room") {
         // Normal chatroom
         const channelName = this.channelName.replace(/#/,'');
-        this.path = `${this.service.domain}/channels/${channelName}/${this.dateId}`;
+        this.channelPath = `${this.service.domain}/channels/${channelName}`;
       } else {
-        // User direct message
-        this.path = `${this.service.domain}/users/${this.channelName}/${this.dateId}`;
+        // User direct messages
+        this.channelPath = `${this.service.domain}/users/${this.channelName}`;
       }
 
+      /**
+       * @property {string} path - Path of the archive document
+       */
+      this.path = `${this.channelPath}/${this.dateId}`;
+
+      /**
+       * @property {string} metaPath - Path of the channel's metadata document
+       */
+      this.metaPath = `${this.channelPath}/meta`;
+
       /**
        * @property {object} client - Public or private remoteStorgage.js BaseClient
        */
@@ -394,6 +420,8 @@ const ChatMessages = function (privateClient, publicClient) {
      * @returns {Promise}
      */
     remove () {
+      // TODO when removing, if previous is set, but not next, it means the
+      // removed file is the last archive. Thus, set "last" to previous file.
       return this.client.remove(this.path);
     }
 
@@ -404,7 +432,7 @@ const ChatMessages = function (privateClient, publicClient) {
      *
      * @private
      */
-    _updateDocument (archive, messages) {
+    async _updateDocument (archive, messages) {
       console.debug('[chat-messages] Updating archive document');
 
       if (Array.isArray(messages)) {
@@ -425,12 +453,12 @@ const ChatMessages = function (privateClient, publicClient) {
      *
      * @private
      */
-    _createDocument (messages) {
+    async _createDocument (messages) {
       console.debug('[chat-messages] Creating new archive document');
       const archive = this._buildArchiveObject();
 
       if (Array.isArray(messages)) {
-        messages.forEach((message) => {
+        messages.forEach(message => {
           archive.today.messages.push(message);
         });
       } else {
@@ -442,16 +470,22 @@ const ChatMessages = function (privateClient, publicClient) {
         // That includes setting 'next' in the previous log file
         if (this.previous) { archive.today.previous = this.previous; }
         if (this.next) { archive.today.next = this.next; }
-        return this._sync(archive);
       } else {
         // Find and update previous archive, set 'previous' on this one
-        return this._updatePreviousArchive().then((previous) => {
-          if (typeof previous === 'object') {
-            archive.today.previous = previous.today['@id'];
-          }
-          return this._sync(archive);
-        });
+        const previous = await this._updatePreviousArchive();
+        if (typeof previous === 'object') {
+          archive.today.previous = previous.today['@id'];
+        }
       }
+
+      await this._sync(archive);
+
+      // TODO only write meta doc if argument is set on addMessages. This way
+      // we can avoid race conditions when syncing remote chat messages all at
+      // once for multiple days
+      await this._updateArchiveMetaDocument();
+
+      return;
     }
 
     /*
@@ -582,11 +616,82 @@ const ChatMessages = function (privateClient, publicClient) {
       });
     }
 
-    _createArchiveMetaDocument () {
+    /*
+     * Update "last" in the channel's meta document when this archive is newer
+     * than the one recorded there. Creates the meta document if the channel
+     * does not have one yet.
+     *
+     * @returns {Promise}
+     *
+     * @private
+     */
+    async _updateArchiveMetaDocument () {
+      const meta = await this.client.getObject(this.metaPath);
+      if (!meta || typeof meta !== 'object') {
+        return this._createArchiveMetaDocument();
+      }
 
+      // Only update document if current date is newer than known "last".
+      // Every slash is replaced, so that the ID can be parsed as an ISO date
+      if (Date.parse(meta.last.replace(/\//g,'-')) < Date.parse(this.date)) {
+        console.debug('[chat-messages]', 'Updating meta document for channel');
+        meta.last = this.dateId;
+        await this.client.storeObject('daily-archive-meta', this.metaPath, meta);
+      }
+
+      return;
     }
 
-    _updateArchiveMetaDocument () {
+    /*
+     * Create the channel's meta document, with "first" pointing to the oldest
+     * existing archive and "last" to this one
+     *
+     * @returns {Promise}
+     *
+     * @private
+     */
+    async _createArchiveMetaDocument () {
+      console.debug('[chat-messages]', 'Creating new meta document for channel');
+      // When creating a new meta doc, we need to find the oldest archive,
+      // because older versions of the module did not write a meta doc.
+      const first = await this._findFirstArchive();
+
+      const meta = {
+        // Derived from metaPath, so that direct-message archives get a
+        // users/ ID instead of a channels/ one
+        '@id': `chat-messages/${this.metaPath}`,
+        '@type': 'ChatChannelMeta',
+        first: first,
+        last: this.dateId // TODO might have to search for last?
+      };
+
+      return this.client.storeObject('daily-archive-meta', this.metaPath, meta)
+        .then(() => console.debug('[chat-messages]', 'Meta document written to remote storage'))
+        .catch(e => {
+          console.log('[chat-messages]', `Failed to store ${this.metaPath}`);
+          console.error(e);
+        });
+    }
+
+    /*
+     * Find the date ID of the channel's oldest archive, by descending into
+     * the lowest-numbered year, month and day directory
+     *
+     * @returns {Promise<string>} Date ID, e.g. "2009/01/03"
+     *
+     * @private
+     */
+    async _findFirstArchive () {
+      console.debug('[chat-messages]', 'Finding first archive for channel');
+      const years = await this.client.getListing(`${this.channelPath}/`);
+      const year = lowestNumberInListing(years);
+      const months = await this.client.getListing(`${this.channelPath}/${year}/`);
+      const month = lowestNumberInListing(months);
+      const days = await this.client.getListing(`${this.channelPath}/${year}/${pad(month)}/`);
+      const day = lowestNumberInListing(days);
+      const firstId = `${year}/${pad(month)}/${pad(day)}`;
+      console.debug('[chat-messages]', 'First is', firstId);
+      return firstId;
     }
 
     /*
@@ -596,7 +701,7 @@ const ChatMessages = function (privateClient, publicClient) {
      *
      * @private
      */
-    _sync (obj) {
+    async _sync (obj) {
       console.debug(`[chat-messages] Writing archive object with ${obj.today.messages.length} messages`);
 
       return this.client.storeObject('daily-archive', this.path, obj).then(function(){