Add/store channel archive metadata document #8

Merged
raucao merged 3 commits from feature/7-archive_metadata into master 2021-11-13 20:56:18 +00:00
3 changed files with 151 additions and 21 deletions

6
package-lock.json generated
View File

@ -2215,6 +2215,12 @@
"regenerate": "^1.4.0"
}
},
"regenerator-runtime": {
"version": "0.13.9",
"resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.9.tgz",
"integrity": "sha512-p3VT+cOEgxFsRRA9X4lkI1E+k2/CtnKtU4gcxyaCUreilL/vqI6CdZ3wxVUx3UOUg+gnUOQQcRI7BmSI656MYA==",
"dev": true
},
"resolve": {
"version": "1.20.0",
"resolved": "https://registry.npmjs.org/resolve/-/resolve-1.20.0.tgz",

View File

@ -22,6 +22,7 @@
"@babel/preset-env": "^7.14.9",
"babel-loader": "^8.2.2",
"webpack": "^5.48.0",
"webpack-cli": "^4.8.0"
"webpack-cli": "^4.8.0",
"regenerator-runtime": "^0.13.9"
}
}

View File

@ -1,3 +1,5 @@
import 'regenerator-runtime/runtime';
function pad (num) {
num = String(num);
if (num.length === 1) { num = "0" + num; }
@ -12,9 +14,17 @@ function parseDate (date) {
};
};
/**
 * Find the lowest number among the item names of a directory listing.
 *
 * Item names are parsed with a base-10 parseInt, so trailing characters (like
 * the "/" of a folder name) are ignored. Names that do not start with a
 * number are skipped.
 *
 * @param {object} items - Listing object, with item names as keys
 * @returns {number|undefined} The lowest number, or undefined if the listing
 *                             contains no numeric item names
 */
function lowestNumberInListing(items) {
  const sortedNumbers = Object.keys(items)
    .map(name => Number.parseInt(name, 10))
    .filter(num => !Number.isNaN(num))
    // An explicit comparator is required here: the default sort order
    // compares the string representations, which would put 10 before 2.
    .sort((a, b) => a - b);

  return sortedNumbers[0];
}
const ChatMessages = function (privateClient, publicClient) {
/**
* Schema: chat-messages/daily
* Schema: chat-messages/daily-archive
*
* Represents one calendar day of chat messages
*
@ -48,8 +58,7 @@ const ChatMessages = function (privateClient, publicClient) {
"properties": {
"@context": {
"type": "string",
"default": "https://kosmos.org/ns/v2",
"enum": ["https://kosmos.org/ns/v2"]
"default": "https://kosmos.org/ns/v2/chat-channel"
},
"@id": {
"type": "string",
@ -141,8 +150,53 @@ const ChatMessages = function (privateClient, publicClient) {
"required": []
};
privateClient.declareType("daily-archive", "https://kosmos.org/ns/v2", archiveSchema);
publicClient.declareType("daily-archive", "https://kosmos.org/ns/v2", archiveSchema);
privateClient.declareType("daily-archive", "https://kosmos.org/ns/v2/chat-channel", archiveSchema);
publicClient.declareType("daily-archive", "https://kosmos.org/ns/v2/chat-channel", archiveSchema);
/**
 * Schema: chat-messages/daily-archive-meta
 *
 * Stores meta information about the daily archives of a channel: the date
 * IDs ("YYYY/MM/DD") of its first and last archive.
 *
 * @example
 * {
 *   "@context": "https://kosmos.org/ns/v2/chat-channel-meta",
 *   "@id": "chat-messages/irc.libera.chat/channels/kosmos/meta",
 *   "@type": "ChatChannelMeta",
 *   "first": "2009/01/03",
 *   "last": "2021/11/05"
 * }
 */
const archiveMetaContext = "https://kosmos.org/ns/v2/chat-channel-meta";
// Matches date IDs of the form YYYY/MM/DD
const archiveDateIdPattern = "^[0-9]{4}\/[0-9]{2}\/[0-9]{2}$";

const archiveMetaSchema = {
  type: "object",
  properties: {
    "@context": { type: "string", default: archiveMetaContext },
    "@id": { type: "string" },
    "@type": {
      type: "string",
      default: "ChatChannelMeta",
      enum: ["ChatChannelMeta"]
    },
    first: { type: "string", pattern: archiveDateIdPattern },
    last: { type: "string", pattern: archiveDateIdPattern }
  },
  required: ["@id", "first", "last"]
};

// Register the type on the private client first, then on the public one
[privateClient, publicClient].forEach(client => {
  client.declareType("daily-archive-meta", archiveMetaContext, archiveMetaSchema);
});
/**
* A daily archive stores chat messages by calendar day.
@ -257,17 +311,27 @@ const ChatMessages = function (privateClient, publicClient) {
this.dateId = this.parsedDate.year+'/'+this.parsedDate.month+'/'+this.parsedDate.day;
/**
* @property {string} path - Document path of the archive file
* @property {string} channelPath - Base directory path of the channel archives
*/
if (this.channelType === "room") {
// Normal chatroom
const channelName = this.channelName.replace(/#/,'');
this.path = `${this.service.domain}/channels/${channelName}/${this.dateId}`;
this.channelPath = `${this.service.domain}/channels/${channelName}`;
} else {
// User direct message
this.path = `${this.service.domain}/users/${this.channelName}/${this.dateId}`;
// User direct messages
this.channelPath = `${this.service.domain}/users/${this.channelName}`;
}
/**
* @property {string} path - Path of the archive document
*/
this.path = `${this.channelPath}/${this.dateId}`;
/**
* @property {string} metaPath - Path of the channel's metadata document
*/
this.metaPath = `${this.channelPath}/meta`;
/**
* @property {object} client - Public or private remoteStorage.js BaseClient
*/
@ -349,6 +413,8 @@ const ChatMessages = function (privateClient, publicClient) {
* @returns {Promise}
*/
remove () {
// TODO when removing, if previous is set, but not next, it means the
// removed file is the last archive. Thus, set "last" to previous file.
return this.client.remove(this.path);
}
@ -359,7 +425,7 @@ const ChatMessages = function (privateClient, publicClient) {
*
* @private
*/
_updateDocument (archive, messages) {
async _updateDocument (archive, messages) {
console.debug('[chat-messages] Updating archive document');
if (Array.isArray(messages)) {
@ -380,12 +446,12 @@ const ChatMessages = function (privateClient, publicClient) {
*
* @private
*/
_createDocument (messages) {
async _createDocument (messages) {
console.debug('[chat-messages] Creating new archive document');
const archive = this._buildArchiveObject();
if (Array.isArray(messages)) {
messages.forEach((message) => {
messages.forEach(message => {
archive.today.messages.push(message);
});
} else {
@ -397,16 +463,22 @@ const ChatMessages = function (privateClient, publicClient) {
// That includes setting 'next' in the previous log file
if (this.previous) { archive.today.previous = this.previous; }
if (this.next) { archive.today.next = this.next; }
return this._sync(archive);
} else {
// Find and update previous archive, set 'previous' on this one
return this._updatePreviousArchive().then((previous) => {
if (typeof previous === 'object') {
archive.today.previous = previous.today['@id'];
}
return this._sync(archive);
});
const previous = await this._updatePreviousArchive();
if (typeof previous === 'object') {
archive.today.previous = previous.today['@id'];
}
}
await this._sync(archive);
// TODO only write meta doc if argument is set on addMessages. This way
// we can avoid race conditions when syncing remote chat messages all at
// once for multiple days
await this._updateArchiveMetaDocument();
return;
}
/*
@ -537,6 +609,57 @@ const ChatMessages = function (privateClient, publicClient) {
});
}
async _updateArchiveMetaDocument () {
const meta = await this.client.getObject(this.metaPath);
if (typeof meta !== 'object') {
return this._createArchiveMetaDocument();
}
// Only update document if current date is newer than known "last"
if (Date.parse(meta.last.replace('/','-')) > Date.parse(this.date)) {
console.debug('[chat-messages]', 'Updating meta document for channel');
meta.last = this.dateId;
await this.client.storeObject('daily-archive-meta', this.metaPath, meta);
}
return;
}
async _createArchiveMetaDocument () {
console.debug('[chat-messages]', 'Creating new meta document for channel');
// When creating a new meta doc, we need to find the oldest archive,
// because older versions of the module did not write a meta doc.
const first = await this._findFirstArchive();
const roomName = this.channelName.replace(/#/,'');
const meta = {
'@id': `chat-messages/${this.service.domain}/channels/${roomName}/meta`,
'@type': 'ChatChannelMeta',
first: first,
last: this.dateId // TODO might have to search for last?
};
return this.client.storeObject('daily-archive-meta', this.metaPath, meta)
.then(() => console.debug('[chat-messages]', 'Meta document written to remote storage'))
.catch(e => {
console.log('[chat-messages]', `Failed to store ${this.metaPath}`);
console.error(e);
});
}
async _findFirstArchive () {
console.debug('[chat-messages]', 'Finding first archive for channel');
const years = await this.client.getListing(`${this.channelPath}/`);
const year = lowestNumberInListing(years);
const months = await this.client.getListing(`${this.channelPath}/${year}/`);
const month = lowestNumberInListing(months);
const days = await this.client.getListing(`${this.channelPath}/${year}/${pad(month)}/`);
const day = lowestNumberInListing(days);
const firstId = `${year}/${pad(month)}/${pad(day)}`;
console.debug('[chat-messages]', 'First is', firstId);
return firstId;
}
/*
* Write archive document
*
@ -544,7 +667,7 @@ const ChatMessages = function (privateClient, publicClient) {
*
* @private
*/
_sync (obj) {
async _sync (obj) {
console.debug(`[chat-messages] Writing archive object with ${obj.today.messages.length} messages`);
return this.client.storeObject('daily-archive', this.path, obj).then(function(){