This commit contains all changes starting with the first integration of the plugin into the gajim_1.1 branch (76dabe2) until the current plugin version V3 (42b9aeb). The manifest.ini is updated for compatibility with upcoming Gajim versions.
272 lines
11 KiB
Python
272 lines
11 KiB
Python
import logging
|
|
import re
|
|
import pygments
|
|
|
|
from gi.repository import Gtk
|
|
|
|
from gajim.plugins.helpers import log
|
|
|
|
|
|
from .gtkformatter import GTKFormatter
|
|
from .types import MatchType, LineBreakOptions, CodeMarkerOptions
|
|
|
|
|
|
log = logging.getLogger('gajim.plugin_system.syntax_highlight')
|
|
|
|
class ChatSyntaxHighlighter:
|
|
def hide_code_markup(self, buf, start, end):
|
|
tag = buf.get_tag_table().lookup('hide_code_markup')
|
|
if tag is None:
|
|
tag = Gtk.TextTag.new('hide_code_markup')
|
|
tag.set_property('invisible', True)
|
|
buf.get_tag_table().add(tag)
|
|
|
|
buf.apply_tag_by_name('hide_code_markup', start, end)
|
|
|
|
def check_line_break(self, is_multiline):
|
|
line_break = self.config.get_line_break_action()
|
|
|
|
return (line_break == LineBreakOptions.ALWAYS) \
|
|
or (is_multiline and line_break == LineBreakOptions.MULTILINE)
|
|
|
|
|
|
def format_code(self, buf, s_tag, s_code, e_tag, e_code, language):
|
|
style = self.config.get_style_name()
|
|
if self.config.get_code_marker_setting() == CodeMarkerOptions.HIDE:
|
|
self.hide_code_markup(buf, s_tag, s_code)
|
|
self.hide_code_markup(buf, e_code, e_tag)
|
|
else:
|
|
comment_tag = GTKFormatter.create_tag_for_token(
|
|
pygments.token.Comment,
|
|
pygments.styles.get_style_by_name(style))
|
|
buf.get_tag_table().add(comment_tag)
|
|
buf.apply_tag(comment_tag, s_tag, s_code)
|
|
buf.apply_tag(comment_tag, e_tag, e_code)
|
|
|
|
code = s_code.get_text(e_code)
|
|
log.debug("full text to encode: %s.", code)
|
|
|
|
|
|
start_mark = buf.create_mark(None, s_code, False)
|
|
|
|
lexer = None
|
|
|
|
if language is None:
|
|
lexer = self.config.get_default_lexer()
|
|
log.info("No Language specified. Falling back to default lexer: %s.",
|
|
self.config.get_default_lexer_name())
|
|
else:
|
|
log.debug("Using lexer for %s.", str(language))
|
|
lexer = self.config.get_lexer_with_fallback(language)
|
|
|
|
if lexer is None:
|
|
iterator = buf.get_iter_at_mark(start_mark)
|
|
buf.insert(iterator, '\n')
|
|
elif not self.config.is_internal_none_lexer(lexer):
|
|
tokens = pygments.lex(code, lexer)
|
|
|
|
formatter = GTKFormatter(style=style, start_mark=start_mark)
|
|
pygments.format(tokens, formatter, buf)
|
|
|
|
def find_multiline_matches(self, text):
|
|
start = None
|
|
matches = []
|
|
#Less strict, allow prefixed whitespaces: for i in re.finditer(r'(?:^|\n)[ |\t]*(```)\S*[ |\t]*(?:\n|$)', text, re.DOTALL):
|
|
for i in re.finditer(r'(?:^|\n)(```)\S*(?:\n|$)', text, re.DOTALL):
|
|
if start is None:
|
|
start = i
|
|
elif re.match(r'^\n```', i.group(0)) is not None:
|
|
matches.append(
|
|
(start.start(), i.end(), text[start.start():i.end()]))
|
|
start = None
|
|
else:
|
|
# not an end...
|
|
continue
|
|
return matches
|
|
|
|
def find_inline_matches(self, text):
|
|
"""
|
|
Inline code is highlighted if the start marker is precedded by a start
|
|
of line, a whitespace character or either of the other span markers
|
|
defined in XEP-0393.
|
|
The same applies mirrored to the end marker.
|
|
"""
|
|
return [(i.start(1), i.end(1), i.group(1)) for i in \
|
|
re.finditer(r'(?:^|\s|\*|~|_)(`((?!`).+?)`)(?:\s|\*|~|_|$)', text)]
|
|
|
|
def merge_match_groups(self, real_text, inline_matches, multiline_matches):
|
|
it_inline = iter(inline_matches)
|
|
it_multi = iter(multiline_matches)
|
|
length = len(real_text)
|
|
|
|
# Just to get cleaner code below...
|
|
def get_next(iterator):
|
|
return next(iterator, (length, length, ""))
|
|
|
|
# In order to simplify the process, we use the 'length' here.
|
|
cur_inline = get_next(it_inline)
|
|
cur_multi = get_next(it_multi)
|
|
|
|
pos = 0
|
|
|
|
# This will contain tuples with parts of the input and its classification
|
|
parts = []
|
|
while pos < length:
|
|
log.debug("-> in: %s", str(cur_inline))
|
|
log.debug("-> mu: %s", str(cur_multi))
|
|
|
|
# selected = (start, end, type)
|
|
selected = (cur_inline[0], cur_inline[1], MatchType.INLINE) \
|
|
if cur_inline[0] < cur_multi[0] \
|
|
else (cur_multi[0], cur_multi[1], MatchType.MULTILINE) \
|
|
if cur_multi[0] < length \
|
|
else (pos, length, MatchType.TEXT)
|
|
log.debug("--> select: %s", str(selected))
|
|
|
|
# Handle plain text string parts (and unforseen errors...)
|
|
if pos < selected[0]:
|
|
end = selected[0] if selected[0] != pos else selected[1]
|
|
parts.append((real_text[pos:end], MatchType.TEXT))
|
|
pos = selected[0]
|
|
elif pos > selected[0]:
|
|
log.error("Should not happen, position > found match.")
|
|
|
|
# Cut out and append selected text segment
|
|
parts.append((real_text[selected[0]:selected[1]], selected[2]))
|
|
pos = selected[1]
|
|
|
|
# Depending on the match type, we have to forward the iterators.
|
|
# Also, forward the other one, if regions overlap or we took over...
|
|
if selected[2] == MatchType.INLINE:
|
|
if cur_multi[0] < cur_inline[1]:
|
|
cur_multi = get_next(it_multi)
|
|
cur_inline = get_next(it_inline)
|
|
elif selected[2] == MatchType.MULTILINE:
|
|
if cur_inline[0] < cur_multi[1]:
|
|
cur_inline = get_next(it_inline)
|
|
cur_multi = get_next(it_multi)
|
|
|
|
return parts
|
|
|
|
def process_text(self, real_text, other_tags, _graphics, iter_,
|
|
_additional):
|
|
def fix_newline(char, marker_len_no_newline, force=False):
|
|
fixed = (marker_len_no_newline, '')
|
|
if char == '\n':
|
|
fixed = (marker_len_no_newline + 1, '')
|
|
elif force:
|
|
fixed = (marker_len_no_newline + 1, '\n')
|
|
return fixed
|
|
|
|
|
|
buf = self.textview.tv.get_buffer()
|
|
|
|
# first, try to find inline or multiline code snippets
|
|
inline_matches = self.find_inline_matches(real_text)
|
|
multiline_matches = self.find_multiline_matches(real_text)
|
|
|
|
if not inline_matches and not multiline_matches:
|
|
log.debug("Stopping early, since there is no code block in it....")
|
|
return
|
|
|
|
iterator = iter_ if iter_ is not None else buf.get_end_iter()
|
|
|
|
# Create a start marker with left gravity before inserting text.
|
|
start_mark = buf.create_mark("SHP_start", iterator, True)
|
|
end_mark = buf.create_mark("SHP_end", iterator, False)
|
|
|
|
insert_newline_for_multiline = self.check_line_break(True)
|
|
insert_newline_for_inline = self.check_line_break(False)
|
|
|
|
split_text = self.merge_match_groups(
|
|
real_text, inline_matches, multiline_matches)
|
|
|
|
buf.begin_user_action()
|
|
|
|
for num, (text_to_insert, match_type) in enumerate(split_text):
|
|
language = None
|
|
end_of_message = num == (len(split_text) - 1)
|
|
|
|
if match_type == MatchType.TEXT:
|
|
self.textview.detect_and_print_special_text(
|
|
text_to_insert, other_tags, graphics=_graphics,
|
|
iter_=iterator, additional_data=_additional)
|
|
else:
|
|
if match_type == MatchType.MULTILINE:
|
|
language_match = re.search(
|
|
'\n*```([^\n]*)\n', text_to_insert, re.DOTALL)
|
|
language = None if language_match is None \
|
|
else language_match.group(1)
|
|
language_len = 0 if language is None else len(language)
|
|
|
|
# We account the language word width for the front marker
|
|
front = fix_newline(text_to_insert[0], 3 + language_len,
|
|
insert_newline_for_multiline)
|
|
back = fix_newline(text_to_insert[-1], 3,
|
|
insert_newline_for_multiline and not end_of_message)
|
|
else:
|
|
front = fix_newline(text_to_insert[0], 1,
|
|
insert_newline_for_inline)
|
|
back = fix_newline(text_to_insert[-1], 1,
|
|
insert_newline_for_inline and not end_of_message)
|
|
|
|
marker_widths = (front[0], back[0])
|
|
text_to_insert = ''.join([front[1], text_to_insert, back[1]])
|
|
|
|
# insertion invalidates iterator, let's use our start mark...
|
|
self.insert_and_format_code(buf, text_to_insert, language,
|
|
marker_widths, start_mark, end_mark, other_tags)
|
|
|
|
iterator = buf.get_iter_at_mark(end_mark)
|
|
# the current end of the buffer's contents is the start for the
|
|
# next iteration
|
|
buf.move_mark(start_mark, iterator)
|
|
|
|
buf.delete_mark(start_mark)
|
|
buf.delete_mark(end_mark)
|
|
|
|
buf.end_user_action()
|
|
|
|
# We have to make sure this is the last thing we do (i.e. no calls to
|
|
# the other textview methods no more from here on), because the
|
|
# print_special_text method is resetting the plugin_modified variable...
|
|
self.textview.plugin_modified = True
|
|
|
|
def insert_and_format_code(self, buf, insert_text, language, marker,
|
|
start_mark, end_mark, other_tags=None):
|
|
|
|
start_iter = buf.get_iter_at_mark(start_mark)
|
|
|
|
if other_tags:
|
|
buf.insert_with_tags_by_name(start_iter, insert_text,
|
|
*other_tags)
|
|
else:
|
|
buf.insert(start_iter, insert_text)
|
|
|
|
tag_start = buf.get_iter_at_mark(start_mark)
|
|
tag_end = buf.get_iter_at_mark(end_mark)
|
|
s_code = tag_start.copy()
|
|
e_code = tag_end.copy()
|
|
s_code.forward_chars(marker[0])
|
|
e_code.backward_chars(marker[1])
|
|
|
|
log.debug("full text between tags: %s.", tag_start.get_text(tag_end))
|
|
|
|
self.format_code(buf, tag_start, s_code, tag_end, e_code, language)
|
|
|
|
self.textview.plugin_modified = True
|
|
|
|
# Set general code block format
|
|
tag = Gtk.TextTag.new()
|
|
if self.config.is_bgcolor_override_enabled():
|
|
tag.set_property('background', self.config.get_bgcolor())
|
|
tag.set_property('paragraph-background', self.config.get_bgcolor())
|
|
tag.set_property('font', self.config.get_font())
|
|
buf.get_tag_table().add(tag)
|
|
buf.apply_tag(tag, tag_start, tag_end)
|
|
|
|
def __init__(self, config, textview):
|
|
self.last_end_mark = None
|
|
self.config = config
|
|
self.textview = textview
|