Added support for multiple tags on a single model

2024-05-18 15:52:50 -06:00
parent 8ddce304b2
commit 02acbb2d70
571 changed files with 76910 additions and 127 deletions
--- a/.flatpak-builder/cache/objects/aa/99f2f6da3b9d8a6d935c1945979d761b48884c5a8c2f3f3249c938f4a52bc8.file
+++ b/.flatpak-builder/cache/objects/aa/99f2f6da3b9d8a6d935c1945979d761b48884c5a8c2f3f3249c938f4a52bc8.file
@@ -0,0 +1,205 @@
+#
+# The Python Imaging Library.
+# $Id$
+#
+# a simple Qt image interface.
+#
+# history:
+# 2006-06-03 fl: created
+# 2006-06-04 fl: inherit from QImage instead of wrapping it
+# 2006-06-05 fl: removed toimage helper; move string support to ImageQt
+# 2013-11-13 fl: add support for Qt5 (aurelien.ballier@cyclonit.com)
+#
+# Copyright (c) 2006 by Secret Labs AB
+# Copyright (c) 2006 by Fredrik Lundh
+#
+# See the README file for information on usage and redistribution.
+#
+from __future__ import annotations
+
+import sys
+from io import BytesIO
+from typing import Callable
+
+from . import Image
+from ._util import is_path
+
+qt_version: str | None
+qt_versions = [
+    ["6", "PyQt6"],
+    ["side6", "PySide6"],
+]
+
+# If a version has already been imported, attempt it first
+qt_versions.sort(key=lambda version: version[1] in sys.modules, reverse=True)
+for version, qt_module in qt_versions:
+    try:
+        QBuffer: type
+        QIODevice: type
+        QImage: type
+        QPixmap: type
+        qRgba: Callable[[int, int, int, int], int]
+        if qt_module == "PyQt6":
+            from PyQt6.QtCore import QBuffer, QIODevice
+            from PyQt6.QtGui import QImage, QPixmap, qRgba
+        elif qt_module == "PySide6":
+            from PySide6.QtCore import QBuffer, QIODevice
+            from PySide6.QtGui import QImage, QPixmap, qRgba
+    except (ImportError, RuntimeError):
+        continue
+    qt_is_installed = True
+    qt_version = version
+    break
+else:
+    qt_is_installed = False
+    qt_version = None
+
+
+def rgb(r, g, b, a=255):
+    """(Internal) Turns an RGB color into a Qt compatible color integer."""
+    # use qRgb to pack the colors, and then turn the resulting long
+    # into a negative integer with the same bitpattern.
+    return qRgba(r, g, b, a) & 0xFFFFFFFF
+
+
+def fromqimage(im):
+    """
+    :param im: QImage or PIL ImageQt object
+    """
+    buffer = QBuffer()
+    if qt_version == "6":
+        try:
+            qt_openmode = QIODevice.OpenModeFlag
+        except AttributeError:
+            qt_openmode = QIODevice.OpenMode
+    else:
+        qt_openmode = QIODevice
+    buffer.open(qt_openmode.ReadWrite)
+    # preserve alpha channel with png
+    # otherwise ppm is more friendly with Image.open
+    if im.hasAlphaChannel():
+        im.save(buffer, "png")
+    else:
+        im.save(buffer, "ppm")
+
+    b = BytesIO()
+    b.write(buffer.data())
+    buffer.close()
+    b.seek(0)
+
+    return Image.open(b)
+
+
+def fromqpixmap(im):
+    return fromqimage(im)
+
+
+def align8to32(bytes, width, mode):
+    """
+    converts each scanline of data from 8 bit to 32 bit aligned
+    """
+
+    bits_per_pixel = {"1": 1, "L": 8, "P": 8, "I;16": 16}[mode]
+
+    # calculate bytes per line and the extra padding if needed
+    bits_per_line = bits_per_pixel * width
+    full_bytes_per_line, remaining_bits_per_line = divmod(bits_per_line, 8)
+    bytes_per_line = full_bytes_per_line + (1 if remaining_bits_per_line else 0)
+
+    extra_padding = -bytes_per_line % 4
+
+    # already 32 bit aligned by luck
+    if not extra_padding:
+        return bytes
+
+    new_data = [
+        bytes[i * bytes_per_line : (i + 1) * bytes_per_line] + b"\x00" * extra_padding
+        for i in range(len(bytes) // bytes_per_line)
+    ]
+
+    return b"".join(new_data)
+
+
+def _toqclass_helper(im):
+    data = None
+    colortable = None
+    exclusive_fp = False
+
+    # handle filename, if given instead of image name
+    if hasattr(im, "toUtf8"):
+        # FIXME - is this really the best way to do this?
+        im = str(im.toUtf8(), "utf-8")
+    if is_path(im):
+        im = Image.open(im)
+        exclusive_fp = True
+
+    qt_format = QImage.Format if qt_version == "6" else QImage
+    if im.mode == "1":
+        format = qt_format.Format_Mono
+    elif im.mode == "L":
+        format = qt_format.Format_Indexed8
+        colortable = [rgb(i, i, i) for i in range(256)]
+    elif im.mode == "P":
+        format = qt_format.Format_Indexed8
+        palette = im.getpalette()
+        colortable = [rgb(*palette[i : i + 3]) for i in range(0, len(palette), 3)]
+    elif im.mode == "RGB":
+        # Populate the 4th channel with 255
+        im = im.convert("RGBA")
+
+        data = im.tobytes("raw", "BGRA")
+        format = qt_format.Format_RGB32
+    elif im.mode == "RGBA":
+        data = im.tobytes("raw", "BGRA")
+        format = qt_format.Format_ARGB32
+    elif im.mode == "I;16" and hasattr(qt_format, "Format_Grayscale16"):  # Qt 5.13+
+        im = im.point(lambda i: i * 256)
+
+        format = qt_format.Format_Grayscale16
+    else:
+        if exclusive_fp:
+            im.close()
+        msg = f"unsupported image mode {repr(im.mode)}"
+        raise ValueError(msg)
+
+    size = im.size
+    __data = data or align8to32(im.tobytes(), size[0], im.mode)
+    if exclusive_fp:
+        im.close()
+    return {"data": __data, "size": size, "format": format, "colortable": colortable}
+
+
+if qt_is_installed:
+
+    class ImageQt(QImage):
+        def __init__(self, im):
+            """
+            An PIL image wrapper for Qt.  This is a subclass of PyQt's QImage
+            class.
+
+            :param im: A PIL Image object, or a file name (given either as
+                Python string or a PyQt string object).
+            """
+            im_data = _toqclass_helper(im)
+            # must keep a reference, or Qt will crash!
+            # All QImage constructors that take data operate on an existing
+            # buffer, so this buffer has to hang on for the life of the image.
+            # Fixes https://github.com/python-pillow/Pillow/issues/1370
+            self.__data = im_data["data"]
+            super().__init__(
+                self.__data,
+                im_data["size"][0],
+                im_data["size"][1],
+                im_data["format"],
+            )
+            if im_data["colortable"]:
+                self.setColorTable(im_data["colortable"])
+
+
+def toqimage(im):
+    return ImageQt(im)
+
+
+def toqpixmap(im):
+    qimage = toqimage(im)
+    return QPixmap.fromImage(qimage)
--- a/.flatpak-builder/cache/objects/aa/d5610af85eb47894eb82d252ced5471b360ac6bac536a6cd950a1f47ed3999.file
+++ b/.flatpak-builder/cache/objects/aa/d5610af85eb47894eb82d252ced5471b360ac6bac536a6cd950a1f47ed3999.file
@@ -0,0 +1,296 @@
+import argparse
+import sys
+from json import dumps
+from os.path import abspath, basename, dirname, join, realpath
+from platform import python_version
+from typing import List, Optional
+from unicodedata import unidata_version
+
+import charset_normalizer.md as md_module
+from charset_normalizer import from_fp
+from charset_normalizer.models import CliDetectionResult
+from charset_normalizer.version import __version__
+
+
+def query_yes_no(question: str, default: str = "yes") -> bool:
+    """Ask a yes/no question via input() and return their answer.
+
+    "question" is a string that is presented to the user.
+    "default" is the presumed answer if the user just hits <Enter>.
+        It must be "yes" (the default), "no" or None (meaning
+        an answer is required of the user).
+
+    The "answer" return value is True for "yes" or False for "no".
+
+    Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
+    """
+    valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}
+    if default is None:
+        prompt = " [y/n] "
+    elif default == "yes":
+        prompt = " [Y/n] "
+    elif default == "no":
+        prompt = " [y/N] "
+    else:
+        raise ValueError("invalid default answer: '%s'" % default)
+
+    while True:
+        sys.stdout.write(question + prompt)
+        choice = input().lower()
+        if default is not None and choice == "":
+            return valid[default]
+        elif choice in valid:
+            return valid[choice]
+        else:
+            sys.stdout.write("Please respond with 'yes' or 'no' " "(or 'y' or 'n').\n")
+
+
+def cli_detect(argv: Optional[List[str]] = None) -> int:
+    """
+    CLI assistant using ARGV and ArgumentParser
+    :param argv:
+    :return: 0 if everything is fine, anything else equal trouble
+    """
+    parser = argparse.ArgumentParser(
+        description="The Real First Universal Charset Detector. "
+        "Discover originating encoding used on text file. "
+        "Normalize text to unicode."
+    )
+
+    parser.add_argument(
+        "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        default=False,
+        dest="verbose",
+        help="Display complementary information about file if any. "
+        "Stdout will contain logs about the detection process.",
+    )
+    parser.add_argument(
+        "-a",
+        "--with-alternative",
+        action="store_true",
+        default=False,
+        dest="alternatives",
+        help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
+    )
+    parser.add_argument(
+        "-n",
+        "--normalize",
+        action="store_true",
+        default=False,
+        dest="normalize",
+        help="Permit to normalize input file. If not set, program does not write anything.",
+    )
+    parser.add_argument(
+        "-m",
+        "--minimal",
+        action="store_true",
+        default=False,
+        dest="minimal",
+        help="Only output the charset detected to STDOUT. Disabling JSON output.",
+    )
+    parser.add_argument(
+        "-r",
+        "--replace",
+        action="store_true",
+        default=False,
+        dest="replace",
+        help="Replace file when trying to normalize it instead of creating a new one.",
+    )
+    parser.add_argument(
+        "-f",
+        "--force",
+        action="store_true",
+        default=False,
+        dest="force",
+        help="Replace file without asking if you are sure, use this flag with caution.",
+    )
+    parser.add_argument(
+        "-t",
+        "--threshold",
+        action="store",
+        default=0.2,
+        type=float,
+        dest="threshold",
+        help="Define a custom maximum amount of chaos allowed in decoded content. 0. <= chaos <= 1.",
+    )
+    parser.add_argument(
+        "--version",
+        action="version",
+        version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
+            __version__,
+            python_version(),
+            unidata_version,
+            "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
+        ),
+        help="Show version information and exit.",
+    )
+
+    args = parser.parse_args(argv)
+
+    if args.replace is True and args.normalize is False:
+        print("Use --replace in addition of --normalize only.", file=sys.stderr)
+        return 1
+
+    if args.force is True and args.replace is False:
+        print("Use --force in addition of --replace only.", file=sys.stderr)
+        return 1
+
+    if args.threshold < 0.0 or args.threshold > 1.0:
+        print("--threshold VALUE should be between 0. AND 1.", file=sys.stderr)
+        return 1
+
+    x_ = []
+
+    for my_file in args.files:
+        matches = from_fp(my_file, threshold=args.threshold, explain=args.verbose)
+
+        best_guess = matches.best()
+
+        if best_guess is None:
+            print(
+                'Unable to identify originating encoding for "{}". {}'.format(
+                    my_file.name,
+                    "Maybe try increasing maximum amount of chaos."
+                    if args.threshold < 1.0
+                    else "",
+                ),
+                file=sys.stderr,
+            )
+            x_.append(
+                CliDetectionResult(
+                    abspath(my_file.name),
+                    None,
+                    [],
+                    [],
+                    "Unknown",
+                    [],
+                    False,
+                    1.0,
+                    0.0,
+                    None,
+                    True,
+                )
+            )
+        else:
+            x_.append(
+                CliDetectionResult(
+                    abspath(my_file.name),
+                    best_guess.encoding,
+                    best_guess.encoding_aliases,
+                    [
+                        cp
+                        for cp in best_guess.could_be_from_charset
+                        if cp != best_guess.encoding
+                    ],
+                    best_guess.language,
+                    best_guess.alphabets,
+                    best_guess.bom,
+                    best_guess.percent_chaos,
+                    best_guess.percent_coherence,
+                    None,
+                    True,
+                )
+            )
+
+            if len(matches) > 1 and args.alternatives:
+                for el in matches:
+                    if el != best_guess:
+                        x_.append(
+                            CliDetectionResult(
+                                abspath(my_file.name),
+                                el.encoding,
+                                el.encoding_aliases,
+                                [
+                                    cp
+                                    for cp in el.could_be_from_charset
+                                    if cp != el.encoding
+                                ],
+                                el.language,
+                                el.alphabets,
+                                el.bom,
+                                el.percent_chaos,
+                                el.percent_coherence,
+                                None,
+                                False,
+                            )
+                        )
+
+            if args.normalize is True:
+                if best_guess.encoding.startswith("utf") is True:
+                    print(
+                        '"{}" file does not need to be normalized, as it already came from unicode.'.format(
+                            my_file.name
+                        ),
+                        file=sys.stderr,
+                    )
+                    if my_file.closed is False:
+                        my_file.close()
+                    continue
+
+                dir_path = dirname(realpath(my_file.name))
+                file_name = basename(realpath(my_file.name))
+
+                o_: List[str] = file_name.split(".")
+
+                if args.replace is False:
+                    o_.insert(-1, best_guess.encoding)
+                    if my_file.closed is False:
+                        my_file.close()
+                elif (
+                    args.force is False
+                    and query_yes_no(
+                        'Are you sure to normalize "{}" by replacing it ?'.format(
+                            my_file.name
+                        ),
+                        "no",
+                    )
+                    is False
+                ):
+                    if my_file.closed is False:
+                        my_file.close()
+                    continue
+
+                try:
+                    x_[0].unicode_path = join(dir_path, ".".join(o_))
+
+                    with open(x_[0].unicode_path, "w", encoding="utf-8") as fp:
+                        fp.write(str(best_guess))
+                except IOError as e:
+                    print(str(e), file=sys.stderr)
+                    if my_file.closed is False:
+                        my_file.close()
+                    return 2
+
+        if my_file.closed is False:
+            my_file.close()
+
+    if args.minimal is False:
+        print(
+            dumps(
+                [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
+                ensure_ascii=True,
+                indent=4,
+            )
+        )
+    else:
+        for my_file in args.files:
+            print(
+                ", ".join(
+                    [
+                        el.encoding or "undefined"
+                        for el in x_
+                        if el.path == abspath(my_file.name)
+                    ]
+                )
+            )
+
+    return 0
+
+
+if __name__ == "__main__":
+    cli_detect()