New models!

2024-10-06 23:26:08 -06:00
parent e6c855fcf9
commit 6394569b3b
2 changed files with 4625 additions and 4364 deletions
@@ -1,4 +1,5 @@
 descriptions = {
+   'llama3.2': _("Meta's Llama 3.2 goes small with 1B and 3B models."),
   'llama3.1': _("Llama 3.1 is a new state-of-the-art model from Meta available in 8B, 70B and 405B parameter sizes."),
   'gemma2': _("Google Gemma 2 is a high-performing and efficient model available in three sizes: 2B, 9B, and 27B."),
   'qwen2.5': _("Qwen2.5 models are pretrained on Alibaba's latest large-scale dataset, encompassing up to 18 trillion tokens. The model supports up to 128K tokens and has multilingual support."),
@@ -21,87 +22,88 @@ descriptions = {
   'llama2': _("Llama 2 is a collection of foundation language models ranging from 7B to 70B parameters."),
   'codellama': _("A large language model that can use text prompts to generate and discuss code."),
   'nomic-embed-text': _("A high-performing open embedding model with a large token context window."),
+   'mxbai-embed-large': _("State-of-the-art large embedding model from mixedbread.ai"),
   'dolphin-mixtral': _("Uncensored, 8x7b and 8x22b fine-tuned models based on the Mixtral mixture of experts models that excels at coding tasks. Created by Eric Hartford."),
   'phi': _("Phi-2: a 2.7B language model by Microsoft Research that demonstrates outstanding reasoning and language understanding capabilities."),
-   'llama2-uncensored': _("Uncensored Llama 2 model by George Sung and Jarrad Hope."),
   'deepseek-coder': _("DeepSeek Coder is a capable coding model trained on two trillion code and natural language tokens."),
-   'mxbai-embed-large': _("State-of-the-art large embedding model from mixedbread.ai"),
   'starcoder2': _("StarCoder2 is the next generation of transparently trained open code LLMs that comes in three sizes: 3B, 7B and 15B parameters."),
+   'llama2-uncensored': _("Uncensored Llama 2 model by George Sung and Jarrad Hope."),
   'dolphin-mistral': _("The uncensored Dolphin model based on Mistral that excels at coding tasks. Updated to version 2.8."),
   'zephyr': _("Zephyr is a series of fine-tuned versions of the Mistral and Mixtral models that are trained to act as helpful assistants."),
+   'yi': _("Yi 1.5 is a high-performing, bilingual language model."),
   'dolphin-llama3': _("Dolphin 2.9 is a new model with 8B and 70B sizes by Eric Hartford based on Llama 3 that has a variety of instruction, conversational, and coding skills."),
   'orca-mini': _("A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware."),
-   'yi': _("Yi 1.5 is a high-performing, bilingual language model."),
   'llava-llama3': _("A LLaVA model fine-tuned from Llama 3 Instruct with better scores in several benchmarks."),
+   'qwen2.5-coder': _("The latest series of Code-Specific Qwen models, with significant improvements in code generation, code reasoning, and code fixing."),
   'mistral-openorca': _("Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset."),
   'starcoder': _("StarCoder is a code generation model trained on 80+ programming languages."),
   'tinyllama': _("The TinyLlama project is an open endeavor to train a compact 1.1B Llama model on 3 trillion tokens."),
-   'vicuna': _("General use chat model based on Llama and Llama 2 with 2K to 16K context sizes."),
   'codestral': _("Codestral is Mistral AI’s first-ever code model designed for code generation tasks."),
+   'vicuna': _("General use chat model based on Llama and Llama 2 with 2K to 16K context sizes."),
   'llama2-chinese': _("Llama 2 based model fine tuned to improve Chinese dialogue ability."),
+   'snowflake-arctic-embed': _("A suite of text embedding models by Snowflake, optimized for performance."),
   'wizard-vicuna-uncensored': _("Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford."),
+   'granite-code': _("A family of open foundation models by IBM for Code Intelligence"),
   'codegeex4': _("A versatile model for AI software development scenarios, including code completion."),
   'nous-hermes2': _("The powerful family of models by Nous Research that excels at scientific discussion and coding tasks."),
-   'granite-code': _("A family of open foundation models by IBM for Code Intelligence"),
+   'all-minilm': _("Embedding models on very large sentence level datasets."),
   'openchat': _("A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks. Updated to version 3.5-0106."),
   'aya': _("Aya 23, released by Cohere, is a new family of state-of-the-art, multilingual models that support 23 languages."),
-   'wizardlm2': _("State of the art large language model from Microsoft AI with improved performance on complex chat, multilingual, reasoning and agent use cases."),
   'codeqwen': _("CodeQwen1.5 is a large language model pretrained on a large amount of code data."),
+   'wizardlm2': _("State of the art large language model from Microsoft AI with improved performance on complex chat, multilingual, reasoning and agent use cases."),
   'tinydolphin': _("An experimental 1.1B parameter model trained on the new Dolphin 2.8 dataset by Eric Hartford and based on TinyLlama."),
-   'all-minilm': _("Embedding models on very large sentence level datasets."),
   'wizardcoder': _("State-of-the-art code generation model"),
   'stable-code': _("Stable Code 3B is a coding model with instruct and code completion variants on par with models such as Code Llama 7B that are 2.5x larger."),
   'openhermes': _("OpenHermes 2.5 is a 7B model fine-tuned by Teknium on Mistral with fully open datasets."),
+   'qwen2-math': _("Qwen2 Math is a series of specialized math language models built upon the Qwen2 LLMs, which significantly outperforms the mathematical capabilities of open-source models and even closed-source models (e.g., GPT4o)."),
   'bakllava': _("BakLLaVA is a multimodal model consisting of the Mistral 7B base model augmented with the LLaVA architecture."),
   'stablelm2': _("Stable LM 2 is a state-of-the-art 1.6B and 12B parameter language model trained on multilingual data in English, Spanish, German, Italian, French, Portuguese, and Dutch."),
-   'qwen2-math': _("Qwen2 Math is a series of specialized math language models built upon the Qwen2 LLMs, which significantly outperforms the mathematical capabilities of open-source models and even closed-source models (e.g., GPT4o)."),
-   'wizard-math': _("Model focused on math and logic problems"),
   'llama3-gradient': _("This model extends LLama-3 8B's context length from 8k to over 1m tokens."),
-   'neural-chat': _("A fine-tuned model based on Mistral with good coverage of domain and language."),
   'deepseek-llm': _("An advanced language model crafted with 2 trillion bilingual tokens."),
+   'wizard-math': _("Model focused on math and logic problems"),
+   'glm4': _("A strong multi-lingual general language model with competitive performance to Llama 3."),
+   'neural-chat': _("A fine-tuned model based on Mistral with good coverage of domain and language."),
+   'reflection': _("A high-performing model trained with a new technique called Reflection-tuning that teaches a LLM to detect mistakes in its reasoning and correct course."),
+   'llama3-chatqa': _("A model from NVIDIA based on Llama 3 that excels at conversational question answering (QA) and retrieval-augmented generation (RAG)."),
+   'mistral-large': _("Mistral Large 2 is Mistral's new flagship model that is significantly more capable in code generation, mathematics, and reasoning with 128k context window and support for dozens of languages."),
+   'moondream': _("moondream2 is a small vision language model designed to run efficiently on edge devices."),
+   'xwinlm': _("Conversational model based on Llama 2 that performs competitively on various benchmarks."),
   'phind-codellama': _("Code generation model based on Code Llama."),
   'nous-hermes': _("General use models based on Llama and Llama 2 from Nous Research."),
-   'xwinlm': _("Conversational model based on Llama 2 that performs competitively on various benchmarks."),
   'sqlcoder': _("SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks"),
   'dolphincoder': _("A 7B and 15B uncensored variant of the Dolphin model family that excels at coding, based on StarCoder2."),
-   'llama3-chatqa': _("A model from NVIDIA based on Llama 3 that excels at conversational question answering (QA) and retrieval-augmented generation (RAG)."),
   'yarn-llama2': _("An extension of Llama 2 that supports a context of up to 128k tokens."),
-   'mistral-large': _("Mistral Large 2 is Mistral's new flagship model that is significantly more capable in code generation, mathematics, and reasoning with 128k context window and support for dozens of languages."),
-   'wizardlm': _("General use model based on Llama 2."),
   'smollm': _("🪐 A family of small models with 135M, 360M, and 1.7B parameters, trained on a new high-quality dataset."),
+   'wizardlm': _("General use model based on Llama 2."),
+   'deepseek-v2': _("A strong, economical, and efficient Mixture-of-Experts language model."),
   'starling-lm': _("Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness."),
-   'reflection': _("A high-performing model trained with a new technique called Reflection-tuning that teaches a LLM to detect mistakes in its reasoning and correct course."),
-   'moondream': _("moondream2 is a small vision language model designed to run efficiently on edge devices."),
-   'snowflake-arctic-embed': _("A suite of text embedding models by Snowflake, optimized for performance."),
   'samantha-mistral': _("A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral."),
   'solar': _("A compact, yet powerful 10.7B large language model designed for single-turn conversation."),
   'orca2': _("Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta's Llama 2 models. The model is designed to excel particularly in reasoning."),
-   'deepseek-v2': _("A strong, economical, and efficient Mixture-of-Experts language model."),
   'stable-beluga': _("Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy."),
-   'glm4': _("A strong multi-lingual general language model with competitive performance to Llama 3."),
   'dolphin-phi': _("2.7B uncensored Dolphin model by Eric Hartford, based on the Phi language model by Microsoft Research."),
   'wizardlm-uncensored': _("Uncensored version of Wizard LM model"),
-   'llava-phi3': _("A new small LLaVA model fine-tuned from Phi 3 Mini."),
   'hermes3': _("Hermes 3 is the latest version of the flagship Hermes series of LLMs by Nous Research"),
+   'yi-coder': _("Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters."),
+   'llava-phi3': _("A new small LLaVA model fine-tuned from Phi 3 Mini."),
+   'internlm2': _("InternLM2.5 is a 7B parameter model tailored for practical scenarios with outstanding reasoning capability."),
   'yarn-mistral': _("An extension of Mistral to support context windows of 64K or 128K."),
   'llama-pro': _("An expansion of Llama 2 that specializes in integrating both general language understanding and domain-specific knowledge, particularly in programming and mathematics."),
   'medllama2': _("Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset."),
-   'yi-coder': _("Yi-Coder is a series of open-source code language models that delivers state-of-the-art coding performance with fewer than 10 billion parameters."),
-   'internlm2': _("InternLM2.5 is a 7B parameter model tailored for practical scenarios with outstanding reasoning capability."),
   'meditron': _("Open-source medical large language model adapted from Llama 2 to the medical domain."),
   'nexusraven': _("Nexus Raven is a 13B instruction tuned model for function calling tasks."),
   'nous-hermes2-mixtral': _("The Nous Hermes 2 model from Nous Research, now trained over Mixtral."),
   'codeup': _("Great code generation model based on Llama2."),
-   'everythinglm': _("Uncensored Llama2 based model with support for a 16K context window."),
   'llama3-groq-tool-use': _("A series of models from Groq that represent a significant advancement in open-source AI capabilities for tool use/function calling."),
+   'everythinglm': _("Uncensored Llama2 based model with support for a 16K context window."),
   'magicoder': _("🎩 Magicoder is a family of 7B parameter models trained on 75K synthetic instruction data using OSS-Instruct, a novel approach to enlightening LLMs with open-source code snippets."),
   'stablelm-zephyr': _("A lightweight chat model allowing accurate, and responsive output without requiring high-end hardware."),
   'codebooga': _("A high-performing code instruct model created by merging two existing code models."),
+   'wizard-vicuna': _("Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj."),
   'mistrallite': _("MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts."),
   'falcon2': _("Falcon2 is an 11B parameters causal decoder-only model built by TII and trained over 5T tokens."),
-   'wizard-vicuna': _("Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj."),
   'duckdb-nsql': _("7B parameter text-to-SQL model made by MotherDuck and Numbers Station."),
-   'qwen2.5-coder': _("The latest series of Code-Specific Qwen models, with significant improvements in code generation, code reasoning, and code fixing."),
+   'minicpm-v': _("A series of multimodal LLMs (MLLMs) designed for vision-language understanding."),
   'megadolphin': _("MegaDolphin-2.2-120b is a transformation of Dolphin-2.2-70b created by interleaving the model with itself."),
   'notux': _("A top-performing mixture of experts model, fine-tuned with high-quality data."),
   'goliath': _("A language model created by combining two fine-tuned Llama 2 70B models into one."),
@@ -110,14 +112,13 @@ descriptions = {
   'bge-m3': _("BGE-M3 is a new model from BAAI distinguished for its versatility in Multi-Functionality, Multi-Linguality, and Multi-Granularity."),
   'mathstral': _("MathΣtral: a 7B model designed for math reasoning and scientific discovery by Mistral AI."),
   'dbrx': _("DBRX is an open, general-purpose LLM created by Databricks."),
-   'minicpm-v': _("A series of multimodal LLMs (MLLMs) designed for vision-language understanding."),
+   'solar-pro': _("Solar Pro Preview: an advanced large language model (LLM) with 22 billion parameters designed to fit into a single GPU"),
   'nuextract': _("A 3.8B model fine-tuned on a private high-quality synthetic dataset for information extraction, based on Phi-3."),
   'alfred': _("A robust conversational model designed to be used for both chat and instruct use cases."),
   'firefunction-v2': _("An open weights function calling model based on Llama 3, competitive with GPT-4o function calling capabilities."),
-   'solar-pro': _("Solar Pro Preview: an advanced large language model (LLM) with 22 billion parameters designed to fit into a single GPU"),
-   'bge-large': _("Embedding model from BAAI mapping texts to vectors."),
   'reader-lm': _("A series of models that convert HTML content to Markdown content, which is useful for content conversion tasks."),
+   'bge-large': _("Embedding model from BAAI mapping texts to vectors."),
   'deepseek-v2.5': _("An upgraded version of DeekSeek-V2 that integrates the general and coding abilities of both DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct."),
-   'paraphrase-multilingual': _("Sentence-transformers model that can be used for tasks like clustering or semantic search."),
   'bespoke-minicheck': _("A state-of-the-art fact-checking model developed by Bespoke Labs."),
+   'paraphrase-multilingual': _("Sentence-transformers model that can be used for tasks like clustering or semantic search."),
 }