'llama3.1':_("Llama 3.1 is a new state-of-the-art model from Meta available in 8B, 70B and 405B parameter sizes."),
'gemma2':_("Google Gemma 2 is now available in 2 sizes, 9B and 27B."),
'llama3':_("Meta Llama 3: The most capable openly available LLM to date"),
'mistral-nemo':_("A state-of-the-art 12B model with 128k context length, built by Mistral AI in collaboration with NVIDIA."),
'mistral-large':_("Mistral Large 2 is Mistral's new flagship model that is significantly more capable in code generation, mathematics, and reasoning with 128k context window and support for dozens of languages."),
'qwen2':_("Qwen2 is a new series of large language models from Alibaba group"),
'deepseek-coder-v2':_("An open-source Mixture-of-Experts code language model that achieves performance comparable to GPT4-Turbo in code-specific tasks."),
'phi3':_("Phi-3 is a family of lightweight 3B (Mini) and 14B (Medium) state-of-the-art open models by Microsoft."),
'aya':_("Aya 23, released by Cohere, is a new family of state-of-the-art, multilingual models that support 23 languages."),
'mistral':_("The 7B model released by Mistral AI, updated to version 0.3."),
'mixtral':_("A set of Mixture of Experts (MoE) model with open weights by Mistral AI in 8x7b and 8x22b parameter sizes."),
'codegemma':_("CodeGemma is a collection of powerful, lightweight models that can perform a variety of coding tasks like fill-in-the-middle code completion, code generation, natural language understanding, mathematical reasoning, and instruction following."),
'command-r':_("Command R is a Large Language Model optimized for conversational interaction and long context tasks."),
'command-r-plus':_("Command R+ is a powerful, scalable large language model purpose-built to excel at real-world enterprise use cases."),
'llava':_("🌋 LLaVA is a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding. Updated to version 1.6."),
'llama3':_("Meta Llama 3: The most capable openly available LLM to date"),
'gemma':_("Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1"),
'qwen':_("Qwen 1.5 is a series of large language models by Alibaba Cloud spanning from 0.5B to 110B parameters"),
'llama2':_("Llama 2 is a collection of foundation language models ranging from 7B to 70B parameters."),
'dolphin-mixtral':_("Uncensored, 8x7b and 8x22b fine-tuned models based on the Mixtral mixture of experts models that excels at coding tasks. Created by Eric Hartford."),
'nomic-embed-text':_("A high-performing open embedding model with a large token context window."),
'llama2-uncensored':_("Uncensored Llama 2 model by George Sung and Jarrad Hope."),
'deepseek-coder':_("DeepSeek Coder is a capable coding model trained on two trillion code and natural language tokens."),
'phi':_("Phi-2: a 2.7B language model by Microsoft Research that demonstrates outstanding reasoning and language understanding capabilities."),
'deepseek-coder':_("DeepSeek Coder is a capable coding model trained on two trillion code and natural language tokens."),
'dolphin-mistral':_("The uncensored Dolphin model based on Mistral that excels at coding tasks. Updated to version 2.8."),
'orca-mini':_("A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware."),
'dolphin-llama3':_("Dolphin 2.9 is a new model with 8B and 70B sizes by Eric Hartford based on Llama 3 that has a variety of instruction, conversational, and coding skills."),
'mxbai-embed-large':_("State-of-the-art large embedding model from mixedbread.ai"),
'mistral-openorca':_("Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset."),
'starcoder2':_("StarCoder2 is the next generation of transparently trained open code LLMs that comes in three sizes: 3B, 7B and 15B parameters."),
'zephyr':_("Zephyr is a series of fine-tuned versions of the Mistral and Mixtral models that are trained to act as helpful assistants."),
'mistral-openorca':_("Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset."),
'yi':_("Yi 1.5 is a high-performing, bilingual language model."),
'zephyr':_("Zephyr is a series of fine-tuned versions of the Mistral and Mixtral models that are trained to act as helpful assistants."),
'llama2-chinese':_("Llama 2 based model fine tuned to improve Chinese dialogue ability."),
'llava-llama3':_("A LLaVA model fine-tuned from Llama 3 Instruct with better scores in several benchmarks."),
'vicuna':_("General use chat model based on Llama and Llama 2 with 2K to 16K context sizes."),
'nous-hermes2':_("The powerful family of models by Nous Research that excels at scientific discussion and coding tasks."),
'wizard-vicuna-uncensored':_("Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford."),
'tinyllama':_("The TinyLlama project is an open endeavor to train a compact 1.1B Llama model on 3 trillion tokens."),
'wizard-vicuna-uncensored':_("Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford."),
'codestral':_("Codestral is Mistral AI’s first-ever code model designed for code generation tasks."),
'starcoder':_("StarCoder is a code generation model trained on 80+ programming languages."),
'wizardlm2':_("State of the art large language model from Microsoft AI with improved performance on complex chat, multilingual, reasoning and agent use cases."),
'openchat':_("A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks. Updated to version 3.5-0106."),
'aya':_("Aya 23, released by Cohere, is a new family of state-of-the-art, multilingual models that support 23 languages."),
'tinydolphin':_("An experimental 1.1B parameter model trained on the new Dolphin 2.8 dataset by Eric Hartford and based on TinyLlama."),
'openhermes':_("OpenHermes 2.5 is a 7B model fine-tuned by Teknium on Mistral with fully open datasets."),
'stable-code':_("Stable Code 3B is a coding model with instruct and code completion variants on par with models such as Code Llama 7B that are 2.5x larger."),
'codeqwen':_("CodeQwen1.5 is a large language model pretrained on a large amount of code data."),
'neural-chat':_("A fine-tuned model based on Mistral with good coverage of domain and language."),
'wizard-math':_("Model focused on math and logic problems"),
'neural-chat':_("A fine-tuned model based on Mistral with good coverage of domain and language."),
'stablelm2':_("Stable LM 2 is a state-of-the-art 1.6B and 12B parameter language model trained on multilingual data in English, Spanish, German, Italian, French, Portuguese, and Dutch."),
'all-minilm':_("Embedding models on very large sentence level datasets."),
'granite-code':_("A family of open foundation models by IBM for Code Intelligence"),
'all-minilm':_("Embedding models on very large sentence level datasets."),
'phind-codellama':_("Code generation model based on Code Llama."),
'dolphincoder':_("A 7B and 15B uncensored variant of the Dolphin model family that excels at coding, based on StarCoder2."),
'nous-hermes':_("General use models based on Llama and Llama 2 from Nous Research."),
'sqlcoder':_("SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks"),
'llama3-gradient':_("This model extends LLama-3 8B's context length from 8k to over 1m tokens."),
'starling-lm':_("Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness."),
'deepseek-llm':_("An advanced language model crafted with 2 trillion bilingual tokens."),
'yarn-llama2':_("An extension of Llama 2 that supports a context of up to 128k tokens."),
'xwinlm':_("Conversational model based on Llama 2 that performs competitively on various benchmarks."),
'deepseek-llm':_("An advanced language model crafted with 2 trillion bilingual tokens."),
'llama3-chatqa':_("A model from NVIDIA based on Llama 3 that excels at conversational question answering (QA) and retrieval-augmented generation (RAG)."),
'orca2':_("Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta's Llama 2 models. The model is designed to excel particularly in reasoning."),
'solar':_("A compact, yet powerful 10.7B large language model designed for single-turn conversation."),
'wizardlm':_("General use model based on Llama 2."),
'solar':_("A compact, yet powerful 10.7B large language model designed for single-turn conversation."),
'samantha-mistral':_("A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral."),
'dolphin-phi':_("2.7B uncensored Dolphin model by Eric Hartford, based on the Phi language model by Microsoft Research."),
'stable-beluga':_("Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy."),
'bakllava':_("BakLLaVA is a multimodal model consisting of the Mistral 7B base model augmented with the LLaVA architecture."),
'wizardlm-uncensored':_("Uncensored version of Wizard LM model"),
'snowflake-arctic-embed':_("A suite of text embedding models by Snowflake, optimized for performance."),
'deepseek-v2':_("A strong, economical, and efficient Mixture-of-Experts language model."),
'medllama2':_("Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset."),
'yarn-mistral':_("An extension of Mistral to support context windows of 64K or 128K."),
'nous-hermes2-mixtral':_("The Nous Hermes 2 model from Nous Research, now trained over Mixtral."),
'llama-pro':_("An expansion of Llama 2 that specializes in integrating both general language understanding and domain-specific knowledge, particularly in programming and mathematics."),
'deepseek-v2':_("A strong, economical, and efficient Mixture-of-Experts language model."),
'nous-hermes2-mixtral':_("The Nous Hermes 2 model from Nous Research, now trained over Mixtral."),
'meditron':_("Open-source medical large language model adapted from Llama 2 to the medical domain."),
'codeup':_("Great code generation model based on Llama2."),
'nexusraven':_("Nexus Raven is a 13B instruction tuned model for function calling tasks."),
'everythinglm':_("Uncensored Llama2 based model with support for a 16K context window."),
'llava-phi3':_("A new small LLaVA model fine-tuned from Phi 3 Mini."),
'codegeex4':_("A versatile model for AI software development scenarios, including code completion."),
'glm4':_("A strong multi-lingual general language model with competitive performance to Llama 3."),
'magicoder':_("🎩 Magicoder is a family of 7B parameter models trained on 75K synthetic instruction data using OSS-Instruct, a novel approach to enlightening LLMs with open-source code snippets."),
'stablelm-zephyr':_("A lightweight chat model allowing accurate, and responsive output without requiring high-end hardware."),
'codebooga':_("A high-performing code instruct model created by merging two existing code models."),
'mistrallite':_("MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts."),
'glm4':_("A strong multi-lingual general language model with competitive performance to Llama 3."),
'wizard-vicuna':_("A strong multi-lingual general language model with competitive performance to Llama 3."),
'wizard-vicuna':_("Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj."),
'duckdb-nsql':_("7B parameter text-to-SQL model made by MotherDuck and Numbers Station."),
'megadolphin':_("MegaDolphin-2.2-120b is a transformation of Dolphin-2.2-70b created by interleaving the model with itself."),
'goliath':_("A language model created by combining two fine-tuned Llama 2 70B models into one."),
'falcon2':_("Falcon2 is an 11B parameters causal decoder-only model built by TII and trained over 5T tokens."),
'notus':_("A 7B chat model fine-tuned with high-quality data and based on Zephyr."),
'dbrx':_("DBRX is an open, general-purpose LLM created by Databricks."),
'codegeex4':_("A versatile model for AI software development scenarios, including code completion."),
'alfred':_("A robust conversational model designed to be used for both chat and instruct use cases."),
'internlm2':_("InternLM2.5 is a 7B parameter model tailored for practical scenarios with outstanding reasoning capability."),
'alfred':_("A robust conversational model designed to be used for both chat and instruct use cases."),
'llama3-groq-tool-use':_("A series of models from Groq that represent a significant advancement in open-source AI capabilities for tool use/function calling."),
'mathstral':_("MathΣtral: a 7B model designed for math reasoning and scientific discovery by Mistral AI."),
'mistral-nemo':_("A state-of-the-art 12B model with 128k context length, built by Mistral AI in collaboration with NVIDIA."),
'firefunction-v2':_("An open weights function calling model based on Llama 3, competitive with GPT-4o function calling capabilities."),
'nuextract':_("A 3.8B model fine-tuned on a private high-quality synthetic dataset for information extraction, based on Phi-3."),