# available_models.py
# Ollama doesn't expose an API for listing the models available in its library,
# so this catalog of model names and descriptions is maintained by hand, sorry.
available_models = {
"llama3":"Meta Llama 3: The most capable openly available LLM to date",
"phi3":"Phi-3 Mini is a 3.8B parameters, lightweight, state-of-the-art open model by Microsoft.",
"wizardlm2":"State of the art large language model from Microsoft AI with improved performance on complex chat, multilingual, reasoning and agent use cases.",
"mistral":"The 7B model released by Mistral AI, updated to version 0.2.",
"gemma":"Gemma is a family of lightweight, state-of-the-art open models built by Google DeepMind. Updated to version 1.1",
"mixtral":"A set of Mixture of Experts (MoE) model with open weights by Mistral AI in 8x7b and 8x22b parameter sizes.",
"llama2":"Llama 2 is a collection of foundation language models ranging from 7B to 70B parameters.",
"codegemma":"CodeGemma is a collection of powerful, lightweight models that can perform a variety of coding tasks like fill-in-the-middle code completion, code generation, natural language understanding, mathematical reasoning, and instruction following.",
"command-r":"Command R is a Large Language Model optimized for conversational interaction and long context tasks.",
"command-r-plus":"Command R+ is a powerful, scalable large language model purpose-built to excel at real-world enterprise use cases.",
"llava":"🌋 LLaVA is a novel end-to-end trained large multimodal model that combines a vision encoder and Vicuna for general-purpose visual and language understanding. Updated to version 1.6.",
"dbrx":"DBRX is an open, general-purpose LLM created by Databricks.",
"codellama":"A large language model that can use text prompts to generate and discuss code.",
"qwen":"Qwen 1.5 is a series of large language models by Alibaba Cloud spanning from 0.5B to 110B parameters",
"dolphin-mixtral":"Uncensored, 8x7b and 8x22b fine-tuned models based on the Mixtral mixture of experts models that excels at coding tasks. Created by Eric Hartford.",
"llama2-uncensored":"Uncensored Llama 2 model by George Sung and Jarrad Hope.",
"deepseek-coder":"DeepSeek Coder is a capable coding model trained on two trillion code and natural language tokens.",
"mistral-openorca":"Mistral OpenOrca is a 7 billion parameter model, fine-tuned on top of the Mistral 7B model using the OpenOrca dataset.",
"nomic-embed-text":"A high-performing open embedding model with a large token context window.",
"phi":"Phi-2: a 2.7B language model by Microsoft Research that demonstrates outstanding reasoning and language understanding capabilities.",
"dolphin-mistral":"The uncensored Dolphin model based on Mistral that excels at coding tasks. Updated to version 2.8.",
"orca-mini":"A general-purpose model ranging from 3 billion parameters to 70 billion, suitable for entry-level hardware.",
"nous-hermes2":"The powerful family of models by Nous Research that excels at scientific discussion and coding tasks.",
"zephyr":"Zephyr is a series of fine-tuned versions of the Mistral and Mixtral models that are trained to act as helpful assistants.",
"llama2-chinese":"Llama 2 based model fine tuned to improve Chinese dialogue ability.",
"wizard-vicuna-uncensored":"Wizard Vicuna Uncensored is a 7B, 13B, and 30B parameter model based on Llama 2 uncensored by Eric Hartford.",
"vicuna":"General use chat model based on Llama and Llama 2 with 2K to 16K context sizes.",
"starcoder2":"StarCoder2 is the next generation of transparently trained open code LLMs that comes in three sizes: 3B, 7B and 15B parameters.",
"openhermes":"OpenHermes 2.5 is a 7B model fine-tuned by Teknium on Mistral with fully open datasets.",
"tinyllama":"The TinyLlama project is an open endeavor to train a compact 1.1B Llama model on 3 trillion tokens.",
"openchat":"A family of open-source models trained on a wide variety of data, surpassing ChatGPT on various benchmarks. Updated to version 3.5-0106.",
"tinydolphin":"An experimental 1.1B parameter model trained on the new Dolphin 2.8 dataset by Eric Hartford and based on TinyLlama.",
"starcoder":"StarCoder is a code generation model trained on 80+ programming languages.",
"wizardcoder":"State-of-the-art code generation model",
"stable-code":"Stable Code 3B is a coding model with instruct and code completion variants on par with models such as Code Llama 7B that are 2.5x larger.",
"dolphin-llama3":"Dolphin 2.9 is a new model with 8B and 70B sizes by Eric Hartford based on Llama 3 that has a variety of instruction, conversational, and coding skills.",
"yi":"A high-performing, bilingual language model.",
"mxbai-embed-large":"State-of-the-art large embedding model from mixedbread.ai",
"neural-chat":"A fine-tuned model based on Mistral with good coverage of domain and language.",
"phind-codellama":"Code generation model based on Code Llama.",
"wizard-math":"Model focused on math and logic problems",
"starling-lm":"Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.",
"falcon":"A large language model built by the Technology Innovation Institute (TII) for use in summarization, text generation, and chat bots.",
"orca2":"Orca 2 is built by Microsoft research, and are a fine-tuned version of Meta's Llama 2 models. The model is designed to excel particularly in reasoning.",
"dolphincoder":"A 7B and 15B uncensored variant of the Dolphin model family that excels at coding, based on StarCoder2.",
"dolphin-phi":"2.7B uncensored Dolphin model by Eric Hartford, based on the Phi language model by Microsoft Research.",
"nous-hermes":"General use models based on Llama and Llama 2 from Nous Research.",
"sqlcoder":"SQLCoder is a code completion model fined-tuned on StarCoder for SQL generation tasks",
"solar":"A compact, yet powerful 10.7B large language model designed for single-turn conversation.",
"stablelm2":"Stable LM 2 is a state-of-the-art 1.6B and 12B parameter language model trained on multilingual data in English, Spanish, German, Italian, French, Portuguese, and Dutch.",
"bakllava":"BakLLaVA is a multimodal model consisting of the Mistral 7B base model augmented with the LLaVA architecture.",
"medllama2":"Fine-tuned Llama 2 model to answer medical questions based on an open source medical dataset.",
"yarn-llama2":"An extension of Llama 2 that supports a context of up to 128k tokens.",
"deepseek-llm":"An advanced language model crafted with 2 trillion bilingual tokens.",
"nous-hermes2-mixtral":"The Nous Hermes 2 model from Nous Research, now trained over Mixtral.",
"wizardlm-uncensored":"Uncensored version of Wizard LM model",
"codeqwen":"CodeQwen1.5 is a large language model pretrained on a large amount of code data.",
"all-minilm":"Embedding models on very large sentence level datasets.",
"samantha-mistral":"A companion assistant trained in philosophy, psychology, and personal relationships. Based on Mistral.",
"codeup":"Great code generation model based on Llama2.",
"stable-beluga":"Llama 2 based model fine tuned on an Orca-style dataset. Originally called Free Willy.",
"llama3-gradient":"This model extends LLama-3 8B's context length from 8k to over 1m tokens.",
"everythinglm":"Uncensored Llama2 based model with support for a 16K context window.",
"xwinlm":"Conversational model based on Llama 2 that performs competitively on various benchmarks.",
"yarn-mistral":"An extension of Mistral to support context windows of 64K or 128K.",
"meditron":"Open-source medical large language model adapted from Llama 2 to the medical domain.",
"wizardlm":"General use model based on Llama 2.",
"llama-pro":"An expansion of Llama 2 that specializes in integrating both general language understanding and domain-specific knowledge, particularly in programming and mathematics.",
"magicoder":"🎩 Magicoder is a family of 7B parameter models trained on 75K synthetic instruction data using OSS-Instruct, a novel approach to enlightening LLMs with open-source code snippets.",
"stablelm-zephyr":"A lightweight chat model allowing accurate, and responsive output without requiring high-end hardware.",
"codebooga":"A high-performing code instruct model created by merging two existing code models.",
"nexusraven":"Nexus Raven is a 13B instruction tuned model for function calling tasks.",
"mistrallite":"MistralLite is a fine-tuned model based on Mistral with enhanced capabilities of processing long contexts.",
"wizard-vicuna":"Wizard Vicuna is a 13B parameter model based on Llama 2 trained by MelodysDreamj.",
"goliath":"A language model created by combining two fine-tuned Llama 2 70B models into one.",
"open-orca-platypus2":"Merge of the Open Orca OpenChat model and the Garage-bAInd Platypus 2 model. Designed for chat and code generation.",
"notux":"A top-performing mixture of experts model, fine-tuned with high-quality data.",
"megadolphin":"MegaDolphin-2.2-120b is a transformation of Dolphin-2.2-70b created by interleaving the model with itself.",
"snowflake-arctic-embed":"A suite of text embedding models by Snowflake, optimized for performance.",
"duckdb-nsql":"7B parameter text-to-SQL model made by MotherDuck and Numbers Station.",
"moondream":"moondream is a small vision language model designed to run efficiently on edge devices.",
"notus":"A 7B chat model fine-tuned with high-quality data and based on Zephyr.",
"alfred":"A robust conversational model designed to be used for both chat and instruct use cases.",
"llava-llama3":"A LLaVA model fine-tuned from Llama 3 Instruct with better scores in several benchmarks.",
"llama3-chatqa":"A model from NVIDIA based on Llama 3 that excels at conversational question answering (QA) and retrieval-augmented generation (RAG).",
"llava-phi3":"A new small LLaVA model fine-tuned from Phi 3 Mini."
}
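
# A minimal usage sketch (hypothetical helper, not part of the upstream file).
# Ollama model names often carry a ':tag' suffix (e.g. "llama3:8b" or
# "llama3:latest"), while the keys above are bare library names, so the
# suffix is assumed to need stripping before looking a model up here.
def get_available_model_description(model_name):
    """Return the catalog description for a model, or None if it is unknown."""
    base_name = model_name.split(":", 1)[0]  # drop any ':tag' suffix
    return available_models.get(base_name)

# Example:
# get_available_model_description("phi3:latest")
# -> "Phi-3 Mini is a 3.8B-parameter, lightweight, state-of-the-art open model by Microsoft."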