{ "openai/gpt-4": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006 }, "openai/gpt-4o": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000005, "output_cost_per_token": 0.000015 }, "openai/gpt-4o-2024-05-13": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000005, "output_cost_per_token": 0.000015 }, "openai/gpt-4-turbo-preview": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "openai/gpt-4-0314": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006 }, "openai/gpt-4-0613": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006 }, "openai/gpt-4-32k": { "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012 }, "openai/gpt-4-32k-0314": { "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012 }, "openai/gpt-4-32k-0613": { "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012 }, "openai/gpt-4-turbo": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "openai/gpt-4-turbo-2024-04-09": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "openai/gpt-4-1106-preview": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, 
"output_cost_per_token": 0.00003 }, "openai/gpt-4-0125-preview": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "openai/gpt-3.5-turbo": { "max_tokens": 4097, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "openai/gpt-3.5-turbo-0301": { "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "openai/gpt-3.5-turbo-0613": { "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "openai/gpt-3.5-turbo-1106": { "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.0000010, "output_cost_per_token": 0.0000020 }, "openai/gpt-3.5-turbo-0125": { "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000015 }, "openai/gpt-3.5-turbo-16k": { "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004 }, "openai/gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004 }, "openai/text-embedding-3-large": { "max_tokens": 8191, "max_input_tokens": 8191, "output_vector_size": 3072, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.000000 }, "openai/text-embedding-3-small": { "max_tokens": 8191, "max_input_tokens": 8191, "output_vector_size": 1536, "input_cost_per_token": 0.00000002, "output_cost_per_token": 0.000000 }, "openai/text-embedding-ada-002": { "max_tokens": 8191, "max_input_tokens": 8191, "output_vector_size": 1536, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000 }, "openai/text-embedding-ada-002-v2": { "max_tokens": 8191, "max_input_tokens": 
8191, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000 }, "openai/babbage-002": { "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 4096, "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000004 }, "openai/davinci-002": { "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002 }, "openai/gpt-3.5-turbo-instruct": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "openai/gpt-3.5-turbo-instruct-0914": { "max_tokens": 4097, "max_input_tokens": 8192, "max_output_tokens": 4097, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "azure/gpt-4o": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000005, "output_cost_per_token": 0.000015 }, "azure/gpt-4-turbo-2024-04-09": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "azure/gpt-4-0125-preview": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "azure/gpt-4-1106-preview": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "azure/gpt-4-0613": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006 }, "azure/gpt-4-32k-0613": { "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012 }, "azure/gpt-4-32k": { "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012 }, "azure/gpt-4": { 
"max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006 }, "azure/gpt-4-turbo": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "azure/gpt-4-turbo-vision-preview": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003 }, "azure/gpt-3.5-turbo-16k-0613": { "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004 }, "azure/gpt-3.5-turbo-1106": { "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "azure/gpt-3.5-turbo-0125": { "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000015 }, "azure/gpt-3.5-turbo-16k": { "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004 }, "azure/gpt-3.5-turbo": { "max_tokens": 4096, "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000005, "output_cost_per_token": 0.0000015 }, "azure/gpt-3.5-turbo-instruct-0914": { "max_tokens": 4097, "max_input_tokens": 4097, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "azure/gpt-3.5-turbo-instruct": { "max_tokens": 4097, "max_input_tokens": 4097, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002 }, "azure/text-embedding-ada-002": { "max_tokens": 8191, "max_input_tokens": 8191, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.000000 }, "azure/text-embedding-3-large": { "max_tokens": 8191, "max_input_tokens": 8191, "input_cost_per_token": 0.00000013, "output_cost_per_token": 0.000000 }, 
"azure/text-embedding-3-small": { "max_tokens": 8191, "max_input_tokens": 8191, "input_cost_per_token": 0.00000002, "output_cost_per_token": 0.000000 }, "mistralai/mistral-tiny": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000025 }, "mistralai/mistral-small": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003 }, "mistralai/mistral-small-latest": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003 }, "mistralai/mistral-medium": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.0000027, "output_cost_per_token": 0.0000081 }, "mistralai/mistral-medium-latest": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.0000027, "output_cost_per_token": 0.0000081 }, "mistralai/mistral-medium-2312": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.0000027, "output_cost_per_token": 0.0000081 }, "mistralai/mistral-large-latest": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000012 }, "mistralai/mistral-large-2402": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000012 }, "mistralai/open-mistral-7b": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000025 }, "mistralai/open-mixtral-8x7b": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.0000007, "output_cost_per_token": 0.0000007 }, "mistralai/open-mixtral-8x22b": { "max_tokens": 
8191, "max_input_tokens": 64000, "max_output_tokens": 8191, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000006 }, "mistralai/codestral-latest": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003 }, "mistralai/codestral-2405": { "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, "input_cost_per_token": 0.000001, "output_cost_per_token": 0.000003 }, "mistralai/mistral-embed": { "max_tokens": 8192, "max_input_tokens": 8192, "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.0 }, "groq/llama2-70b-4096": { "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.00000070, "output_cost_per_token": 0.00000080 }, "groq/llama3-8b-8192": { "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000010 }, "groq/llama3-70b-8192": { "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, "input_cost_per_token": 0.00000064, "output_cost_per_token": 0.00000080 }, "groq/mixtral-8x7b-32768": { "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, "input_cost_per_token": 0.00000027, "output_cost_per_token": 0.00000027 }, "groq/gemma-7b-it": { "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, "input_cost_per_token": 0.00000010, "output_cost_per_token": 0.00000010 }, "anthropic/claude-instant-1": { "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551 }, "anthropic/claude-instant-1.2": { "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000000163, "output_cost_per_token": 0.000000551 }, "anthropic/claude-2": { "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, 
"output_cost_per_token": 0.000024 }, "anthropic/claude-2.1": { "max_tokens": 8191, "max_input_tokens": 200000, "max_output_tokens": 8191, "input_cost_per_token": 0.000008, "output_cost_per_token": 0.000024 }, "anthropic/claude-3-haiku-20240307": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000125 }, "anthropic/claude-3-opus-20240229": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000075 }, "anthropic/claude-3-sonnet-20240229": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015 }, "vertexai/chat-bison": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/chat-bison@001": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/chat-bison@002": { "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/chat-bison-32k": { "max_tokens": 8192, "max_input_tokens": 32000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/code-bison": { "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/code-bison@001": { "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/code-gecko@001": { "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, "input_cost_per_token": 0.000000125, "output_cost_per_token": 
0.000000125 }, "vertexai/code-gecko@002": { "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/code-gecko": { "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/codechat-bison": { "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/codechat-bison@001": { "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/codechat-bison-32k": { "max_tokens": 8192, "max_input_tokens": 32000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125 }, "vertexai/gemini-pro": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-001": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-002": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.5-pro": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-flash-001": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0, "output_cost_per_token": 0 }, 
"vertexai/gemini-1.5-flash-preview-0514": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0, "output_cost_per_token": 0 }, "vertexai/gemini-1.5-pro-001": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-pro-preview-0514": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-pro-preview-0215": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-1.5-pro-preview-0409": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0.000000625, "output_cost_per_token": 0.000001875 }, "vertexai/gemini-experimental": { "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_token": 0, "output_cost_per_token": 0 }, "vertexai/gemini-pro-vision": { "max_tokens": 2048, "max_input_tokens": 16384, "max_output_tokens": 2048, "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-vision": { "max_tokens": 2048, "max_input_tokens": 16384, "max_output_tokens": 2048, "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/gemini-1.0-pro-vision-001": { "max_tokens": 2048, "max_input_tokens": 16384, "max_output_tokens": 2048, "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.0000005 }, "vertexai/claude-3-sonnet@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, 
"max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015 }, "vertexai/claude-3-haiku@20240307": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000025, "output_cost_per_token": 0.00000125 }, "vertexai/claude-3-opus@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000075 }, "cohere/command-r": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00000050, "output_cost_per_token": 0.0000015 }, "cohere/command-light": { "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-r-plus": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000015 }, "cohere/command-nightly": { "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command": { "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-medium-beta": { "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "cohere/command-xlarge-beta": { "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015 }, "together/together-ai-up-to-3b": { "input_cost_per_token": 0.0000001, "output_cost_per_token": 0.0000001 }, "together/together-ai-3.1b-7b": { "input_cost_per_token": 0.0000002, "output_cost_per_token": 0.0000002 }, "together/together-ai-7.1b-20b": { "max_tokens": 1000, "input_cost_per_token": 0.0000004, 
"output_cost_per_token": 0.0000004 }, "together/together-ai-20.1b-40b": { "input_cost_per_token": 0.0000008, "output_cost_per_token": 0.0000008 }, "together/together-ai-40.1b-70b": { "input_cost_per_token": 0.0000009, "output_cost_per_token": 0.0000009 }, "together/mistralai/Mixtral-8x7B-Instruct-v0.1": { "input_cost_per_token": 0.0000006, "output_cost_per_token": 0.0000006 } }