Creates a free API proxy that translates OpenAI-style requests to NVIDIA NIM API, allowing you to use NVIDIA's AI models in apps like Chub AI.
Features:
openai-nim-proxyOpenAI compatible proxy for NVIDIA NIM APIserver.js// server.js - OpenAI to NVIDIA NIM API Proxy
const express = require('express');
const cors = require('cors');
const axios = require('axios');
const app = express();
const PORT = process.env.PORT || 3000;
// Middleware
app.use(cors());
app.use(express.json());
// NVIDIA NIM API configuration
const NIM_API_BASE = process.env.NIM_API_BASE || 'https://integrate.api.nvidia.com/v1';
const NIM_API_KEY = process.env.NIM_API_KEY;
// 🔥 REASONING DISPLAY TOGGLE - Shows/hides reasoning in output
const SHOW_REASONING = false; // Set to true to show reasoning with <think> tags
// 🔥 THINKING MODE TOGGLE - Enables thinking for specific models that support it
const ENABLE_THINKING_MODE = false; // Set to true to enable chat_template_kwargs thinking parameter
// Model mapping (adjust based on available NIM models)
const MODEL_MAPPING = {
'gpt-3.5-turbo': 'nvidia/llama-3.1-nemotron-ultra-253b-v1',
'gpt-4': 'qwen/qwen3-coder-480b-a35b-instruct',
'gpt-4-turbo': 'moonshotai/kimi-k2-instruct-0905',
'gpt-4o': 'deepseek-ai/deepseek-v3.1',
'claude-3-opus': 'openai/gpt-oss-120b',
'claude-3-sonnet': 'openai/gpt-oss-20b',
'gemini-pro': 'qwen/qwen3-next-80b-a3b-thinking'
};
// Health check endpoint
app.get('/health', (req, res) => {
res.json({
status: 'ok',
service: 'OpenAI to NVIDIA NIM Proxy',
reasoning_display: SHOW_REASONING,
thinking_mode: ENABLE_THINKING_MODE
});
});
// List models endpoint (OpenAI compatible)
app.get('/v1/models', (req, res) => {
const models = Object.keys(MODEL_MAPPING).map(model => ({
id: model,
object: 'model',
created: Date.now(),
owned_by: 'nvidia-nim-proxy'
}));
res.json({
object: 'list',
data: models
});
});
// Chat completions endpoint (main proxy)
app.post('/v1/chat/completions', async (req, res) => {
try {
const { model, messages, temperature, max_tokens, stream } = req.body;
// Smart model selection with fallback
let nimModel = MODEL_MAPPING[model];
if (!nimModel) {
try {
await axios.post(`${NIM_API_BASE}/chat/completions`, {
model: model,
messages: [{ role: 'user', content: 'test' }],
max_tokens: 1
}, {
headers: { 'Authorization': `Bearer ${NIM_API_KEY}`, 'Content-Type': 'application/json' },
validateStatus: (status) => status < 500
}).then(res => {
if (res.status >= 200 && res.status < 300) {
nimModel = model;
}
});
} catch (e) {}
if (!nimModel) {
const modelLower = model.toLowerCase();
if (modelLower.includes('gpt-4') || modelLower.includes('claude-opus') || modelLower.includes('405b')) {
nimModel = 'meta/llama-3.1-405b-instruct';
} else if (modelLower.includes('claude') || modelLower.includes('gemini') || modelLower.includes('70b')) {
nimModel = 'meta/llama-3.1-70b-instruct';
} else {
nimModel = 'meta/llama-3.1-8b-instruct';
}
}
}
// Transform OpenAI request to NIM format
const nimRequest = {
model: nimModel,
messages: messages,
temperature: temperature || 0.6,
max_tokens: max_tokens || 9024,
extra_body: ENABLE_THINKING_MODE ? { chat_template_kwargs: { thinking: true } } : undefined,
stream: stream || false
};
// Make request to NVIDIA NIM API
const response = await axios.post(`${NIM_API_BASE}/chat/completions`, nimRequest, {
headers: {
'Authorization': `Bearer ${NIM_API_KEY}`,
'Content-Type': 'application/json'
},
responseType: stream ? 'stream' : 'json'
});
if (stream) {
// Handle streaming response with reasoning
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
let buffer = '';
let reasoningStarted = false;
response.data.on('data', (chunk) => {
buffer += chunk.toString();
const lines = buffer.split('\n');
buffer = lines.pop() || '';
lines.forEach(line => {
if (line.startsWith('data: ')) {
if (line.includes('[DONE]')) {
res.write(line + '\n');
return;
}
try {
const data = JSON.parse(line.slice(6));
if (data.choices?.[0]?.delta) {
const reasoning = data.choices[0].delta.reasoning_content;
const content = data.choices[0].delta.content;
if (SHOW_REASONING) {
let combinedContent = '';
if (reasoning && !reasoningStarted) {
combinedContent = '<think>\n' + reasoning;
reasoningStarted = true;
} else if (reasoning) {
combinedContent = reasoning;
}
if (content && reasoningStarted) {
combinedContent += '</think>\n\n' + content;
reasoningStarted = false;
} else if (content) {
combinedContent += content;
}
if (combinedContent) {
data.choices[0].delta.content = combinedContent;
delete data.choices[0].delta.reasoning_content;
}
} else {
if (content) {
data.choices[0].delta.content = content;
} else {
data.choices[0].delta.content = '';
}
delete data.choices[0].delta.reasoning_content;
}
}
res.write(`data: ${JSON.stringify(data)}\n\n`);
} catch (e) {
res.write(line + '\n');
}
}
});
});
response.data.on('end', () => res.end());
response.data.on('error', (err) => {
console.error('Stream error:', err);
res.end();
});
} else {
// Transform NIM response to OpenAI format with reasoning
const openaiResponse = {
id: `chatcmpl-${Date.now()}`,
object: 'chat.completion',
created: Math.floor(Date.now() / 1000),
model: model,
choices: response.data.choices.map(choice => {
let fullContent = choice.message?.content || '';
if (SHOW_REASONING && choice.message?.reasoning_content) {
fullContent = '<think>\n' + choice.message.reasoning_content + '\n</think>\n\n' + fullContent;
}
return {
index: choice.index,
message: {
role: choice.message.role,
content: fullContent
},
finish_reason: choice.finish_reason
};
}),
usage: response.data.usage || {
prompt_tokens: 0,
completion_tokens: 0,
total_tokens: 0
}
};
res.json(openaiResponse);
}
} catch (error) {
console.error('Proxy error:', error.message);
res.status(error.response?.status || 500).json({
error: {
message: error.message || 'Internal server error',
type: 'invalid_request_error',
code: error.response?.status || 500
}
});
}
});
// Catch-all for unsupported endpoints
app.all('*', (req, res) => {
res.status(404).json({
error: {
message: `Endpoint ${req.path} not found`,
type: 'invalid_request_error',
code: 404
}
});
});
app.listen(PORT, () => {
console.log(`OpenAI to NVIDIA NIM Proxy running on port ${PORT}`);
console.log(`Health check: http://localhost:${PORT}/health`);
console.log(`Reasoning display: ${SHOW_REASONING ? 'ENABLED' : 'DISABLED'}`);
console.log(`Thinking mode: ${ENABLE_THINKING_MODE ? 'ENABLED' : 'DISABLED'}`);
});package.json{
"name": "openai-nim-proxy",
"version": "1.0.0",
"description": "OpenAI compatible proxy for NVIDIA NIM API",
"main": "server.js",
"scripts": {
"start": "node server.js"
},
"dependencies": {
"express": "^4.18.2",
"cors": "^2.8.5",
"axios": "^1.6.0"
},
"engines": {
"node": "18.x"
}
}openai-nim-proxy repositoryNIM_API_KEYhttps://openai-nim-proxy-production-xxxx.up.railway.app)https://your-domain.up.railway.app/health{
"status": "ok",
"service": "OpenAI to NVIDIA NIM Proxy",
"reasoning_display": false,
"thinking_mode": false
}https://your-domain.up.railway.appdummy-key)gpt-4o, gpt-4, or claude-3-opusconst SHOW_REASONING = false; // Change to true to see thinking processWhen true:
<think> tags <think>
[reasoning process]
</think>
[final answer]When false (default):
const ENABLE_THINKING_MODE = false; // Change to true for models with thinking toggleWhen true:
extra_body: { chat_template_kwargs: { thinking: true } } parameterWhen false (default):
You can customize which NVIDIA models are used:
const MODEL_MAPPING = {
'gpt-3.5-turbo': 'nvidia/llama-3.1-nemotron-ultra-253b-v1',
'gpt-4': 'qwen/qwen3-coder-480b-a35b-instruct',
// Add more mappings here
};| Janitor AI Model | Maps to NVIDIA Model |
|---|---|
| gpt-3.5-turbo | Llama Nemotron 253B |
| gpt-4 | Qwen3 Coder 480B |
| gpt-4-turbo | Kimi K2 |
| gpt-4o | DeepSeek V3.1 |
| claude-3-opus | GPT-OSS 120B |
| claude-3-sonnet | GPT-OSS 20B |
| gemini-pro | Qwen3 Next 80B Thinking |
Solution: This is normal! Test /health endpoint instead.
Solutions:
NIM_API_KEY is set correctlySolutions:
Solutions:
Solution:
NIM_API_KEY environment variableNIM_API_KEY environment variablefly launchfly secrets set NIM_API_KEY=your_keyhttps://your-domain.up.railway.app/healthhttps://your-domain.up.railway.app/v1/modelshttps://your-domain.up.railway.app/v1/chat/completionsYour proxy is now live and ready to use! 🚀