Configuration

Configuration modules manage connections to external services and application settings.

Database Configuration

MongoDB (`config/database.js`)

import mongoose from 'mongoose';
import logger from './logger.js';

export async function connectDB() {
  try {
    const conn = await mongoose.connect(process.env.MONGODB_URI, {
      maxPoolSize: 10,
      serverSelectionTimeoutMS: 5000,
      socketTimeoutMS: 45000,
    });

    logger.info(`MongoDB connected: ${conn.connection.host}`);

    // Handle connection events
    mongoose.connection.on('error', (err) => {
      logger.error('MongoDB error:', err);
    });

    mongoose.connection.on('disconnected', () => {
      logger.warn('MongoDB disconnected');
    });

    return conn;
  } catch (error) {
    logger.error('MongoDB connection failed:', error);
    process.exit(1);
  }
}

Redis (`config/redis.js`)

import { Redis } from 'ioredis';
import logger from './logger.js';

export const redisClient = new Redis({
  host: process.env.REDIS_HOST || 'localhost',
  port: parseInt(process.env.REDIS_PORT) || 6378,
  maxRetriesPerRequest: 3,
  retryDelayOnFailover: 100,
});

redisClient.on('connect', () => {
  logger.info('Redis connected');
});

redisClient.on('error', (err) => {
  logger.error('Redis error:', err);
});

// For BullMQ (needs IORedis instance)
export const redisConnection = {
  host: process.env.REDIS_HOST || 'localhost',
  port: parseInt(process.env.REDIS_PORT) || 6378,
};

LLM Configuration

LLM Provider (`config/llm.js`)

import { AzureChatOpenAI } from '@langchain/openai';
import logger from './logger.js';

let defaultLLM = null;
let judgeLLM = null;

export async function getDefaultLLM() {
  if (defaultLLM) return defaultLLM;

  defaultLLM = new AzureChatOpenAI({
    azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY,
    azureOpenAIApiInstanceName: extractInstanceName(process.env.AZURE_OPENAI_ENDPOINT),
    azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_LLM_DEPLOYMENT || 'gpt-4o-mini',
    azureOpenAIApiVersion: process.env.AZURE_OPENAI_API_VERSION || '2024-02-15-preview',
    temperature: parseFloat(process.env.LLM_TEMPERATURE) || 0.3,
    maxTokens: parseInt(process.env.LLM_MAX_TOKENS) || 2000,
  });

  // Warm up the model
  try {
    await defaultLLM.invoke('Hello');
    logger.info('LLM initialized and warmed up', {
      deployment: process.env.AZURE_OPENAI_LLM_DEPLOYMENT,
    });
  } catch (error) {
    logger.warn('LLM warmup failed:', error.message);
  }

  return defaultLLM;
}

export async function getJudgeLLM() {
  if (judgeLLM) return judgeLLM;

  judgeLLM = new AzureChatOpenAI({
    azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY,
    azureOpenAIApiInstanceName: extractInstanceName(process.env.AZURE_OPENAI_ENDPOINT),
    azureOpenAIApiDeploymentName: process.env.JUDGE_LLM_MODEL || 'gpt-4o-mini',
    azureOpenAIApiVersion: process.env.AZURE_OPENAI_API_VERSION || '2024-02-15-preview',
    temperature: 0,  // Deterministic for evaluation
  });

  return judgeLLM;
}

Embeddings (`config/embeddings.js`)

The embeddings module wraps AzureOpenAIEmbeddings from @langchain/openai inside a custom BatchedEmbeddings class that adds automatic batching, text truncation, and metrics tracking.

Provider: Azure OpenAI — text-embedding-3-small (1 536-dimension vectors, Cosine distance). Hybrid mode (ENABLE_HYBRID_EMBEDDINGS) is disabled; Azure is the only active provider.

// Simplified structure — actual class has ~350 lines
import { AzureOpenAIEmbeddings } from '@langchain/openai';

const baseEmbeddings = new AzureOpenAIEmbeddings({
  azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY,
  azureOpenAIApiInstanceName: AZURE_OPENAI_INSTANCE_NAME, // extracted from AZURE_OPENAI_ENDPOINT
  azureOpenAIApiDeploymentName: process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT || 'text-embedding-3-small',
  azureOpenAIApiVersion: process.env.AZURE_OPENAI_API_VERSION || '2024-02-15-preview',
  maxConcurrency: parseInt(process.env.EMBEDDING_MAX_CONCURRENCY) || 10,
});

export const embeddings = new BatchedEmbeddings(baseEmbeddings, BATCH_CONFIG);

Batch Configuration

export const BATCH_CONFIG = {
  maxChunks: parseInt(process.env.EMBEDDING_BATCH_MAX_CHUNKS) || 50,  // max chunks per API call
  maxTokens: parseInt(process.env.EMBEDDING_BATCH_MAX_TOKENS) || 8192, // max tokens per batch
  charsPerToken: 4,                                                     // estimation ratio
};

Batches are split whenever either limit (maxChunks or maxTokens) would be exceeded. Each batch is sent as a single embedDocuments() call.

Truncation & Retry

Texts exceeding maxCharsPerChunk are truncated before embedding. If the API still returns a context-length error, the system retries with progressively shorter inputs:

Attempt	Text length
1	Full (truncated to `maxCharsPerChunk`)
2	50% of original
3	25% of original
4	10% of original (minimum 100 chars)

Metrics

import { getEmbeddingMetrics } from './config/embeddings.js';

const m = getEmbeddingMetrics();
// { totalChunksEmbedded, totalBatches, chunksPerSecond, avgBatchLatencyMs, errors, truncations }

Vector Store Configuration

Qdrant (`config/vectorStore.js`)

import { QdrantClient } from '@qdrant/js-client-rest';
import { QdrantVectorStore } from '@langchain/qdrant';
import { getEmbeddings } from './embeddings.js';

const qdrantClient = new QdrantClient({
  url: process.env.QDRANT_URL || 'http://localhost:6333',
});

export async function getVectorStore(documents = []) {
  const embeddings = await getEmbeddings();
  const collectionName = process.env.QDRANT_COLLECTION_NAME || 'documents';

  // Ensure collection exists
  const collections = await qdrantClient.getCollections();
  const exists = collections.collections.some(c => c.name === collectionName);

  if (!exists) {
    await qdrantClient.createCollection(collectionName, {
      vectors: {
        size: 1536,  // text-embedding-3-small dimension
        distance: 'Cosine',
      },
    });
  }

  return new QdrantVectorStore(embeddings, {
    client: qdrantClient,
    collectionName,
  });
}

export { qdrantClient };

Queue Configuration

BullMQ Queues (`config/queue.js`)

import { Queue } from 'bullmq';
import { redisConnection } from './redis.js';

export const assessmentQueue = new Queue('assessmentJobs', {
  connection: redisConnection,
  defaultJobOptions: {
    attempts: 3,
    backoff: { type: 'exponential', delay: 30000 },
    removeOnComplete: { count: 50, age: 7 * 24 * 60 * 60 },
    removeOnFail: { count: 100 },
  },
});

Logging Configuration

Winston Logger (`config/logger.js`)

import winston from 'winston';

const logFormat = winston.format.combine(
  winston.format.timestamp(),
  winston.format.errors({ stack: true }),
  winston.format.json()
);

const logger = winston.createLogger({
  level: process.env.LOG_LEVEL || 'info',
  format: logFormat,
  defaultMeta: { service: 'rag-backend' },
  transports: [
    new winston.transports.Console({
      format: winston.format.combine(
        winston.format.colorize(),
        winston.format.simple()
      ),
    }),
  ],
});

// Add file transport in production
if (process.env.NODE_ENV === 'production') {
  logger.add(new winston.transports.File({
    filename: 'logs/error.log',
    level: 'error',
  }));
  logger.add(new winston.transports.File({
    filename: 'logs/combined.log',
  }));
}

export default logger;

Guardrails Configuration

LLM Guardrails (`config/guardrails.js`)

export const guardrailsConfig = {
  input: {
    maxLength: 5000,
    blockedPatterns: [
      /ignore previous instructions/i,
      /disregard.*system/i,
    ],
  },

  output: {
    hallucinationBlocking: {
      enabled: true,
      strictMode: process.env.STRICT_HALLUCINATION_MODE === 'true',
    },
    confidenceHandling: {
      minConfidence: 0.4,
      messages: {
        blocked: "I wasn't able to find reliable information about this topic in your documents.",
        warning: "Note: This answer has lower confidence.",
      },
    },
  },

  retrieval: {
    maxDocuments: 15,
    maxRetryDocuments: 20,
    minRelevanceScore: 0.3,
  },

  generation: {
    retry: {
      enabled: true,
      minConfidenceForRetry: 0.2,
      retryTimeoutMs: 30000,
      cooldownMs: 1000,
    },
  },
};

Environment Variables

Complete `.env.example`

# ===========================================
# Server Configuration
# ===========================================
PORT=3007
NODE_ENV=development
LOG_LEVEL=info

# ===========================================
# MongoDB
# ===========================================
MONGODB_URI=mongodb://localhost:27017/enterprise_rag

# ===========================================
# Redis
# ===========================================
REDIS_URL=redis://localhost:6378

# ===========================================
# Qdrant Vector Store
# ===========================================
QDRANT_URL=http://localhost:6333
QDRANT_COLLECTION_NAME=documents

# ===========================================
# Azure OpenAI
# ===========================================
LLM_PROVIDER=azure_openai
EMBEDDING_PROVIDER=azure
AZURE_OPENAI_API_KEY=your-api-key
AZURE_OPENAI_ENDPOINT=https://your-resource.openai.azure.com
AZURE_OPENAI_LLM_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-small
AZURE_OPENAI_API_VERSION=2024-02-15-preview
LLM_TEMPERATURE=0.3
LLM_MAX_TOKENS=2000
JUDGE_LLM_MODEL=gpt-4o-mini

# ===========================================
# Embedding Batching (optional — defaults shown)
# ===========================================
EMBEDDING_MAX_CONCURRENCY=10          # parallel API calls (S0 tier: 5-10 safe)
EMBEDDING_BATCH_MAX_CHUNKS=50         # max chunks per batch request
EMBEDDING_BATCH_MAX_TOKENS=8192       # max tokens per batch request
EMBEDDING_CONTEXT_TOKENS=8192         # model context window (for per-chunk truncation)

# ===========================================
# LLM Timeouts
# ===========================================
LLM_INVOKE_TIMEOUT=60000
LLM_STREAM_INITIAL_TIMEOUT=30000
LLM_STREAM_CHUNK_TIMEOUT=10000

# ===========================================
# JWT Authentication
# ===========================================
JWT_ACCESS_SECRET=             # generate: openssl rand -base64 48
JWT_REFRESH_SECRET=            # generate: openssl rand -base64 48
JWT_ACCESS_EXPIRY=15m
JWT_REFRESH_EXPIRY=7d

# ===========================================
# Encryption
# ===========================================
ENCRYPTION_KEY=                # generate: openssl rand -hex 32

# ===========================================
# CORS
# ===========================================
FRONTEND_URL=http://localhost:3000
ALLOWED_ORIGINS=http://localhost:3000

# ===========================================
# Chunking Configuration
# ===========================================
MAX_GROUP_TOKENS=400
MIN_GROUP_TOKENS=200
MAX_LIST_ITEMS=15

# ===========================================
# Quality Guardrails
# ===========================================
GUARDRAIL_STRICT_HALLUCINATION_BLOCKING=true
ENABLE_CODE_FILTER=true

# ===========================================
# Observability
# ===========================================
LOG_RETRIEVAL_TRACE=false
LANGSMITH_API_KEY=your-langsmith-key
LANGSMITH_PROJECT=retrieva
LANGSMITH_ENABLED=true

Configuration Validation

// config/envValidator.js

const requiredVars = [
  'MONGODB_URI',
  'REDIS_HOST',
  'QDRANT_URL',
  'JWT_SECRET',
  'JWT_REFRESH_SECRET',
];

export function validateEnv() {
  const missing = requiredVars.filter(v => !process.env[v]);

  if (missing.length > 0) {
    throw new Error(`Missing required environment variables: ${missing.join(', ')}`);
  }

  // Validate JWT secret strength
  if (process.env.JWT_SECRET.length < 32) {
    console.warn('WARNING: JWT_SECRET should be at least 32 characters');
  }
}

Database Configuration​

MongoDB (config/database.js)​

Redis (config/redis.js)​

LLM Configuration​

LLM Provider (config/llm.js)​

Embeddings (config/embeddings.js)​

Batch Configuration​

Truncation & Retry​

Metrics​

Vector Store Configuration​

Qdrant (config/vectorStore.js)​

Queue Configuration​

BullMQ Queues (config/queue.js)​

Logging Configuration​

Winston Logger (config/logger.js)​

Guardrails Configuration​

LLM Guardrails (config/guardrails.js)​

Environment Variables​

Complete .env.example​

Configuration Validation​