Skip to main content

Examples

Reference configurations for common setups.

RAG application

# RAG example: declares two models, then a pipeline whose stages refer to
# those models by their `name`.
models:
  # Embedding model; referenced by the `embed` and `retrieve` stages below.
  - name: embedder
    type: sentence-transformers
    model: all-MiniLM-L6-v2

  # Generation model; referenced by the `generate` stage below.
  - name: generator
    type: llama2-13b
    device: cuda

# Stages are written as a sequence; presumably executed top to bottom
# (confirm against the consuming tool).
pipeline:
  - ingest:
      source: ./documents
      # NOTE(review): units of chunk_size/overlap (tokens vs. characters)
      # are not stated here; confirm before tuning.
      chunk_size: 512
      overlap: 50
  - embed:
      model: embedder
  - store:
      type: chromadb
      persist: ./db
  - retrieve:
      model: embedder
      top_k: 5
  - generate:
      model: generator
      # Literal block scalar: line breaks in the prompt are preserved.
      # {context} and {question} look like placeholders filled in by the
      # tool at runtime (assumption; not shown in this snippet).
      template: |
        Context: {context}
        Question: {question}
        Answer:

Multi-language

# Multi-language example: detect the input language, translate to English
# when needed, process, then translate the result back.
models:
  - name: translator
    type: m2m100
  - name: multilingual
    type: xlm-roberta

pipeline:
  - detect_language:
      model: multilingual
  - translate:
      model: translator
      target: english
      # Quoted because the expression contains double quotes; the parsed
      # string is unchanged.
      when: 'language != "en"'
  - process:
      # NOTE(review): `llama2` is not declared under `models` in this
      # snippet; presumably a built-in or externally defined model. Confirm.
      model: llama2
  - translate_back:
      model: translator
      # Quoted so the templated value always stays a string.
      target: '${detected_language}'

A/B testing

# A/B testing example: one experiment with two prompt variants that use the
# same model and carry equal weights.
experiments:
  - name: prompt-test
    variants:
      - name: variant-a
        model: llama2
        prompt: 'You are a helpful assistant.'
        weight: 50
      - name: variant-b
        model: llama2
        prompt: 'You are a knowledgeable expert.'
        weight: 50
    # NOTE(review): assumed to belong to this experiment (sibling of
    # `variants`); the original nesting was lost. Confirm against the schema.
    metrics:
      - response_quality
      - user_satisfaction
      - response_time