Examples
Reference configurations for common setups.
RAG application
# RAG example: declares two models (`embedder`, `generator`) and a five-step
# pipeline: ingest -> embed -> store -> retrieve -> generate.
# NOTE(review): nesting is reconstructed (each step's options live under the
# step key) -- confirm against the consuming tool's schema.
models:
  - name: embedder
    type: sentence-transformers
    model: all-MiniLM-L6-v2
  - name: generator
    type: llama2-13b
    device: cuda

pipeline:
  - ingest:
      source: ./documents
      # Units for chunk_size/overlap are not stated in this example -- TODO confirm.
      chunk_size: 512
      overlap: 50
  # `model` values in the steps below match names declared under `models:`.
  - embed:
      model: embedder
  - store:
      type: chromadb
      persist: ./db
  - retrieve:
      model: embedder
      # presumably the number of results returned per query -- verify.
      top_k: 5
  - generate:
      model: generator
      # Literal block scalar: line breaks are preserved and the body must be
      # indented deeper than `template`. `{context}` and `{question}` look like
      # placeholders filled in by the tool -- confirm the substitution syntax.
      template: |
        Context: {context}
        Question: {question}
        Answer:
Multi-language
# Multi-language example: declares `translator` and `multilingual` models and
# a four-step pipeline: detect_language -> translate -> process -> translate_back.
# NOTE(review): nesting is reconstructed (each step's options live under the
# step key) -- confirm against the consuming tool's schema.
models:
  - name: translator
    type: m2m100
  - name: multilingual
    type: xlm-roberta

pipeline:
  - detect_language:
      model: multilingual
  - translate:
      model: translator
      # NOTE(review): target is spelled `english` while the condition below
      # compares against the code "en" -- confirm which form the tool expects.
      target: english
      # Quoted so the embedded double quotes and `!=` are unambiguously one string.
      when: 'language != "en"'
  - process:
      # NOTE(review): `llama2` is not declared under `models:` in this example;
      # presumably it resolves to a built-in model -- verify.
      model: llama2
  - translate_back:
      model: translator
      # Quoted so the placeholder stays a string even if it is expanded textually
      # before parsing (e.g. the language code `no` would otherwise read as a
      # boolean under YAML 1.1).
      # NOTE(review): the condition above uses `language`, this uses
      # `detected_language` -- confirm the variable name.
      target: '${detected_language}'
A/B testing
# A/B testing example: one experiment (`prompt-test`) with two variants that
# use the same model and differ only in the prompt text.
# NOTE(review): nesting is reconstructed; in particular `metrics` is placed
# under the experiment rather than at top level -- confirm against the
# consuming tool's schema.
experiments:
  - name: prompt-test
    variants:
      - name: variant-a
        model: llama2
        prompt: 'You are a helpful assistant.'
        # presumably a relative traffic share (the two weights sum to 100) -- verify.
        weight: 50
      - name: variant-b
        model: llama2
        prompt: 'You are a knowledgeable expert.'
        weight: 50
    metrics:
      - response_quality
      - user_satisfaction
      - response_time