OpenWebUI: python knowledge PDF CLI API upload
owui_upload_kb.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Upload PDF ke Open WebUI Knowledge via API (tanpa GUI).
Fitur:
- Buat knowledge collection baru (selalu dibuat baru; tidak mengecek collection yang sudah ada)
- Upload banyak PDF (folder / list path)
- Tambah setiap file ke collection
- Anti-duplikat sederhana (skip jika server balas "Duplicate content")
- Uji query RAG terhadap collection (opsional)
Dok:
- Upload file: POST /api/v1/files/
- Tambah ke knowledge: POST /api/v1/knowledge/{id}/file/add
- Buat knowledge: POST /api/v1/knowledge/create
"""
import os
import sys
import time
import json
import glob
import argparse
import requests
from typing import List, Optional
def api_headers(api_key: str, accept_json: bool = True):
    """Build the HTTP headers shared by every Open WebUI API request.

    Args:
        api_key: Bearer token for the API. Leading and trailing whitespace
            is removed before it is placed in the header.
        accept_json: When true, also send ``Accept: application/json``.

    Returns:
        A new dict mapping header names to header values.
    """
    # A key pasted from a file or shell variable often carries a trailing
    # newline or spaces, which would make the Authorization header invalid.
    headers = {"Authorization": f"Bearer {api_key.strip()}"}
    if accept_json:
        headers["Accept"] = "application/json"
    return headers
def create_knowledge(base_url: str, api_key: str, name: str, description: str = "") -> str:
    """Create a knowledge collection and return its id.

    Sends ``POST {base_url}/api/v1/knowledge/create`` with the name and
    description as a JSON body.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Bearer token passed to ``api_headers``.
        name: Name of the collection to create.
        description: Optional description of the collection.

    Returns:
        The ``id`` field of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        RuntimeError: If the response body is not a JSON object carrying a
            non-empty ``id``.
    """
    url = f"{base_url}/api/v1/knowledge/create"
    payload = {"name": name, "description": description}
    r = requests.post(
        url,
        headers={**api_headers(api_key), "Content-Type": "application/json"},
        json=payload,
        timeout=120,
    )
    r.raise_for_status()
    try:
        data = r.json()
    except ValueError as err:
        # A 2xx answer that is not JSON (for example an HTML page served when
        # base_url points at the wrong service) would otherwise surface as a
        # bare decode error with no hint about what went wrong.
        raise RuntimeError(
            f"Gagal membuat knowledge: respons bukan JSON ({r.text[:200]!r})"
        ) from err
    # Guard against a JSON body that is not an object (list, string, null).
    kb_id = data.get("id") if isinstance(data, dict) else None
    if not kb_id:
        raise RuntimeError(f"Gagal membuat knowledge: {data}")
    return kb_id
def upload_file(base_url: str, api_key: str, file_path: str) -> Optional[str]:
    """Upload one file to the server and return the id the server assigns.

    Sends the file as multipart form data to ``POST {base_url}/api/v1/files/``
    with the content type ``application/pdf``. The function is best-effort:
    every failure is reported with a ``[WARN]`` line and signalled by
    returning ``None`` so that a batch run can carry on with the next file.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Bearer token passed to ``api_headers``.
        file_path: Path of the file to upload.

    Returns:
        The ``id`` field of the JSON response, or ``None`` when the file could
        not be read, the request failed, the server answered with a status
        of 400 or above, or the response carried no ``id``.
    """
    url = f"{base_url}/api/v1/files/"
    try:
        # The request must run while the file handle is still open.
        with open(file_path, "rb") as f:
            files = {"file": (os.path.basename(file_path), f, "application/pdf")}
            r = requests.post(url, headers=api_headers(api_key), files=files, timeout=600)
    except (OSError, requests.RequestException) as err:
        # Unreadable file, connection error or timeout: skip this file instead
        # of aborting the whole batch.
        print(f"[WARN] Upload gagal: {file_path} -> {err}")
        return None
    if r.status_code >= 400:
        # Typical causes are unusual or corrupted PDF files.
        print(f"[WARN] Upload gagal: {file_path} -> {r.status_code} {r.text}")
        return None
    try:
        data = r.json()
    except ValueError:
        data = None
    file_id = data.get("id") if isinstance(data, dict) else None
    if not file_id:
        # Without this message the caller would skip the file silently.
        print(f"[WARN] Upload gagal: {file_path} -> respons tanpa id: {r.text[:200]}")
        return None
    return file_id
def add_file_to_knowledge(base_url: str, api_key: str, knowledge_id: str, file_id: str) -> bool:
    """Attach an already-uploaded file to a knowledge collection.

    Sends ``POST {base_url}/api/v1/knowledge/{knowledge_id}/file/add`` with
    ``{"file_id": file_id}`` as the JSON body.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Bearer token passed to ``api_headers``.
        knowledge_id: Id of the target collection.
        file_id: Id of the previously uploaded file.

    Returns:
        ``True`` when the request succeeded, ``False`` when the server
        answered 400 with "Duplicate content" in the body.

    Raises:
        requests.HTTPError: For any other 4xx/5xx response.
    """
    endpoint = f"{base_url}/api/v1/knowledge/{knowledge_id}/file/add"
    request_headers = dict(api_headers(api_key))
    request_headers["Content-Type"] = "application/json"
    response = requests.post(
        endpoint,
        headers=request_headers,
        json={"file_id": file_id},
        timeout=120,
    )

    # A 400 whose body mentions "Duplicate content" is treated as a skip,
    # not as an error.
    is_duplicate = response.status_code == 400 and "Duplicate content" in response.text
    if not is_duplicate:
        response.raise_for_status()
        return True

    print(f"[INFO] Duplikat terdeteksi, skip file_id={file_id}")
    return False
def find_or_create_kb(base_url: str, api_key: str, kb_name: str, kb_description: str = "") -> str:
    """Announce and create a knowledge collection, returning its id.

    Despite the name, no lookup of existing collections is performed: every
    call prints a ``[STEP]`` line and delegates to ``create_knowledge``. If
    the UUID of an existing collection is already known, use it directly
    instead of calling this function; otherwise creating a new collection
    with the desired name is the simplest route.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Bearer token for the API.
        kb_name: Name of the collection to create.
        kb_description: Optional description of the collection.

    Returns:
        The id returned by ``create_knowledge``.
    """
    print(f"[STEP] Membuat knowledge '{kb_name}'")
    new_kb_id = create_knowledge(base_url, api_key, kb_name, kb_description)
    return new_kb_id
def collect_pdf_paths(input_path: str) -> List[str]:
    """Resolve the ``--input`` argument to a list of file paths.

    Args:
        input_path: Either a directory or a single file.

    Returns:
        For a directory: the sorted paths of the regular files directly inside
        it (no recursion) whose name ends in ``.pdf``, compared
        case-insensitively. For a file: a one-element list holding
        ``input_path`` unchanged, whatever its extension.

    Raises:
        FileNotFoundError: If ``input_path`` is neither a directory nor a file.
    """
    if os.path.isdir(input_path):
        # The directory is listed instead of globbed so that (a) "*.PDF" is
        # found on case-sensitive filesystems and (b) characters such as
        # "[" or "*" in the directory name are not read as a pattern.
        # Switch to os.walk() if recursion is ever needed.
        found = []
        for entry in os.listdir(input_path):
            # Dot-files are skipped, as a "*.pdf" glob would skip them too.
            if entry.startswith(".") or not entry.lower().endswith(".pdf"):
                continue
            full_path = os.path.join(input_path, entry)
            # A sub-directory that happens to be named "something.pdf"
            # cannot be uploaded.
            if os.path.isfile(full_path):
                found.append(full_path)
        return sorted(found)
    if os.path.isfile(input_path):
        return [input_path]
    raise FileNotFoundError(f"Path tidak ditemukan: {input_path}")
def rag_test_query(base_url: str, api_key: str, model: str, kb_id: str, user_query: str) -> str:
    """Ask one question against a knowledge collection to smoke-test RAG.

    Sends ``POST {base_url}/api/chat/completions`` with a single user message
    and ``files: [{"type": "collection", "id": kb_id}]`` so the collection is
    attached to the request.

    Args:
        base_url: Server root URL without a trailing slash.
        api_key: Bearer token passed to ``api_headers``.
        model: Model name to send in the request.
        kb_id: Id of the knowledge collection to attach.
        user_query: The question to send as the user message.

    Returns:
        ``choices[0].message.content`` from the response when that path
        exists; otherwise the whole response pretty-printed as JSON so the
        caller can still see what came back.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = f"{base_url}/api/chat/completions"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": user_query}],
        "files": [{"type": "collection", "id": kb_id}],
    }
    r = requests.post(
        url,
        headers={**api_headers(api_key), "Content-Type": "application/json"},
        json=payload,
        timeout=600,
    )
    r.raise_for_status()
    data = r.json()
    # The response is expected to follow the OpenAI-compatible schema.
    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Only the failures a dict/list lookup can produce are caught here, so
        # unrelated errors are not hidden behind the fallback.
        return json.dumps(data, ensure_ascii=False, indent=2)
def main():
    """Command-line entry point: upload PDFs into a new knowledge collection.

    Steps:
      1. Parse the arguments and collect the PDF paths from ``--input``.
      2. Create the knowledge collection.
      3. Upload each file and attach it to the collection, pausing
         ``--sleep-after-upload`` seconds after each attach attempt.
      4. If ``--test-query`` is given, run one RAG query and print the answer.

    Exits with status 1 when ``--input`` does not exist and with status 0
    when it contains no PDF. In both cases nothing is created on the server.
    """
    p = argparse.ArgumentParser(description="Upload PDF ke Open WebUI Knowledge (tanpa GUI).")
    p.add_argument("--base-url", required=True, help="Contoh: http://localhost:3000")
    p.add_argument("--api-key", required=True, help="API Key dari Settings > Account")
    p.add_argument("--kb-name", required=True, help="Nama knowledge (collection) yang akan dibuat")
    p.add_argument("--kb-desc", default="", help="Deskripsi knowledge")
    p.add_argument("--input", required=True, help="Path ke file PDF atau folder berisi PDF")
    p.add_argument("--model", default="llama3.1", help="Nama model untuk uji RAG (opsional)")
    p.add_argument("--test-query", default="", help="Jika diisi, lakukan uji query RAG ke collection")
    p.add_argument("--sleep-after-upload", type=int, default=3, help="Delay (detik) antar upload untuk memberi waktu proses embedding")
    args = p.parse_args()

    base_url = args.base_url.rstrip("/")
    api_key = args.api_key

    # 1) Collect the PDFs before touching the server, so that a wrong path or
    #    an empty folder does not leave an empty collection behind.
    try:
        pdfs = collect_pdf_paths(args.input)
    except FileNotFoundError as err:
        print(f"[ERR] {err}")
        sys.exit(1)
    if not pdfs:
        print("[WARN] Tidak ada PDF ditemukan.")
        sys.exit(0)
    print(f"[STEP] Menemukan {len(pdfs)} file PDF")

    # 2) Create the knowledge collection.
    kb_id = find_or_create_kb(base_url, api_key, args.kb_name, args.kb_desc)
    print(f"[OK] Knowledge dibuat: {kb_id}")

    # 3) Upload each file and attach it to the collection.
    uploaded = 0
    duplicates = 0
    # time.sleep() rejects negative values; treat them as "no delay".
    delay = max(0, args.sleep_after_upload)
    for path in pdfs:
        print(f"[STEP] Upload: {path}")
        file_id = upload_file(base_url, api_key, path)
        if not file_id:
            continue
        print(f"[OK] File terupload, file_id={file_id} -> tambah ke knowledge")
        try:
            # False means the server reported duplicate content: the file was
            # skipped, so it must not be counted as added.
            if add_file_to_knowledge(base_url, api_key, kb_id, file_id):
                uploaded += 1
            else:
                duplicates += 1
        except requests.HTTPError as e:
            print(f"[ERR] Gagal tambah ke knowledge: {e.response.status_code} {e.response.text}")
        # Short pause so embedding jobs do not pile up (useful on CPU-only hosts).
        time.sleep(delay)
    print(f"[DONE] Selesai. Total file berhasil diproses: {uploaded}/{len(pdfs)}")
    if duplicates:
        print(f"[INFO] Duplikat dilewati: {duplicates}")

    # 4) Optional RAG smoke test.
    if args.test_query:
        print(f"[TEST] Jalankan uji RAG model={args.model}")
        answer = rag_test_query(base_url, api_key, args.model, kb_id, args.test_query)
        print("\n=== JAWABAN RAG ===\n")
        print(answer)
        print("\n===================\n")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    sys.exit(main())
owui-pakai
# 0) Set variabel (opsional)
export OWUI_URL="http://localhost:3000"
export OWUI_KEY="sk-xxxxxx"

# 1) Jalankan: buat KB + upload semua PDF di folder ./dokumen
python3 owui_upload_kb.py \
  --base-url "$OWUI_URL" \
  --api-key "$OWUI_KEY" \
  --kb-name "Dokumen Kampus" \
  --kb-desc "Koleksi PDF Peraturan & Panduan" \
  --input "./dokumen" \
  --model "llama3.1" \
  --test-query "Ringkas poin penting dari semua dokumen tentang akreditasi BAN-PT."

# Atau untuk satu file saja:
python3 owui_upload_kb.py \
  --base-url "$OWUI_URL" \
  --api-key "$OWUI_KEY" \
  --kb-name "RIP ITTS 2025-2050" \
  --input "./RIP-ITTS-2025-2050.pdf" \
  --test-query "Apa visi utama dokumen ini?"