Add vibe-aware playlists with CLAP text embeddings

Blend taste profile with text-embedded mood descriptions (e.g. "chill
ambient lo-fi") using pre-blended vector search against the existing
HNSW index. New optional `vibe` and `alpha` params on playlist generate
and recommendations endpoints. Backward compatible — no vibe = pure
taste profile (alpha=1.0).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 13:14:28 -06:00
parent 23fd0e9804
commit 1b739fbd20
9 changed files with 146 additions and 12 deletions

View File

@@ -41,6 +41,16 @@ curl http://192.168.86.51:8321/api/recommendations?limit=20
curl -X POST http://192.168.86.51:8321/api/playlists/generate \ curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{"total_tracks":20,"known_pct":30,"speaker_entity":"media_player.living_room_speaker_2","auto_play":true}' -d '{"total_tracks":20,"known_pct":30,"speaker_entity":"media_player.living_room_speaker_2","auto_play":true}'
# Generate a vibe-based playlist (mood/activity matching)
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{"total_tracks":15,"vibe":"chill ambient lo-fi","speaker_entity":"media_player.living_room_speaker_2","auto_play":true}'
# Vibe with custom blend (alpha: 0=pure vibe, 0.5=blend, 1=pure taste)
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{"total_tracks":15,"vibe":"upbeat party music","alpha":0.3,"auto_play":true,"speaker_entity":"media_player.living_room_speaker_2"}'
``` ```
## Environment Variables ## Environment Variables

View File

@@ -51,7 +51,7 @@ docker exec haunt-fm alembic upgrade head
| POST | `/api/history/webhook` | Log a listen event (from HA automation) | | POST | `/api/history/webhook` | Log a listen event (from HA automation) |
| POST | `/api/admin/discover` | Expand listening history via Last.fm | | POST | `/api/admin/discover` | Expand listening history via Last.fm |
| POST | `/api/admin/build-taste-profile` | Rebuild taste profile from embeddings | | POST | `/api/admin/build-taste-profile` | Rebuild taste profile from embeddings |
| GET | `/api/recommendations?limit=50` | Get ranked recommendations | | GET | `/api/recommendations?limit=50&vibe=chill+ambient` | Get ranked recommendations (optional vibe) |
| POST | `/api/playlists/generate` | Generate and optionally play a playlist | | POST | `/api/playlists/generate` | Generate and optionally play a playlist |
## Usage ## Usage
@@ -69,11 +69,26 @@ curl -X POST http://192.168.86.51:8321/api/playlists/generate \
}' }'
``` ```
### Generate a vibe-based playlist
```bash
curl -X POST http://192.168.86.51:8321/api/playlists/generate \
-H "Content-Type: application/json" \
-d '{
"total_tracks": 15,
"vibe": "chill ambient lo-fi",
"speaker_entity": "media_player.living_room_speaker_2",
"auto_play": true
}'
```
**Parameters:** **Parameters:**
- `total_tracks` — number of tracks in the playlist (default 20) - `total_tracks` — number of tracks in the playlist (default 20)
- `known_pct` — percentage of known-liked tracks vs new discoveries (default 30) - `known_pct` — percentage of known-liked tracks vs new discoveries (default 30)
- `speaker_entity` — Music Assistant entity ID (must be a `_2` suffix entity) - `speaker_entity` — Music Assistant entity ID (must be a `_2` suffix entity)
- `auto_play` — `true` to immediately play on the speaker - `auto_play` — `true` to immediately play on the speaker
- `vibe` — text description of the desired mood/vibe (e.g. "chill lo-fi beats", "upbeat party music"). Uses CLAP text embeddings to match tracks in the same vector space as audio.
- `alpha` — blend factor between taste profile and vibe (default 0.5). `1.0` = pure taste profile, `0.0` = pure vibe match, `0.5` = equal blend. Ignored when no vibe is provided.
### Speaker entities ### Speaker entities
@@ -113,6 +128,9 @@ curl -X POST http://192.168.86.51:8321/api/admin/build-taste-profile
# Get recommendations (without playing) # Get recommendations (without playing)
curl http://192.168.86.51:8321/api/recommendations?limit=20 curl http://192.168.86.51:8321/api/recommendations?limit=20
# Get vibe-matched recommendations
curl "http://192.168.86.51:8321/api/recommendations?limit=20&vibe=dark+electronic&alpha=0.3"
``` ```
## Pipeline Stages ## Pipeline Stages

View File

@@ -0,0 +1,26 @@
"""Add vibe and alpha columns to playlists
Revision ID: 002
Revises: 001
Create Date: 2026-02-22
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Revision identifiers for this migration: "002" applies on top of "001"
# (see the "Revises" line in the module docstring).
revision: str = "002"
down_revision: Union[str, None] = "001"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the nullable ``vibe`` (text) and ``alpha`` (real) columns to ``playlists``."""
    new_columns = (
        sa.Column("vibe", sa.Text, nullable=True),
        sa.Column("alpha", sa.REAL, nullable=True),
    )
    # Added in declaration order: vibe first, then alpha.
    for column in new_columns:
        op.add_column("playlists", column)
def downgrade() -> None:
    """Drop the ``alpha`` and ``vibe`` columns from ``playlists``."""
    # Removed in the reverse of the order in which upgrade() adds them.
    for column_name in ("alpha", "vibe"):
        op.drop_column("playlists", column_name)

View File

@@ -1,5 +1,5 @@
from fastapi import APIRouter, Depends from fastapi import APIRouter, Depends
from pydantic import BaseModel from pydantic import BaseModel, Field
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
@@ -17,15 +17,31 @@ class GenerateRequest(BaseModel):
name: str | None = None name: str | None = None
speaker_entity: str | None = None speaker_entity: str | None = None
auto_play: bool = False auto_play: bool = False
vibe: str | None = None
alpha: float = Field(default=0.5, ge=0.0, le=1.0)
@router.post("/generate") @router.post("/generate")
async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_session)): async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_session)):
# Compute text embedding for vibe description
vibe_embedding = None
if req.vibe:
from haunt_fm.services.embedding import embed_text, is_model_loaded, load_model
if not is_model_loaded():
load_model()
vibe_embedding = embed_text(req.vibe)
# Force pure taste when no vibe provided (preserves current behavior)
alpha = req.alpha if req.vibe else 1.0
playlist = await generate_playlist( playlist = await generate_playlist(
session, session,
total_tracks=req.total_tracks, total_tracks=req.total_tracks,
known_pct=req.known_pct, known_pct=req.known_pct,
name=req.name, name=req.name,
vibe_embedding=vibe_embedding,
alpha=alpha,
vibe_text=req.vibe,
) )
# Load playlist tracks with track info # Load playlist tracks with track info
@@ -58,6 +74,8 @@ async def generate(req: GenerateRequest, session: AsyncSession = Depends(get_ses
"name": playlist.name, "name": playlist.name,
"total_tracks": playlist.total_tracks, "total_tracks": playlist.total_tracks,
"known_pct": playlist.known_pct, "known_pct": playlist.known_pct,
"vibe": playlist.vibe,
"alpha": playlist.alpha,
"tracks": track_list, "tracks": track_list,
"auto_played": req.auto_play and req.speaker_entity is not None, "auto_played": req.auto_play and req.speaker_entity is not None,
} }

View File

@@ -11,9 +11,22 @@ router = APIRouter(prefix="/api")
async def recommendations(
    limit: int = Query(default=50, ge=1, le=200),
    include_known: bool = Query(default=False),
    vibe: str | None = Query(default=None),
    alpha: float = Query(default=0.5, ge=0.0, le=1.0),
    session: AsyncSession = Depends(get_session),
):
    """Return ranked recommendations, optionally steered by a text vibe.

    Args:
        limit: Maximum number of recommendations to return (1-200).
        include_known: When true, tracks are not excluded for being known.
        vibe: Optional free-text mood description. Blank or whitespace-only
            values are treated as "no vibe".
        alpha: Blend factor used only when a vibe is given
            (1.0 = pure taste, 0.0 = pure vibe).
        session: Database session injected by FastAPI.

    Returns:
        A dict with the recommendation list, its length, the vibe that was
        applied (or None) and the alpha that was effectively used.
    """
    # A whitespace-only vibe is truthy but carries no meaning; normalise it to
    # None so it neither triggers a model load nor blends an embedding of
    # blank text into the query vector.
    vibe = (vibe or "").strip() or None

    vibe_embedding = None
    if vibe:
        from haunt_fm.services.embedding import embed_text, is_model_loaded, load_model

        # NOTE(review): load_model() and embed_text() are synchronous calls
        # inside an async handler, so the event loop is blocked while they
        # run. Offloading them would need a lock around the shared model and
        # processor, which is outside this function's scope.
        if not is_model_loaded():
            load_model()
        vibe_embedding = embed_text(vibe)

    # Force pure taste when no vibe provided
    effective_alpha = alpha if vibe else 1.0

    results = await get_recommendations(
        session,
        limit=limit,
        exclude_known=not include_known,
        vibe_embedding=vibe_embedding,
        alpha=effective_alpha,
    )
    return {
        "recommendations": results,
        "count": len(results),
        "vibe": vibe,
        "alpha": effective_alpha,
    }

View File

@@ -94,6 +94,8 @@ class Playlist(Base):
name: Mapped[str | None] = mapped_column(Text) name: Mapped[str | None] = mapped_column(Text)
known_pct: Mapped[int] = mapped_column(Integer, nullable=False) known_pct: Mapped[int] = mapped_column(Integer, nullable=False)
total_tracks: Mapped[int] = mapped_column(Integer, nullable=False) total_tracks: Mapped[int] = mapped_column(Integer, nullable=False)
vibe: Mapped[str | None] = mapped_column(Text)
alpha: Mapped[float | None] = mapped_column(REAL)
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now())
tracks: Mapped[list["PlaylistTrack"]] = relationship(back_populates="playlist", cascade="all, delete-orphan") tracks: Mapped[list["PlaylistTrack"]] = relationship(back_populates="playlist", cascade="all, delete-orphan")

View File

@@ -53,3 +53,23 @@ def embed_audio(audio: np.ndarray, sample_rate: int = 48000) -> np.ndarray:
# Normalize to unit vector # Normalize to unit vector
emb = emb / np.linalg.norm(emb) emb = emb / np.linalg.norm(emb)
return emb return emb
def embed_text(text: str) -> np.ndarray:
    """Embed a text description into the same 512-dim CLAP space as audio.

    Args:
        text: Free-form mood/vibe description (e.g. "chill ambient lo-fi").

    Returns:
        The text embedding scaled to unit L2 norm.

    Raises:
        RuntimeError: If the CLAP model has not been loaded yet.
        ValueError: If the resulting embedding has a zero or non-finite norm
            and therefore cannot be normalized.
    """
    import torch

    if _model is None or _processor is None:
        raise RuntimeError("CLAP model not loaded. Call load_model() first.")

    # truncation=True keeps over-long, user-supplied descriptions within the
    # text encoder's maximum sequence length instead of failing in the forward
    # pass. NOTE(review): assumes _processor forwards tokenizer kwargs the way
    # the Hugging Face CLAP processor does — confirm against load_model().
    inputs = _processor(
        text=[text], return_tensors="pt", padding=True, truncation=True
    )
    with torch.no_grad():
        output = _model.get_text_features(**inputs)

    # The call may hand back either a plain tensor or an output object that
    # exposes ``pooler_output``; take the first (only) row in both cases.
    features = output.pooler_output if hasattr(output, "pooler_output") else output
    emb = features[0].numpy()

    # Refuse to return a NaN/inf vector: dividing by a zero or non-finite norm
    # would silently poison the downstream similarity query.
    norm = np.linalg.norm(emb)
    if not np.isfinite(norm) or norm == 0:
        raise ValueError("Text embedding has zero or non-finite norm; cannot normalize.")
    return emb / norm

View File

@@ -1,6 +1,7 @@
import logging import logging
import random import random
import numpy as np
from sqlalchemy import func, select from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
@@ -20,6 +21,9 @@ async def generate_playlist(
total_tracks: int = 20, total_tracks: int = 20,
known_pct: int = 30, known_pct: int = 30,
name: str | None = None, name: str | None = None,
vibe_embedding: np.ndarray | None = None,
alpha: float = 0.5,
vibe_text: str | None = None,
) -> Playlist: ) -> Playlist:
"""Generate a playlist mixing known-liked tracks with new recommendations. """Generate a playlist mixing known-liked tracks with new recommendations.
@@ -48,7 +52,10 @@ async def generate_playlist(
new_count = total_tracks - known_count new_count = total_tracks - known_count
# Get new recommendations # Get new recommendations
recs = await get_recommendations(session, limit=new_count * 2, exclude_known=True) recs = await get_recommendations(
session, limit=new_count * 2, exclude_known=True,
vibe_embedding=vibe_embedding, alpha=alpha,
)
new_tracks = [(r["track_id"], r["similarity"]) for r in recs[:new_count]] new_tracks = [(r["track_id"], r["similarity"]) for r in recs[:new_count]]
# Interleave: spread known tracks throughout the playlist # Interleave: spread known tracks throughout the playlist
@@ -84,6 +91,8 @@ async def generate_playlist(
name=name or f"haunt-fm mix ({len(interleaved)} tracks)", name=name or f"haunt-fm mix ({len(interleaved)} tracks)",
known_pct=known_pct, known_pct=known_pct,
total_tracks=len(interleaved), total_tracks=len(interleaved),
vibe=vibe_text,
alpha=alpha if vibe_text else None,
) )
session.add(playlist) session.add(playlist)
await session.flush() await session.flush()

View File

@@ -1,5 +1,6 @@
import logging import logging
import numpy as np
from sqlalchemy import select, text from sqlalchemy import select, text
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
@@ -18,22 +19,40 @@ async def get_recommendations(
limit: int = 50, limit: int = 50,
exclude_known: bool = True, exclude_known: bool = True,
profile_name: str = "default", profile_name: str = "default",
vibe_embedding: np.ndarray | None = None,
alpha: float = 0.5,
) -> list[dict]: ) -> list[dict]:
"""Get track recommendations ranked by cosine similarity to taste profile.""" """Get track recommendations ranked by cosine similarity to taste profile.
Args:
vibe_embedding: Optional 512-dim text embedding for vibe/mood matching.
alpha: Blend factor. 1.0 = pure taste, 0.0 = pure vibe, 0.5 = equal blend.
"""
# Load taste profile # Load taste profile
profile = ( profile = (
await session.execute(select(TasteProfile).where(TasteProfile.name == profile_name)) await session.execute(select(TasteProfile).where(TasteProfile.name == profile_name))
).scalar_one_or_none() ).scalar_one_or_none()
if profile is None: if profile is None and vibe_embedding is None:
return [] return []
# Determine query vector: blend taste profile with vibe embedding
if vibe_embedding is not None and profile is not None:
taste_emb = np.array(profile.embedding, dtype=np.float32)
vibe_emb = vibe_embedding.astype(np.float32)
query_emb = alpha * taste_emb + (1.0 - alpha) * vibe_emb
norm = np.linalg.norm(query_emb)
if norm > 0:
query_emb = query_emb / norm
elif vibe_embedding is not None:
# No taste profile yet — pure vibe (cold start)
query_emb = vibe_embedding.astype(np.float32)
else:
query_emb = np.array(profile.embedding, dtype=np.float32)
# Use pgvector cosine distance operator (<=>) # Use pgvector cosine distance operator (<=>)
# Lower distance = more similar # Lower distance = more similar
if exclude_known: if exclude_known:
# Subquery: track IDs that have listen events
known_ids_subq = select(ListenEvent.track_id).distinct().subquery()
query = text(""" query = text("""
SELECT t.id, t.title, t.artist, t.album, t.genre, SELECT t.id, t.title, t.artist, t.album, t.genre,
1 - (te.embedding <=> :profile_embedding) AS similarity 1 - (te.embedding <=> :profile_embedding) AS similarity
@@ -54,8 +73,7 @@ async def get_recommendations(
""") """)
# Format embedding as pgvector literal: [n1,n2,...] # Format embedding as pgvector literal: [n1,n2,...]
emb = profile.embedding emb_str = "[" + ",".join(str(float(x)) for x in query_emb) + "]"
emb_str = "[" + ",".join(str(float(x)) for x in emb) + "]"
result = await session.execute( result = await session.execute(
query, query,