Initial haunt-fm implementation

Full music recommendation pipeline: listening history capture via webhook,
Last.fm candidate discovery, iTunes preview download, CLAP audio embeddings
(512-dim), pgvector cosine similarity recommendations, playlist generation
with known/new track interleaving, and Music Assistant playback via HA.

Includes: FastAPI app, SQLAlchemy models, Alembic migrations, Docker Compose
with pgvector/pg17, status dashboard, and all API endpoints.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 08:36:36 -06:00
parent 897d0fe1fb
commit 7ff69449d6
39 changed files with 2049 additions and 0 deletions

View File

@@ -0,0 +1,113 @@
from datetime import datetime

from pgvector.sqlalchemy import Vector
from sqlalchemy import BigInteger, DateTime, ForeignKey, Index, Integer, Real, Text, func
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship

from haunt_fm.models.base import Base
class Track(Base):
    """ORM model for a row of the ``tracks`` table.

    Carries the track's descriptive metadata, identifiers and URLs for
    external services, and the status of embedding generation.
    """

    __tablename__ = "tracks"

    # Core metadata.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    title: Mapped[str] = mapped_column(Text, nullable=False)
    artist: Mapped[str] = mapped_column(Text, nullable=False)
    album: Mapped[str | None] = mapped_column(Text)
    # Required and unique across all rows.
    fingerprint: Mapped[str] = mapped_column(
        Text,
        unique=True,
        nullable=False,
    )

    # Optional identifiers / links for external services.
    lastfm_url: Mapped[str | None] = mapped_column(Text)
    itunes_track_id: Mapped[int | None] = mapped_column(BigInteger)
    itunes_preview_url: Mapped[str | None] = mapped_column(Text)
    apple_music_id: Mapped[str | None] = mapped_column(Text)
    duration_ms: Mapped[int | None] = mapped_column(Integer)
    genre: Mapped[str | None] = mapped_column(Text)

    # Embedding bookkeeping: new rows start out as "pending".
    embedding_status: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="pending",
    )
    embedding_error: Mapped[str | None] = mapped_column(Text)

    # Both timestamps are filled in by the database on INSERT; ``updated_at``
    # is additionally refreshed on every UPDATE.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )

    # Reverse sides of ``ListenEvent.track`` and ``TrackEmbedding.track``.
    listen_events: Mapped[list["ListenEvent"]] = relationship(
        back_populates="track",
    )
    embedding: Mapped["TrackEmbedding | None"] = relationship(
        back_populates="track",
    )
class ListenEvent(Base):
    """ORM model for a row of the ``listen_events`` table: one play of a track."""

    __tablename__ = "listen_events"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # The ForeignKey is required: it is how SQLAlchemy derives the join
    # condition for ``track`` below and for ``Track.listen_events``. Without
    # it, mapper configuration fails with NoForeignKeysError.
    track_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("tracks.id"),
        nullable=False,
    )
    # Origin of the event; defaults to "music_assistant" when not supplied.
    source: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="music_assistant",
    )
    speaker_name: Mapped[str | None] = mapped_column(Text)
    # Filled in by the database at INSERT time when not given explicitly.
    listened_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    # NOTE(review): unit of duration_played is not visible here — confirm.
    duration_played: Mapped[int | None] = mapped_column(Integer)
    # Optional JSONB document stored alongside the event.
    raw_payload: Mapped[dict | None] = mapped_column(JSONB)

    track: Mapped[Track] = relationship(back_populates="listen_events")
class TrackEmbedding(Base):
    """ORM model for the ``track_embeddings`` table.

    Stores at most one 512-dimensional vector per track (``track_id`` is
    unique), together with the name of the model that produced it.
    """

    __tablename__ = "track_embeddings"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # The ForeignKey is required so SQLAlchemy can derive the join for
    # ``track`` below and for ``Track.embedding``; without it, mapper
    # configuration fails with NoForeignKeysError. ``unique=True`` keeps the
    # link one-to-one.
    track_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("tracks.id"),
        unique=True,
        nullable=False,
    )
    embedding = mapped_column(Vector(512), nullable=False)
    model_version: Mapped[str] = mapped_column(
        Text,
        nullable=False,
        default="laion/larger_clap_music",
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    __table_args__ = (
        # HNSW index over the vector column using the cosine operator class.
        Index(
            "ix_track_embeddings_embedding_hnsw",
            "embedding",
            postgresql_using="hnsw",
            postgresql_with={"m": 16, "ef_construction": 64},
            postgresql_ops={"embedding": "vector_cosine_ops"},
        ),
    )

    track: Mapped[Track] = relationship(back_populates="embedding")
class SimilarityLink(Base):
    """ORM model for the ``similarity_links`` table.

    Each row pairs a source track id with a target track id; a given
    (source, target) pair may appear only once.
    """

    __tablename__ = "similarity_links"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # NOTE(review): these two columns presumably reference tracks.id, but no
    # foreign-key constraint is declared on them — confirm whether that is
    # intentional.
    source_track_id: Mapped[int] = mapped_column(
        BigInteger,
        nullable=False,
    )
    target_track_id: Mapped[int] = mapped_column(
        BigInteger,
        nullable=False,
    )
    # NOTE(review): presumably the match score reported by Last.fm — confirm.
    lastfm_match: Mapped[float | None] = mapped_column(Real)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    __table_args__ = (
        # Unique index enforcing one row per (source, target) pair.
        Index(
            "uq_similarity_link",
            "source_track_id",
            "target_track_id",
            unique=True,
        ),
    )
class TasteProfile(Base):
    """ORM model for the ``taste_profiles`` table.

    A named profile holding a single 512-dimensional vector. Names are
    unique, and a profile created without a name is called "default".
    """

    __tablename__ = "taste_profiles"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    name: Mapped[str] = mapped_column(
        Text,
        unique=True,
        nullable=False,
        default="default",
    )
    embedding = mapped_column(Vector(512), nullable=False)
    # NOTE(review): presumably the number of tracks the embedding was derived
    # from — confirm against the code that writes this row.
    track_count: Mapped[int] = mapped_column(Integer, nullable=False)
    # ``onupdate`` mirrors ``Track.updated_at``: without it the column keeps
    # its INSERT-time value unless every writer remembers to set it by hand.
    # An explicitly assigned value still takes precedence over ``onupdate``.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
    )
class Playlist(Base):
    """ORM model for a row of the ``playlists`` table.

    Owns its ``PlaylistTrack`` entries: they are cascaded with the playlist
    and deleted when removed from ``tracks``.
    """

    __tablename__ = "playlists"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    name: Mapped[str | None] = mapped_column(Text)
    # NOTE(review): presumably the percentage of already-known tracks that was
    # requested for this playlist — confirm against the generator.
    known_pct: Mapped[int] = mapped_column(Integer, nullable=False)
    total_tracks: Mapped[int] = mapped_column(Integer, nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # "delete-orphan" removes an entry once it is detached from the playlist.
    tracks: Mapped[list["PlaylistTrack"]] = relationship(
        back_populates="playlist",
        cascade="all, delete-orphan",
    )
class PlaylistTrack(Base):
    """ORM model for the ``playlist_tracks`` table: one slot in a playlist."""

    __tablename__ = "playlist_tracks"

    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Both foreign keys are required so SQLAlchemy can derive the join
    # conditions for ``playlist`` / ``track`` below and for
    # ``Playlist.tracks``; without them, mapper configuration fails with
    # NoForeignKeysError.
    playlist_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("playlists.id"),
        nullable=False,
    )
    track_id: Mapped[int] = mapped_column(
        BigInteger,
        ForeignKey("tracks.id"),
        nullable=False,
    )
    position: Mapped[int] = mapped_column(Integer, nullable=False)
    is_known: Mapped[bool] = mapped_column(nullable=False)
    similarity_score: Mapped[float | None] = mapped_column(Real)

    playlist: Mapped[Playlist] = relationship(back_populates="tracks")
    # One-directional: Track declares no matching reverse relationship.
    track: Mapped[Track] = relationship()