Initial haunt-fm implementation

Full music recommendation pipeline: listening history capture via webhook,
Last.fm candidate discovery, iTunes preview download, CLAP audio embeddings
(512-dim), pgvector cosine similarity recommendations, playlist generation
with known/new track interleaving, and Music Assistant playback via HA.

Includes: FastAPI app, SQLAlchemy models, Alembic migrations, Docker Compose
with pgvector/pg17, status dashboard, and all API endpoints.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-22 08:36:36 -06:00
parent 897d0fe1fb
commit 7ff69449d6
39 changed files with 2049 additions and 0 deletions

69
alembic/env.py Normal file
View File

@@ -0,0 +1,69 @@
import asyncio
import os
from logging.config import fileConfig
from alembic import context
from sqlalchemy import pool
from sqlalchemy.ext.asyncio import async_engine_from_config
from haunt_fm.models.base import Base
# Import all models so they register with Base.metadata
from haunt_fm.models.track import ( # noqa: F401
ListenEvent,
Playlist,
PlaylistTrack,
SimilarityLink,
TasteProfile,
Track,
TrackEmbedding,
)
# Alembic Config object for the current run; exposes the values of the
# .ini file in use.
config = context.config

# Configure Python logging from the .ini file, when one was supplied.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Metadata of all imported models; handed to context.configure() below.
target_metadata = Base.metadata

# Override sqlalchemy.url from environment
db_url = os.environ.get("HAUNTFM_DATABASE_URL", "")
if db_url:
    # Alembic's config applies ConfigParser interpolation, so a raw "%"
    # (e.g. a URL-encoded password such as "p%40ss") is rejected as invalid
    # interpolation syntax. Escape it as "%%"; reads through
    # get_main_option()/get_section() turn it back into a single "%".
    config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%"))
def run_migrations_offline() -> None:
    """Run migrations in offline mode.

    The Alembic context is configured with the ``sqlalchemy.url`` option
    rather than a live connection, with ``literal_binds=True``.
    """
    offline_url = config.get_main_option("sqlalchemy.url")
    context.configure(
        target_metadata=target_metadata,
        literal_binds=True,
        url=offline_url,
    )

    with context.begin_transaction():
        context.run_migrations()
def do_run_migrations(connection):
    """Bind the Alembic context to *connection* and run the migrations.

    ``run_async_migrations`` passes this function to ``run_sync`` on its
    async connection.
    """
    context.configure(
        target_metadata=target_metadata,
        connection=connection,
    )

    with context.begin_transaction():
        context.run_migrations()
async def run_async_migrations() -> None:
    """Create an async engine from the Alembic config and run migrations.

    The engine is built from the ``sqlalchemy.``-prefixed options of the
    main config section and uses ``NullPool``. The migrations themselves run
    through ``do_run_migrations`` via ``run_sync``.
    """
    connectable = async_engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    try:
        async with connectable.connect() as connection:
            await connection.run_sync(do_run_migrations)
    finally:
        # Dispose of the engine even when connecting or migrating raises,
        # so a failed run does not skip the cleanup.
        await connectable.dispose()
def run_migrations_online() -> None:
    """Run migrations in online mode by driving the async runner with asyncio."""
    migration_coro = run_async_migrations()
    asyncio.run(migration_coro)
# Module entry point: choose the runner matching Alembic's current mode.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()

26
alembic/script.py.mako Normal file
View File

@@ -0,0 +1,26 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
## Mako comments (lines starting with "##") are not rendered into the
## generated revision files.
## The identifiers below are filled in from the template variables.
# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
## Body of upgrade(): the rendered upgrade operations, or "pass" when empty.
## NOTE(review): the body placeholders appear unindented in this view; confirm
## against the file on disk that they are indented inside the function.
def upgrade() -> None:
${upgrades if upgrades else "pass"}
## Body of downgrade(): the rendered downgrade operations, or "pass" when empty.
def downgrade() -> None:
${downgrades if downgrades else "pass"}

View File

@@ -0,0 +1,121 @@
"""Initial schema
Revision ID: 001
Revises:
Create Date: 2026-02-22
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from pgvector.sqlalchemy import Vector
# Revision identifiers for this migration script.
# "001" is the first revision: down_revision is None, so it has no parent.
revision: str = "001"
down_revision: Union[str, None] = None
# No branch labels and no cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the initial schema.

    Enables the ``vector`` extension, then creates the ``tracks``,
    ``listen_events``, ``track_embeddings`` (plus an HNSW cosine index),
    ``similarity_links``, ``taste_profiles``, ``playlists`` and
    ``playlist_tracks`` tables. Tables referenced by foreign keys are created
    before the tables that reference them.
    """
    # Import the dialect package explicitly: ``sa.dialects.postgresql`` only
    # resolves as an attribute when something else has already imported that
    # submodule, which this module should not rely on.
    from sqlalchemy.dialects import postgresql

    # Enable pgvector extension
    op.execute("CREATE EXTENSION IF NOT EXISTS vector")

    # Tracks
    op.create_table(
        "tracks",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("title", sa.Text, nullable=False),
        sa.Column("artist", sa.Text, nullable=False),
        sa.Column("album", sa.Text),
        sa.Column("fingerprint", sa.Text, unique=True, nullable=False),
        sa.Column("lastfm_url", sa.Text),
        sa.Column("itunes_track_id", sa.BigInteger),
        sa.Column("itunes_preview_url", sa.Text),
        sa.Column("apple_music_id", sa.Text),
        sa.Column("duration_ms", sa.Integer),
        sa.Column("genre", sa.Text),
        sa.Column("embedding_status", sa.Text, nullable=False, server_default="pending"),
        sa.Column("embedding_error", sa.Text),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )

    # Listen events
    op.create_table(
        "listen_events",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("track_id", sa.BigInteger, sa.ForeignKey("tracks.id"), nullable=False),
        sa.Column("source", sa.Text, nullable=False, server_default="music_assistant"),
        sa.Column("speaker_name", sa.Text),
        sa.Column("listened_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.Column("duration_played", sa.Integer),
        sa.Column("raw_payload", postgresql.JSONB),
    )

    # Track embeddings (512-dim CLAP)
    op.create_table(
        "track_embeddings",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("track_id", sa.BigInteger, sa.ForeignKey("tracks.id"), unique=True, nullable=False),
        sa.Column("embedding", Vector(512), nullable=False),
        sa.Column("model_version", sa.Text, nullable=False, server_default="laion/larger_clap_music"),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )
    # HNSW index using the cosine operator class; issued as raw SQL.
    op.execute(
        "CREATE INDEX ix_track_embeddings_hnsw ON track_embeddings "
        "USING hnsw (embedding vector_cosine_ops)"
    )

    # Similarity links
    op.create_table(
        "similarity_links",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("source_track_id", sa.BigInteger, sa.ForeignKey("tracks.id"), nullable=False),
        sa.Column("target_track_id", sa.BigInteger, sa.ForeignKey("tracks.id"), nullable=False),
        # sa.REAL is the name under which SQLAlchemy exports the SQL REAL type.
        sa.Column("lastfm_match", sa.REAL),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
        sa.UniqueConstraint("source_track_id", "target_track_id", name="uq_similarity_link"),
    )

    # Taste profiles
    op.create_table(
        "taste_profiles",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("name", sa.Text, unique=True, nullable=False, server_default="default"),
        sa.Column("embedding", Vector(512), nullable=False),
        sa.Column("track_count", sa.Integer, nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )

    # Playlists
    op.create_table(
        "playlists",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("name", sa.Text),
        sa.Column("known_pct", sa.Integer, nullable=False),
        sa.Column("total_tracks", sa.Integer, nullable=False),
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now()),
    )

    # Playlist entries; rows are removed with their playlist (ON DELETE CASCADE).
    op.create_table(
        "playlist_tracks",
        sa.Column("id", sa.BigInteger, primary_key=True),
        sa.Column("playlist_id", sa.BigInteger, sa.ForeignKey("playlists.id", ondelete="CASCADE"), nullable=False),
        sa.Column("track_id", sa.BigInteger, sa.ForeignKey("tracks.id"), nullable=False),
        sa.Column("position", sa.Integer, nullable=False),
        sa.Column("is_known", sa.Boolean, nullable=False),
        sa.Column("similarity_score", sa.REAL),
    )
def downgrade() -> None:
    """Drop everything created by ``upgrade``.

    Tables that hold foreign keys are dropped before the tables they
    reference; the ``vector`` extension is dropped last.
    """
    for table_name in (
        "playlist_tracks",
        "playlists",
        "taste_profiles",
        "similarity_links",
    ):
        op.drop_table(table_name)

    # The HNSW index was created with raw SQL, so it is dropped the same way.
    op.execute("DROP INDEX IF EXISTS ix_track_embeddings_hnsw")

    for table_name in ("track_embeddings", "listen_events", "tracks"):
        op.drop_table(table_name)

    op.execute("DROP EXTENSION IF EXISTS vector")