Procházet zdrojové kódy

feat: better lyrics url resolving

Sv443 před 1 rokem
rodič
revize
529f6fe795

+ 4 - 0
assets/require.json

@@ -3,6 +3,10 @@
     "pkgName": "@sv443-network/userutils",
     "path": "dist/index.global.js"
   },
+  {
+    "pkgName": "fuse.js",
+    "path": "dist/fuse.basic.js"
+  },
   {
     "pkgName": "marked",
     "path": "lib/marked.umd.js"

+ 6 - 6
contributing.md

@@ -265,7 +265,7 @@ The usage and example blocks on each are written in TypeScript but can be used i
   - [getFeatures()](#getfeatures) - Returns the current BYTM feature configuration object
   - [saveFeatures()](#savefeatures) - Overwrites the current BYTM feature configuration object with the provided one
 - Lyrics:
-  - [fetchLyricsUrl()](#fetchlyricsurl) - Fetches the URL to the lyrics page for the specified song
+  - [fetchLyricsUrlTop()](#fetchlyricsurltop) - Fetches the URL to the lyrics page for the specified song
   - [getLyricsCacheEntry()](#getlyricscacheentry) - Tries to find a URL entry in the in-memory cache for the specified song
   - [sanitizeArtists()](#sanitizeartists) - Sanitizes the specified artist string to be used in fetching a lyrics URL
   - [sanitizeSong()](#sanitizesong) - Sanitizes the specified song title string to be used in fetching a lyrics URL
@@ -601,14 +601,14 @@ The usage and example blocks on each are written in TypeScript but can be used i
 
 <br>
 
-> #### fetchLyricsUrl()
+> #### fetchLyricsUrlTop()
 > Usage:
 > ```ts
-> unsafeWindow.BYTM.fetchLyricsUrl(artist: string, song: string): Promise<string | undefined>
+> unsafeWindow.BYTM.fetchLyricsUrlTop(artist: string, song: string): Promise<string | undefined>
 > ```
 >   
 > Description:  
-> Fetches the URL to the lyrics page for the specified song.  
+> Fetches the top result's URL to the lyrics page for the specified song.  
 > If there is already an entry in the in-memory cache for the song, it will be returned without fetching anything new.  
 > URLs that are returned by this function are added to the cache automatically.  
 > Returns undefined if there was an error while fetching the URL.  
@@ -623,7 +623,7 @@ The usage and example blocks on each are written in TypeScript but can be used i
 > 
 > ```ts
 > async function getLyricsUrl() {
->   const lyricsUrl = await unsafeWindow.BYTM.fetchLyricsUrl("Michael Jackson", "Thriller");
+>   const lyricsUrl = await unsafeWindow.BYTM.fetchLyricsUrlTop("Michael Jackson", "Thriller");
 > 
 >   if(lyricsUrl)
 >     console.log(`The lyrics URL for Michael Jackson's Thriller is '${lyricsUrl}'`);
@@ -646,7 +646,7 @@ The usage and example blocks on each are written in TypeScript but can be used i
 > Description:  
 > Tries to find an entry in the in-memory cache for the specified song.  
 > You can find the structure of the `LyricsCacheEntry` type in the file [`src/types.ts`](src/types.ts)  
-> Contrary to [`fetchLyricsUrl()`](#fetchlyricsurl), this function does not fetch anything new if there is no entry in the cache.  
+> Contrary to [`fetchLyricsUrlTop()`](#fetchlyricsurltop), this function does not fetch anything new if there is no entry in the cache.  
 >   
 > Arguments:  
 > - `artist` - The main artist of the song to grab the lyrics URL for.  

+ 9 - 0
package-lock.json

@@ -10,6 +10,7 @@
       "license": "AGPL-3.0-only",
       "dependencies": {
         "@sv443-network/userutils": "^5.0.0",
+        "fuse.js": "^7.0.0",
         "marked": "^12.0.0",
         "nanoevents": "^9.0.0"
       },
@@ -2051,6 +2052,14 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/fuse.js": {
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz",
+      "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==",
+      "engines": {
+        "node": ">=10"
+      }
+    },
     "node_modules/get-caller-file": {
       "version": "2.0.5",
       "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz",

+ 1 - 0
package.json

@@ -58,6 +58,7 @@
   },
   "dependencies": {
     "@sv443-network/userutils": "^5.0.0",
+    "fuse.js": "^7.0.0",
     "marked": "^12.0.0",
     "nanoevents": "^9.0.0"
   },

+ 4 - 2
rollup.config.mjs

@@ -53,8 +53,9 @@ export default (/**@type {import("./src/types").RollupArgs}*/ args) => (async ()
       sourcemap: mode === "development",
       compact: mode === "development",
       globals: {
-        "marked": "marked",
         "@sv443-network/userutils": "UserUtils",
+        "fuse.js": "Fuse",
+        "marked": "marked",
       },
     },
     onwarn(warning) {
@@ -65,8 +66,9 @@ export default (/**@type {import("./src/types").RollupArgs}*/ args) => (async ()
       }
     },
     external: [
-      "marked",
       "@sv443-network/userutils",
+      "fuse.js",
+      "marked",
     ],
   };
 

+ 158 - 14
src/features/lyrics.ts

@@ -1,4 +1,5 @@
-import { ConfigManager, compress, decompress, fetchAdvanced, insertAfter } from "@sv443-network/userutils";
+import { ConfigManager, autoPlural, clamp, compress, decompress, fetchAdvanced, insertAfter } from "@sv443-network/userutils";
+import Fuse from "fuse.js";
 import { constructUrlString, error, getResourceUrl, info, log, onSelectorOld, warn, t, tp, compressionSupported } from "../utils";
 import { emitInterface } from "../interface";
 import { compressionFormat, scriptInfo } from "../constants";
@@ -7,7 +8,7 @@ import type { LyricsCacheEntry } from "../types";
 /** Base URL of geniURL */
 export const geniUrlBase = "https://api.sv443.net/geniurl";
 /** GeniURL endpoint that gives song metadata when provided with a `?q` or `?artist` and `?song` parameter - [more info](https://api.sv443.net/geniurl) */
-const geniURLSearchTopUrl = `${geniUrlBase}/search/top`;
+const geniURLSearchUrl = `${geniUrlBase}/search`;
 /** Ratelimit budget timeframe in seconds - should reflect what's in geniURL's docs */
 const geniUrlRatelimitTimeframe = 30;
 
@@ -99,6 +100,35 @@ export function addLyricsCacheEntry(artist: string, song: string, url: string) {
   return lyricsCache.setData({ cache });
 }
 
+/**
+ * Adds the provided entry into the lyrics URL cache, synchronously to RAM and asynchronously to GM storage  
+ * The entry's `viewed` and `added` timestamps are backdated by a penalty, which shortens the entry's lifespan in the cache  
+ * After inserting, the cache is sorted by `viewed` (most recent first) and, if it then exceeds `maxLyricsCacheSize`, the last entry is dropped  
+ *   
+ * ⚠️ {@linkcode artist} and {@linkcode song} need to be sanitized first!
+ * @param artist Sanitized artist name to store in the entry
+ * @param song Sanitized song title to store in the entry
+ * @param url Lyrics URL to store in the entry
+ * @param penaltyFr Fraction of the predefined max penalty to subtract from the timestamp values - clamped to the range 0 to 1 - default is 0 (no penalty) - (0.25 = only penalized by a quarter of the predefined max penalty)
+ * @returns The return value of `lyricsCache.setData()`
+ */
+export function addLyricsCacheEntryPenalized(artist: string, song: string, url: string, penaltyFr = 0) {
+  const { cache } = lyricsCache.getData();
+
+  penaltyFr = clamp(penaltyFr, 0, 1); // out-of-range fractions are limited to 0..1 instead of being rejected
+
+  const viewedPenalty = 1000 * 60 * 60 * 24 * 5 * penaltyFr; // max penalty of 5 days (in ms), scaled by penaltyFr
+  const addedPenalty = 1000 * 60 * 60 * 24 * 15 * penaltyFr; // max penalty of 15 days (in ms), scaled by penaltyFr
+  cache.push({
+    artist,
+    song,
+    url,
+    viewed: Date.now() - viewedPenalty,
+    added: Date.now() - addedPenalty,
+  } satisfies LyricsCacheEntry);
+
+  cache.sort((a, b) => b.viewed - a.viewed); // most recently viewed entries first
+  if(cache.length > maxLyricsCacheSize)
+    cache.pop(); // removes only the single last entry, i.e. the one with the oldest `viewed` value
+
+  return lyricsCache.setData({ cache });
+}
+
 //#MARKER media control bar
 
 let currentSongTitle = "";
@@ -198,6 +228,9 @@ async function addActualMediaCtrlLyricsBtn(likeContainer: HTMLElement) {
 
 /** Removes everything in parentheses from the passed song name */
 export function sanitizeSong(songName: string) {
+  if(typeof songName !== "string")
+    return songName;
+
   const parensRegex = /\(.+\)/gmi;
   const squareParensRegex = /\[.+\]/gmi;
 
@@ -250,7 +283,7 @@ export async function getCurrentLyricsUrl() {
     if(!artistName)
       return undefined;
 
-    const url = await fetchLyricsUrl(sanitizeArtists(artistName), sanitizeSong(songName));
+    const url = await fetchLyricsUrlTop(sanitizeArtists(artistName), sanitizeSong(songName));
 
     if(url) {
       emitInterface("bytm:lyricsLoaded", {
@@ -269,17 +302,31 @@ export async function getCurrentLyricsUrl() {
   }
 }
 
-/** Fetches the actual lyrics URL from geniURL - **the passed parameters need to be sanitized first!** */
-export async function fetchLyricsUrl(artist: string, song: string): Promise<string | undefined> {
+/**
+ * Fetches the top lyrics URL result from geniURL by calling {@linkcode fetchLyricsUrls} and picking the `url` of its first result  
+ * **the passed parameters need to be sanitized first!**
+ * @param artist Sanitized artist name that is passed on to {@linkcode fetchLyricsUrls}
+ * @param song Sanitized song title that is passed on to {@linkcode fetchLyricsUrls}
+ * @returns The URL of the first result, or undefined if there is no first result or if an error was thrown (the error is logged via `error()`)
+ */
+export async function fetchLyricsUrlTop(artist: string, song: string): Promise<string | undefined> {
+  try {
+    return (await fetchLyricsUrls(artist, song))?.[0]?.url;
+  }
+  catch(err) {
+    error("Couldn't get lyrics URL due to error:", err);
+    return undefined;
+  }
+}
+
+/**
+ * Fetches the 5 best matching lyrics URLs from geniURL using a combo exact-ish and fuzzy search  
+ * **the passed parameters need to be sanitized first!**
+ */
+export async function fetchLyricsUrls(artist: string, song: string): Promise<Omit<LyricsCacheEntry, "added" | "viewed">[] | undefined> {
   try {
     const cacheEntry = getLyricsCacheEntry(artist, song);
     if(cacheEntry) {
       info(`Found lyrics URL in cache: ${cacheEntry.url}`);
-      return cacheEntry.url;
+      return [cacheEntry];
     }
 
     const startTs = Date.now();
-    const fetchUrl = constructUrlString(geniURLSearchTopUrl, {
+    const fetchUrl = constructUrlString(geniURLSearchUrl, {
       disableFuzzy: null,
       utm_source: "BetterYTM",
       utm_content: `v${scriptInfo.version}`,
@@ -287,7 +334,7 @@ export async function fetchLyricsUrl(artist: string, song: string): Promise<stri
       song,
     });
 
-    log(`Requesting URL from geniURL at '${fetchUrl}'`);
+    log(`Requesting URLs from geniURL at '${fetchUrl}'`);
 
     const fetchRes = await fetchAdvanced(fetchUrl);
     if(fetchRes.status === 429) {
@@ -296,22 +343,119 @@ export async function fetchLyricsUrl(artist: string, song: string): Promise<stri
       return undefined;
     }
     else if(fetchRes.status < 200 || fetchRes.status >= 300) {
-      error(`Couldn't fetch lyrics URL from geniURL - status: ${fetchRes.status} - response: ${(await fetchRes.json()).message ?? await fetchRes.text() ?? "(none)"}`);
+      error(`Couldn't fetch lyrics URLs from geniURL - status: ${fetchRes.status} - response: ${(await fetchRes.json()).message ?? await fetchRes.text() ?? "(none)"}`);
       return undefined;
     }
     const result = await fetchRes.json();
 
-    if(typeof result === "object" && result.error) {
+    if(typeof result === "object" && result.error || !result || !result.all) {
       error("Couldn't fetch lyrics URL:", result.message);
       return undefined;
     }
 
-    const url = result.url;
+    const allResults = result.all as {
+      url: string;
+      meta: {
+        title: string;
+        fullTitle: string;
+        artists: string;
+        primaryArtist: {
+          name: string;
+        };
+      };
+    }[];
+
+    if(allResults.length === 0) {
+      warn("No lyrics URL found for the provided song");
+      return undefined;
+    }
 
-    info(`Found lyrics URL (after ${Date.now() - startTs}ms): ${url}`);
-    addLyricsCacheEntry(artist, song, url);
+    const exactish = (input: string) => {
+      return input.toLowerCase()
+        .replace(/[\s\-_&,.()[\]]+/gm, "");
+    };
+
+    const allResultsSan = allResults
+      .filter(({ meta, url }) => (meta.title || meta.fullTitle) && meta.artists && url)
+      .map(({ meta, url }) => ({
+        meta: {
+          ...meta,
+          title: sanitizeSong(String(meta.title ?? meta.fullTitle)),
+          artists: sanitizeArtists(String(meta.artists)),
+        },
+        url,
+      }));
 
-    return url;
+    // exact-ish matches, best matching one first
+    const exactishResults = [...allResultsSan].sort((a, b) => {
+      const aTitleScore = exactish(a.meta.title).localeCompare(exactish(song));
+      const bTitleScore = exactish(b.meta.title).localeCompare(exactish(song));
+      const aArtistScore = exactish(a.meta.primaryArtist.name).localeCompare(exactish(artist));
+      const bArtistScore = exactish(b.meta.primaryArtist.name).localeCompare(exactish(artist));
+
+      return aTitleScore + aArtistScore - bTitleScore - bArtistScore;
+    });
+
+    // use fuse.js for fuzzy match
+    // search song title and artist separately, then combine the scores
+    const titleFuse = new Fuse([...allResultsSan], {
+      keys: ["title"],
+      includeScore: true,
+      threshold: 0.4,
+    });
+
+    const artistFuse = new Fuse([...allResultsSan], {
+      keys: ["primaryArtist.name"],
+      includeScore: true,
+      threshold: 0.4,
+    });
+
+    let fuzzyResults: typeof allResultsSan = allResultsSan.map(r => {
+      const titleRes = titleFuse.search(r.meta.title);
+      const artistRes = artistFuse.search(r.meta.primaryArtist.name);
+
+      const titleScore = titleRes[0]?.score ?? 0;
+      const artistScore = artistRes[0]?.score ?? 0;
+
+      return {
+        ...r,
+        score: titleScore + artistScore,
+      };
+    });
+    // I love TS
+    fuzzyResults = (fuzzyResults as (typeof allResultsSan[0] & { score: number })[])
+      .map(({ score, ...rest }) => rest as typeof allResultsSan[0]);
+
+    const hasExactMatch = exactishResults.slice(0, 3).includes(fuzzyResults[0]);
+
+    const finalResults = [
+      ...(
+        hasExactMatch
+          ? [fuzzyResults[0]]
+          : []
+      ),
+      ...fuzzyResults.slice(1),
+    ].slice(0, 5);
+
+    // add results to the cache with a penalty to their time to live
+    // so every entry is deleted faster if it's not considered as relevant
+    finalResults.forEach(({ meta: { artists, title }, url }, i) => {
+      const penaltyFraction = hasExactMatch
+        // if there's an exact match, give it 0 penalty and penalize all other results with the full value
+        ? i === 0 ? 0 : 1
+        // if there's no exact match, penalize all results with a fraction of the full penalty since they're more likely to be unrelated
+        : 0.6;
+      addLyricsCacheEntryPenalized(sanitizeArtists(artists), sanitizeSong(title), url, penaltyFraction);
+    });
+
+    finalResults.length > 0 && log("Found", finalResults.length, "lyrics", autoPlural("URL", finalResults), "in", Date.now() - startTs, "ms:", finalResults);
+
+    // returns search results sorted by relevance
+    return finalResults.map(r => ({
+      artist: r.meta.primaryArtist.name,
+      song: r.meta.title,
+      url: r.url,
+    }));
   }
   catch(err) {
     error("Couldn't get lyrics URL due to error:", err);

+ 2 - 4
src/features/songLists.ts

@@ -2,7 +2,7 @@ import { autoPlural, openInNewTab, pauseFor } from "@sv443-network/userutils";
 import { clearInner, error, getResourceUrl, log, onSelectorOld, t, warn } from "../utils";
 import { SiteEventsMap, siteEvents } from "../siteEvents";
 import { emitInterface } from "../interface";
-import { fetchLyricsUrl, createLyricsBtn, sanitizeArtists, sanitizeSong, getLyricsCacheEntry, splitVideoTitle } from "./lyrics";
+import { fetchLyricsUrlTop, createLyricsBtn, sanitizeArtists, sanitizeSong, getLyricsCacheEntry, splitVideoTitle } from "./lyrics";
 import type { FeatureConfig, LyricsCacheEntry } from "../types";
 import "./songLists.css";
 
@@ -40,8 +40,6 @@ export async function initQueueButtons() {
   }
 
   // generic lists
-  // TODO:FIXME: dragging the items around removes the queue buttons
-
   const addGenericListQueueBtns = (listElem: HTMLElement) => {
     if(listElem.classList.contains("bytm-list-has-queue-btns"))
       return;
@@ -168,7 +166,7 @@ async function addQueueButtons(
           imgEl.classList.add("bytm-spinner");
         }
 
-        lyricsUrl = (cachedLyricsEntry as unknown as LyricsCacheEntry)?.url ?? await fetchLyricsUrl(artistsSan, songSan);
+        lyricsUrl = (cachedLyricsEntry as unknown as LyricsCacheEntry)?.url ?? await fetchLyricsUrlTop(artistsSan, songSan);
 
         if(lyricsUrl) {
           emitInterface("bytm:lyricsLoaded", {

+ 2 - 2
src/interface.ts

@@ -3,7 +3,7 @@ import { mode, branch, scriptInfo } from "./constants";
 import { getResourceUrl, getSessionId, getVideoTime, log, setLocale, getLocale, hasKey, hasKeyFor, t, tp, type TrLocale } from "./utils";
 import { addSelectorListener } from "./observers";
 import { getFeatures, saveFeatures } from "./config";
-import { fetchLyricsUrl, getLyricsCacheEntry, sanitizeArtists, sanitizeSong } from "./features/lyrics";
+import { fetchLyricsUrlTop, getLyricsCacheEntry, sanitizeArtists, sanitizeSong } from "./features/lyrics";
 import type { SiteEventsMap } from "./siteEvents";
 
 const { getUnsafeWindow } = UserUtils;
@@ -39,7 +39,7 @@ const globalFuncs = {
   tp,
   getFeatures,
   saveFeatures,
-  fetchLyricsUrl,
+  fetchLyricsUrlTop,
   getLyricsCacheEntry,
   sanitizeArtists,
   sanitizeSong,