Преглед изворни кода

feat: add fuzzy search to ?q

Sven пре 2 година
родитељ
комит
ff09afe4dc
6 измењених фајлова са 127 додато и 86 уклоњено
  1. 3 2
      .env.template
  2. 52 32
      README.md
  3. 2 2
      package.json
  4. 1 1
      src/index.ts
  5. 8 6
      src/server.ts
  6. 61 43
      src/songMeta.ts

+ 3 - 2
.env.template

@@ -1,3 +1,4 @@
-HTTP_PORT=8074
+HTTP_PORT=8074             # TCP port the HTTP server listens on
+HTTP_HOST=0.0.0.0          # Defaults to 0.0.0.0 (listen on all interfaces)
 GENIUS_ACCESS_TOKEN=abcdef # Gotten from POST https://api.genius.com/oauth/token or from creating a client on https://genius.com/api-clients
-AUTH_TOKENS= # Comma-separated list of HTTP bearer tokens that are excluded from rate limiting
+AUTH_TOKENS=               # Comma-separated list of HTTP bearer tokens that are excluded from rate limiting

+ 52 - 32
README.md

@@ -1,7 +1,9 @@
 # geniURL
 
 Simple JSON and XML REST API to search for song metadata and the lyrics URL on [genius.com](https://genius.com/)  
-Obtaining actual lyrics sadly isn't possible due to licensing and copyright reasons
+Authorization is not required and geniURL implements a fuzzy search that will greatly improve search results over the genius.com API.  
+  
+Obtaining actual lyrics sadly isn't possible due to licensing and copyright reasons.  
 
 <br><br>
 
@@ -32,8 +34,8 @@ All routes support gzip and deflate compression.
 
 > ### GET `/search`
 >
-> This endpoint gives you the top 10 results for a search query specified by `search_text`  
-> The returned data contains various data like the lyrics website URL, song and thumbnail metadata and more (see below).
+> This endpoint gives you up to 10 results for a search query specified by `search_text`  
+> The returned payload contains various data like the lyrics website URL, song and thumbnail metadata and more (see below).
 >
 > <br>
 >
@@ -41,17 +43,26 @@ All routes support gzip and deflate compression.
 > `?q=search%20query`  
 > This parameter should contain both the song and artist name (for best results, the artist name should come first, separated by a whitespace).  
 > Sometimes the song name alone might be enough but the results vary greatly.  
-> Using this parameter instead of `?artist` and `?song` will not modify the search results and so you will sometimes get blatantly wrong top matches.  
+> Using this parameter instead of `?artist` and `?song` means you will get slightly less accurate results.  
 > Make sure the search query is [percent/URL-encoded.](https://en.wikipedia.org/wiki/Percent-encoding)  
->   
+> 
+> **OR**
+> 
 > `?artist=name` and `?song=name`  
-> Instead of `?q`, you can use `?artist` and `?song` to tell geniURL to preemptively filter the search results.  
-> This is done using a fuzzy search to greatly increase the chances the correct search result will be at the top.  
+> Instead of `?q`, you can use `?artist` and `?song` to help geniURL filter the search results better, so your top results will be more accurate.  
 > Make sure these parameters are [percent/URL-encoded.](https://en.wikipedia.org/wiki/Percent-encoding)  
->   
+> 
+> <br>
+> 
+> **Optional URL Parameters:**  
 > `?format=json/xml`  
-> Use this parameter to change the response format from the default (`json`) to `xml`  
+> Use this optional parameter to change the response format from the default (`json`) to `xml`  
 > The structure of the XML data is similar to the shown JSON data.
+>   
+> `?threshold=0.7`  
+> This optional parameter can be used to change the fuzzy search threshold from the default of 0.7  
+> It has to be between 0.0 and 1.0; the lower the number, the fewer results you'll get but the more accurate the top results will be.  
+> 0.7 is a good middle ground but depending on your use-case you might want to play around with this.
 >
 > <br>
 > 
@@ -64,23 +75,23 @@ All routes support gzip and deflate compression.
 >     "error": false,
 >     "matches": 10,
 >     "top": {
->         "url": "https://genius.com/Artist-1-song-name-lyrics",
->         "path": "/Artist-1-song-name-lyrics",
+>         "url": "https://genius.com/Artist-Foo-song-name-lyrics",
+>         "path": "/Artist-Foo-song-name-lyrics",
 >         "language": "en",
 >         "meta": {
 >             "title": "Song Name",
->             "fullTitle": "Song Name by Artist 1 (ft. Artist 2)",
->             "artists": "Artist 1 (ft. Artist 2)",
+>             "fullTitle": "Song Name by Artist Foo (ft. Artist Bar)",
+>             "artists": "Artist Foo (ft. Artist Bar)",
 >             "primaryArtist": {
->                 "name": "Artist 1",
->                 "url": "https://genius.com/artists/Artist-1",
+>                 "name": "Artist Foo",
+>                 "url": "https://genius.com/artists/Artist-Foo",
 >                 "headerImage": "https://images.genius.com/...",
 >                 "image": "https://images.genius.com/..."
 >             },
 >             "featuredArtists": [
 >                 {
->                     "name": "Featured Artist 1",
->                     "url": "https://genius.com/artists/Featured-Artist-1",
+>                     "name": "Artist Bar",
+>                     "url": "https://genius.com/artists/Artist-Bar",
 >                     "headerImage": "https://images.genius.com/...",
 >                     "image": "https://images.genius.com/..."
 >                 }
@@ -139,7 +150,7 @@ All routes support gzip and deflate compression.
 
 > ### GET `/search/top`
 >
-> This endpoint is the same as `/search`, but it only gives the top result.  
+> This endpoint is similar to `/search`, but it only gives the top result.  
 > Use this if you are only interested in the top result and want to reduce traffic.
 >
 > <br>
@@ -148,17 +159,26 @@ All routes support gzip and deflate compression.
 > `?q=search%20query`  
 > This parameter should contain both the song and artist name (for best results, the artist name should come first, separated by a whitespace).  
 > Sometimes the song name alone might be enough but the results vary greatly.  
-> Using this parameter instead of `?artist` and `?song` will not modify the search result and so you will sometimes get a blatantly wrong top match.  
+> Using this parameter instead of `?artist` and `?song` means you will get slightly less accurate results.  
 > Make sure the search query is [percent/URL-encoded.](https://en.wikipedia.org/wiki/Percent-encoding)  
->   
+> 
+> **OR**
+> 
 > `?artist=name` and `?song=name`  
-> Instead of `?q`, you can use `?artist` and `?song` to tell geniURL to preemptively filter the search results.  
-> This is done using a fuzzy search to greatly increase the chances the correct search result will be returned.  
+> Instead of `?q`, you can use `?artist` and `?song` to help geniURL filter the search results better, so your top results will be more accurate.  
 > Make sure these parameters are [percent/URL-encoded.](https://en.wikipedia.org/wiki/Percent-encoding)  
->   
+> 
+> <br><br>
+> 
+> **Optional URL Parameters:**  
 > `?format=json/xml`  
-> Use this parameter to change the response format from the default (`json`) to `xml`  
+> Use this optional parameter to change the response format from the default (`json`) to `xml`  
 > The structure of the XML data is similar to the shown JSON data.
+>   
+> `?threshold=0.7`  
+> This optional parameter can be used to change the fuzzy search threshold from the default of 0.7  
+> It has to be between 0.0 and 1.0; the lower the number, the fewer results you'll get but the more accurate the top results will be.  
+> 0.7 is a good middle ground but depending on your use-case you might want to play around with this.
 >
 > <br>
 > 
@@ -170,23 +190,23 @@ All routes support gzip and deflate compression.
 > {
 >     "error": false,
 >     "matches": 1,
->     "url": "https://genius.com/Artist-1-song-name-lyrics",
->     "path": "/Artist-1-song-name-lyrics",
+>     "url": "https://genius.com/Artist-Foo-song-name-lyrics",
+>     "path": "/Artist-Foo-song-name-lyrics",
 >     "language": "en",
 >     "meta": {
 >         "title": "Song Name",
->         "fullTitle": "Song Name by Artist 1 (ft. Artist 2)",
->         "artists": "Artist 1 (ft. Artist 2)",
+>         "fullTitle": "Song Name by Artist Foo (ft. Artist Bar)",
+>         "artists": "Artist Foo (ft. Artist Bar)",
 >         "primaryArtist": {
->             "name": "Artist 1",
->             "url": "https://genius.com/artists/Artist-1",
+>             "name": "Artist Foo",
+>             "url": "https://genius.com/artists/Artist-Foo",
 >             "headerImage": "https://images.genius.com/...",
 >             "image": "https://images.genius.com/..."
 >         },
 >         "featuredArtists": [
 >             {
->                 "name": "Featured Artist 1",
->                 "url": "https://genius.com/artists/Featured-Artist-1",
+>                 "name": "Artist Bar",
+>                 "url": "https://genius.com/artists/Artist-Bar",
 >                 "headerImage": "https://images.genius.com/...",
 >                 "image": "https://images.genius.com/..."
 >             }

+ 2 - 2
package.json

@@ -14,8 +14,8 @@
   "keywords": [
     "rest-api",
     "lyrics",
-    "rest-proxy",
-    "lyrics-search"
+    "lyrics-search",
+    "song-metadata"
   ],
   "author": {
     "name": "Sv443",

+ 1 - 1
src/index.ts

@@ -12,7 +12,7 @@ async function init()
 
     try
     {
-        server.init();
+        await server.init();
 
         stage = "(done)";
     }

+ 8 - 6
src/server.ts

@@ -29,27 +29,29 @@ const authTokens = getAuthTokens();
 
 export async function init()
 {
-    const port = parseInt(String(process.env.HTTP_PORT));
+    const port = parseInt(String(process.env.HTTP_PORT ?? "").trim());
+    const hostRaw = String(process.env.HTTP_HOST ?? "").trim();
+    const host = hostRaw.length < 1 ? "0.0.0.0" : hostRaw;
 
     if(await portUsed(port))
-        return error(`TCP port ${port} is already used`, undefined, true);
+        return error(`TCP port ${port} is already used or invalid`, undefined, true);
 
     // on error
-    app.use((err: any, req: Request, res: Response, next: NextFunction) => {
+    app.use((err: unknown, req: Request, res: Response, next: NextFunction) => {
         if(typeof err === "string" || err instanceof Error)
             return respond(res, "serverError", `General error in HTTP server: ${err.toString()}`, req?.query?.format ? String(req.query.format) : undefined);
         else
             return next();
     });
 
-    const listener = app.listen(port, () => {
+    const listener = app.listen(port, host, () => {
         app.disable("x-powered-by");
 
         // rate limiting
         app.use(async (req, res, next) => {
             const fmt = req?.query?.format ? String(req.query.format) : undefined;
             const { authorization } = req.headers;
-            const authHeader = authorization?.startsWith("Bearer") ? authorization.substring(7) : authorization;
+            const authHeader = authorization?.startsWith("Bearer ") ? authorization.substring(7) : authorization;
 
             res.setHeader("API-Info", `geniURL v${packageJson.version} (${packageJson.homepage})`);
 
@@ -70,7 +72,7 @@ export async function init()
 
         registerEndpoints();
 
-        console.log(k.green(`Ready on port ${port}`));
+        console.log(k.green(`Listening on ${host}:${port}`));
     });
 
     listener.on("error", (err) => error("General server error", err, true));

+ 61 - 43
src/songMeta.ts

@@ -1,7 +1,7 @@
 import axios from "axios";
 import Fuse from "fuse.js";
 import { nanoid } from "nanoid";
-import { clamp } from "svcorelib";
+import { allOfType, clamp } from "svcorelib";
 import type { ApiSearchResult, SongMeta } from "./types";
 
 type MetaSearchHit = SongMeta & { uuid?: string; };
@@ -24,17 +24,23 @@ const defaultFuzzyThreshold = 0.7;
  * Returns meta information about the top results of a search using the genius API
  * @param param0 Pass an object with either a `q` prop or the props `artist` and `song` to make use of fuzzy filtering
  */
-export async function getMeta({ q, artist, song, threshold = defaultFuzzyThreshold }: GetMetaProps): Promise<GetMetaResult | null>
+export async function getMeta({
+    q,
+    artist,
+    song,
+    threshold,
+}: GetMetaProps): Promise<GetMetaResult | null>
 {
     const accessToken = process.env.GENIUS_ACCESS_TOKEN ?? "ERR_NO_ENV";
 
     const query = q ? q : `${artist} ${song}`;
+    const searchByQuery = allOfType([artist, song], "undefined");
 
     const { data: { response }, status } = await axios.get<ApiSearchResult>(`https://api.genius.com/search?q=${encodeURIComponent(query)}`, {
         headers: { "Authorization": `Bearer ${accessToken}` },
     });
 
-    if(isNaN(threshold))
+    if(threshold === undefined || isNaN(threshold))
         threshold = defaultFuzzyThreshold;
     threshold = clamp(threshold, 0.0, 1.0);
 
@@ -77,55 +83,67 @@ export async function getMeta({ q, artist, song, threshold = defaultFuzzyThresho
                 id: result.id ?? null,
             }));
 
-        if(artist && song)
-        {
-            const scoreMap: Record<string, number> = {};
+        const scoreMap: Record<string, number> = {};
 
-            hits = hits.map(h => {
-                h.uuid = nanoid();
-                return h;
-            }) as (SongMeta & { uuid: string })[];
+        hits = hits.map(h => {
+            h.uuid = nanoid();
+            return h;
+        }) as (SongMeta & { uuid: string })[];
 
-            const fuseOpts: Fuse.IFuseOptions<MetaSearchHit> = {
-                includeScore: true,
-                threshold,
-            };
+        const fuseOpts: Fuse.IFuseOptions<MetaSearchHit> = {
+            includeScore: true,
+            threshold,
+        };
 
-            const titleFuse = new Fuse(hits, { ...fuseOpts, keys: [ "meta.title" ] });
-            const artistFuse = new Fuse(hits, { ...fuseOpts, keys: [ "meta.primaryArtist.name" ] });
+        const addScores = (searchRes: Fuse.FuseResult<SongMeta & { uuid?: string; }>[]) =>
+            searchRes.forEach(({ item, score }) => {
+                if(!item.uuid || !score)
+                    return;
 
-            /** @param {({ item: { uuid: string }, score: number })[]} searchRes */
-            const addScores = (searchRes: Fuse.FuseResult<SongMeta & { uuid?: string; }>[]) =>
-                searchRes.forEach(({ item, score }) => {
-                    if(!item.uuid || !score)
-                        return;
+                if(!scoreMap[item.uuid])
+                    scoreMap[item.uuid] = score;
+                else
+                    scoreMap[item.uuid] += score;
+            });
 
-                    if(!scoreMap[item.uuid])
-                        scoreMap[item.uuid] = score;
-                    else
-                        scoreMap[item.uuid] += score;
-                });
+        if(song && artist) {
+            const titleFuse = new Fuse(hits, { ...fuseOpts, keys: [ "meta.title" ] });
+            const artistFuse = new Fuse(hits, { ...fuseOpts, keys: [ "meta.primaryArtist.name" ] });
 
             addScores(titleFuse.search(song));
             addScores(artistFuse.search(artist));
-
-            const bestMatches = Object.entries(scoreMap)
-                .sort(([, valA], [, valB]) => valA > valB ? 1 : -1)
-                .map(e => e[0]);
-
-            const oldHits = [...hits];
-
-            hits = bestMatches
-                .map(uuid => oldHits.find(h => h.uuid === uuid))
-                .map(hit => {
-                    if(hit)
-                    {
-                        delete hit.uuid;
-                        return hit;
-                    }
-                })
-                .filter(h => h !== undefined) as MetaSearchHit[];
         }
+        else {
+            const queryFuse = new Fuse(hits, {
+                ...fuseOpts,
+                ignoreLocation: true,
+                keys: [ "meta.title", "meta.primaryArtist.name" ],
+            });
+
+            let queryParts = [query];
+            if(query.match(/\s-\s/))
+                queryParts = query.split(/\s-\s/);
+
+            for(const part of queryParts)
+                addScores(queryFuse.search(part.trim()));
+        }
+
+        // TODO: reduce the amount of remapping cause it takes long
+
+        const bestMatches = Object.entries(scoreMap)
+            .sort(([, valA], [, valB]) => valA > valB ? 1 : -1)
+            .map(e => e[0]);
+
+        const oldHits = [...hits];
+
+        hits = bestMatches
+            .map(uuid => oldHits.find(h => h.uuid === uuid))
+            .map(hit => {
+                if(!hit) return undefined;
+                delete hit.uuid;
+                return hit;
+            })
+            .filter(h => h !== undefined) as MetaSearchHit[];
 
         return {
             top: hits[0] as MetaSearchHit,