1
0
Эх сурвалжийг харах

feat: add unicode normalization (#15)

Sv443 2 жил өмнө
parent
commit
c8b1b91fae
4 файл өөрчлөгдсөн, 39 нэмэгдсэн, 22 устгасан
  1. 6 0
      src/axios.ts
  2. 10 0
      src/constants.ts
  3. 2 2
      src/server.ts
  4. 21 20
      src/songData.ts

+ 6 - 0
src/axios.ts

@@ -3,3 +3,9 @@ import { default as _axios } from "axios";
 export const axios = _axios.create({
     timeout: 1000 * 15,
 });
+
+export function getAxiosAuthConfig(authToken?: string) {
+    return authToken ? {
+        headers: { "Authorization": `Bearer ${authToken}` },
+    } : {};
+}

+ 10 - 0
src/constants.ts

@@ -1,2 +1,12 @@
 /** Set of all supported [ISO 639-1 language codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) */
 export const langCodes = new Set<string>(["aa","ab","ae","af","ak","am","an","ar","as","av","ay","az","ba","be","bg","bh","bi","bm","bn","bo","br","bs","ca","ce","ch","co","cr","cs","cu","cv","cy","da","de","dv","dz","ee","el","en","eo","es","et","eu","fa","ff","fi","fj","fo","fr","fy","ga","gd","gl","gn","gu","gv","ha","he","hi","ho","hr","ht","hu","hy","hz","ia","id","ie","ig","ii","ik","io","is","it","iu","ja","jv","ka","kg","ki","kj","kk","kl","km","kn","ko","kr","ks","ku","kv","kw","ky","la","lb","lg","li","ln","lo","lt","lu","lv","mg","mh","mi","mk","ml","mn","mr","ms","mt","my","na","nb","nd","ne","ng","nl","nn","no","nr","nv","ny","oc","oj","om","or","os","pa","pi","pl","ps","pt","qu","rm","rn","ro","ru","rw","sa","sc","sd","se","sg","si","sk","sl","sm","sn","so","sq","sr","ss","st","su","sv","sw","ta","te","tg","th","ti","tk","tl","tn","to","tr","ts","tt","tw","ty","ug","uk","ur","uz","ve","vi","vo","wa","wo","xh","yi","yo","za","zh","zu"]);
+
+/** Maps each character-class regex to the ASCII character that replaces its matches when normalizing fields before fuzzy filtering them */
+export const charReplacements = new Map<RegExp, string>([
+    ["`´’︐︑ʻ", "'"],
+    ["，", ","],
+    ["—─", "-"],
+    ["“”", "\""],
+].map(
+    ([k, v]) => ([new RegExp(`[${k!}]`, "g"), v!])
+));

+ 2 - 2
src/server.ts

@@ -21,8 +21,8 @@ app.use(compression());
 app.disable("x-powered-by");
 
 const rateLimiter = new RateLimiterMemory({
-    points: 5,
-    duration: 10,
+    points: 10,
+    duration: 30,
 });
 
 const authTokens = getAuthTokens();

+ 21 - 20
src/songData.ts

@@ -1,11 +1,12 @@
 /* eslint-disable no-control-regex */
 
-import { axios } from "./axios";
 import Fuse from "fuse.js";
 import { nanoid } from "nanoid";
-import { clamp, Stringifiable } from "svcorelib";
+import { clamp } from "svcorelib";
+
+import { axios, getAxiosAuthConfig } from "./axios";
+import { charReplacements } from "./constants";
 import type { Album, ApiSearchResult, ApiSongResult, GetMetaArgs, GetMetaResult, GetTranslationsArgs, MetaSearchHit, SongMeta, SongTranslation } from "./types";
-import { getAxiosAuthConfig } from "./utils";
 
 const defaultFuzzyThreshold = 0.65;
 
@@ -33,7 +34,8 @@ export async function getMeta({
 
     if(threshold === undefined || isNaN(threshold))
         threshold = defaultFuzzyThreshold;
-    threshold = clamp(threshold, 0.0, 1.0);
+    else
+        threshold = clamp(threshold, 0.0, 1.0);
 
     if(status >= 200 && status < 300 && Array.isArray(response?.hits))
     {
@@ -47,18 +49,18 @@ export async function getMeta({
                 path: result.path,
                 language: result.language ?? null,
                 meta: {
-                    title: formatStr(result.title),
-                    fullTitle: formatStr(result.full_title),
-                    artists: formatStr(result.artist_names),
+                    title: normalize(result.title),
+                    fullTitle: normalize(result.full_title),
+                    artists: normalize(result.artist_names),
                     primaryArtist: {
-                        name: result.primary_artist.name ? formatStr(result.primary_artist.name) : null,
-                        url: result.primary_artist.url ?? null,
-                        headerImage: result.primary_artist.header_image_url ?? null,
-                        image: result.primary_artist.image_url ?? null,
+                        name: result.primary_artist?.name ?? null,
+                        url: result.primary_artist?.url ?? null,
+                        headerImage: result.primary_artist?.header_image_url ?? null,
+                        image: result.primary_artist?.image_url ?? null,
                     },
                     featuredArtists: Array.isArray(result.featured_artists) && result.featured_artists.length > 0
                         ? result.featured_artists.map((a) => ({
-                            name: a.name ? formatStr(a.name) : null,
+                            name: a.name ?? null,
                             url: a.url ?? null,
                             headerImage: a.header_image_url ?? null,
                             image: a.image_url ?? null,
@@ -229,16 +231,15 @@ export async function getAlbum(songId: number): Promise<Album | null> {
     }
 }
 
-/**
- * Removes invisible characters and control characters from a string  
- * @throws Throws TypeError if the input is not a string
- */
-function formatStr(str: Stringifiable): string
+/** Replaces typographic Unicode variants (as listed in charReplacements) with their regular ASCII characters, then removes control characters and zero-width spaces and turns non-breaking spaces into regular spaces */
+function normalize(str: string): string
 {
-    if(!str || !str.toString || typeof str !== "string")
-        throw new TypeError("formatStr(): input is not a string");
+    charReplacements.forEach((val, regex) => {
+        if(str.match(regex))
+            str = str.replace(regex, val);
+    });
 
-    return str.toString()
+    return str
         .replace(/[\u0000-\u001F\u007F-\u009F\u200B]/g, "") // 0-width spaces & control characters
         .replace(/\u00A0/g, " "); // non-standard 1-width spaces
 }