Explorar o código

feat: optimize normalization

Sv443 hai 2 anos
pai
achega
62214a59bd
Modificáronse 3 ficheiros con 15 adicións e 9 borrados
  1. 1 1
      changelog.md
  2. 3 5
      src/constants.ts
  3. 11 3
      src/songData.ts

+ 1 - 1
changelog.md

@@ -13,7 +13,7 @@
 - Added route `/translations/:songId` to receive info about a song's translation pages
 - Added route `/album/:songId` to get info about the album that the provided song is in
 - Added parameter `?preferLang=en` to always rank results of a certain language higher than the rest
-- geniURL will now replace inconsistent unicode characters with ASCII ones ([#15](https://github.com/Sv443/geniURL/issues/15))
+- geniURL will now replace inconsistent unicode characters in the properties `title`, `fullTitle`, and `artists` ([#15](https://github.com/Sv443/geniURL/issues/15))
 
 <br>
 

+ 3 - 5
src/constants.ts

@@ -1,13 +1,11 @@
 /** Set of all supported [ISO 639-1 language codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) */
 export const langCodes = new Set<string>(["aa","ab","ae","af","ak","am","an","ar","as","av","ay","az","ba","be","bg","bh","bi","bm","bn","bo","br","bs","ca","ce","ch","co","cr","cs","cu","cv","cy","da","de","dv","dz","ee","el","en","eo","es","et","eu","fa","ff","fi","fj","fo","fr","fy","ga","gd","gl","gn","gu","gv","ha","he","hi","ho","hr","ht","hu","hy","hz","ia","id","ie","ig","ii","ik","io","is","it","iu","ja","jv","ka","kg","ki","kj","kk","kl","km","kn","ko","kr","ks","ku","kv","kw","ky","la","lb","lg","li","ln","lo","lt","lu","lv","mg","mh","mi","mk","ml","mn","mr","ms","mt","my","na","nb","nd","ne","ng","nl","nn","no","nr","nv","ny","oc","oj","om","or","os","pa","pi","pl","ps","pt","qu","rm","rn","ro","ru","rw","sa","sc","sd","se","sg","si","sk","sl","sm","sn","so","sq","sr","ss","st","su","sv","sw","ta","te","tg","th","ti","tk","tl","tn","to","tr","ts","tt","tw","ty","ug","uk","ur","uz","ve","vi","vo","wa","wo","xh","yi","yo","za","zh","zu"]);
 
-/** Map of regex and replacement char to normalize fields before fuzzy filtering them */
-export const charReplacements = new Map<RegExp, string>([
+/** Map of unicode variant characters and replacements used in normalizing fields before fuzzy filtering them */
+export const charReplacements = new Map<string, string>([
     ["`´’︐︑ʻ", "'"],
     ["“”", "\""],
     [",", ","],
     ["—─ ", "-"],
     ["    ", " "],
-].map(
-    ([k, v]) => ([new RegExp(`[${k!}]`, "g"), v!])
-));
+]);

+ 11 - 3
src/songData.ts

@@ -231,13 +231,21 @@ export async function getAlbum(songId: number): Promise<Album | null> {
     }
 }
 
+const allReplaceCharsRegex = new RegExp(`[${
+    [...charReplacements.entries()].reduce((a, [chars]) => a + chars, "")
+}]`);
+
+const charReplacementRegexes = [...charReplacements.entries()]
+    .map(([chars, repl]) => ([new RegExp(`[${chars}]`, "g"), repl])) as [RegExp, string][];
+
 /** Removes invisible characters and control characters from a string and replaces weird unicode variants with the regular ASCII characters */
 function normalize(str: string): string
 {
-    charReplacements.forEach((val, regex) => {
-        if(str.match(regex))
+    if(str.match(allReplaceCharsRegex)) {
+        charReplacementRegexes.forEach(([regex, val]) => {
             str = str.replace(regex, val);
-    });
+        });
+    }
 
     return str
         .replace(/[\u0000-\u001F\u007F-\u009F\u200B]/g, "") // 0-width spaces & control characters