-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlangdetect-worker.js
106 lines (99 loc) · 2.31 KB
/
langdetect-worker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
importScripts("guesslang.min.js");
// Language identifiers this worker is willing to report. The message handler
// in this file skips any model result whose `languageId` is not listed here.
const GUESSLANG_LANGUAGES = [
  // data and markup formats
  "json", "py", "html", "sql", "md",
  "java", "php", "css", "xml", "cpp",
  "rs", "cs", "rb", "sh", "yaml",
  "toml", "go", "clj", "erl", "js",
  "ts", "swift", "kt", "groovy", "ps1",
  "dart", "scala", "lua",
];
// Single GuessLang instance (provided by the script loaded via importScripts),
// created once and reused for every message this worker handles.
const guessLang = new self.GuessLang();

/**
 * Sends one detection result back to the thread that owns this worker.
 *
 * @param {string} content - The text that was analysed, echoed back unchanged.
 * @param {*} idx - The `idx` value from the request, echoed back unchanged
 *   (presumably lets the caller match replies to requests; verify against caller).
 * @param {string} language - Detected language identifier.
 * @param {number} confidence - Confidence reported for that language.
 */
function postDetection(content, idx, language, confidence) {
  postMessage({
    guesslang: { language, confidence },
    content,
    idx,
  });
}

/**
 * Heuristic JSON check: the trimmed text must be wrapped in `{}` or `[]`
 * and must parse with JSON.parse.
 *
 * @param {string} text - Raw text to inspect.
 * @returns {boolean} True when the text parses as a JSON object or array.
 */
function isJsonDocument(text) {
  const trimmed = text.trim();
  const looksLikeObject = trimmed.startsWith("{") && trimmed.endsWith("}");
  const looksLikeArray = trimmed.startsWith("[") && trimmed.endsWith("]");
  if (!looksLikeObject && !looksLikeArray) {
    return false;
  }
  try {
    return typeof JSON.parse(trimmed) === "object";
  } catch (e) {
    // Not valid JSON: this is an expected outcome, fall through to the model.
    return false;
  }
}

/**
 * Picks the first model result that is a supported language and clears its
 * confidence threshold. The top-ranked result only needs to exceed 0.15;
 * every later result must exceed 0.5.
 *
 * @param {Array<{languageId: string, confidence: number}>} results - Model
 *   output, in the order returned by `runModel`.
 * @returns {{languageId: string, confidence: number}|undefined} The accepted
 *   result, or undefined when none qualifies.
 */
function pickLanguage(results) {
  const topResultMinConfidence = 0.15;
  const otherResultMinConfidence = 0.5;
  return results.find((candidate, rank) => {
    const threshold =
      rank === 0 ? topResultMinConfidence : otherResultMinConfidence;
    return (
      GUESSLANG_LANGUAGES.includes(candidate.languageId) &&
      candidate.confidence > threshold
    );
  });
}

/**
 * Worker message handler. Reads `event.data.content` and `event.data.idx`,
 * detects the language of `content`, and posts back
 * `{ guesslang: { language, confidence }, content, idx }`.
 *
 * Content that parses as a JSON object/array is reported as "json" with
 * confidence 1.0 without running the model. Otherwise the GuessLang model is
 * run; if no result qualifies, no message is posted.
 */
onmessage = (event) => {
  const { content, idx } = event.data;

  // Cheap custom heuristic first: valid JSON never needs the model.
  if (isJsonDocument(content)) {
    postDetection(content, idx, "json", 1.0);
    return;
  }

  guessLang
    .runModel(content)
    .then((results) => {
      const match = pickLanguage(results);
      if (match !== undefined) {
        postDetection(content, idx, match.languageId, match.confidence);
      }
    })
    .catch((error) => {
      // Previously a rejected runModel() promise was left unhandled; surface
      // it in the console. No reply is posted, matching the no-match case.
      console.error("Language detection failed:", error);
    });
};