Comparer des chaînes en JavaScript et renvoyer un pourcentage de ressemblance

Question

Je recherche une fonction JavaScript qui peut comparer deux chaînes et renvoyer la ressemblance. J'ai regardé soundex mais ce n'est pas vraiment génial pour les chaînes multi-mots ou les non-noms. Je recherche une fonction comme:

function compare(strA,strB){ } compare("Apples","Apple") = Some X Percentage.

La fonction fonctionnerait avec tous les types de chaînes, y compris les nombres, les valeurs multi-mots et les noms. Peut-être qu'il existe un algorithme simple que je pourrais utiliser?

// Ultimately none of these served my purpose so I used this:

/**
 * Decide whether a user's answer matches at least one accepted answer.
 *
 * Each accepted answer is split into words; a word counts as matched when it
 * has the same SOUNDEX code as a user word or, when either word cannot be
 * SOUNDEX-coded (digits, punctuation, ...), when the user word contains it.
 *
 * @param {string} c - Comma-separated list of accepted answers.
 * @param {string} u - Raw answer typed by the user.
 * @returns {boolean} true when, for some accepted answer, the number of
 *   matches divided by its word count is strictly greater than 0.5.
 */
function compare(c, u) {
  var ca = c.split(","); // ca = every accepted answer
  var userWords = clean(u); // cleaned, lower-cased words of the user answer
  var userCodes = [];
  var y;
  for (y = 0; y < userWords.length; y++) {
    userCodes.push(soundex(userWords[y]));
  }

  for (var z = 0; z < ca.length; z++) {
    // caa = words of a single accepted answer (declared locally: the
    // original leaked it as an implicit global, which throws in strict mode)
    var caa = ca[z].trim().split(" ");
    var pc = 0; // number of (answer word, user word) matches
    for (var x = 0; x < caa.length; x++) {
      var answerCode = soundex(caa[x]);
      for (y = 0; y < userWords.length; y++) {
        if (userCodes[y] !== null && answerCode !== null) {
          if (userCodes[y] === answerCode) {
            pc = pc + 1;
          }
        } else if (userWords[y].indexOf(caa[x]) > -1) {
          // Fallback for words SOUNDEX cannot encode: plain substring test.
          pc = pc + 1;
        }
      }
    }
    if (pc / caa.length > 0.5) {
      return true;
    }
  }
  return false;
}

// Create object listing the SOUNDEX values for each letter
// -1 indicates that the letter is not coded, but is used for coding
//  0 indicates that the letter is omitted for modern census archives
//    but acts like -1 for older census archives
//  1 is for BFPV
//  2 is for CGJKQSXZ
//  3 is for DT
//  4 is for L
//  5 is for MN my home state
//  6 is for R
function makesoundex() {
  this.a = -1;
  this.b = 1;
  this.c = 2;
  this.d = 3;
  this.e = -1;
  this.f = 1;
  this.g = 2;
  this.h = 0;
  this.i = -1;
  this.j = 2;
  this.k = 2;
  this.l = 4;
  this.m = 5;
  this.n = 5;
  this.o = -1;
  this.p = 1;
  this.q = 2;
  this.r = 6;
  this.s = 2;
  this.t = 3;
  this.u = -1;
  this.v = 1;
  this.w = 0;
  this.x = 2;
  this.y = -1;
  this.z = 2;
}

// Shared letter -> SOUNDEX digit lookup table.
var sndx = new makesoundex();

/**
 * Check that the input is valid for SOUNDEX coding.
 *
 * @param {?string} name - Candidate word.
 * @returns {boolean} true only for a non-empty string made of A-Z / a-z.
 */
function isSurname(name) {
  if (name == "" || name == null) {
    return false;
  }
  for (var i = 0; i < name.length; i++) {
    var letter = name.charAt(i);
    var isLower = letter >= "a" && letter <= "z";
    var isUpper = letter >= "A" && letter <= "Z";
    if (!isLower && !isUpper) {
      return false;
    }
  }
  return true;
}

/**
 * Collapse out directly adjacent letters that share the same SOUNDEX value.
 * 1. Assumes that surname.length >= 1
 * 2. Assumes that surname contains only lowercase letters
 *
 * @param {string} surname
 * @returns {string} surname with runs of same-coded letters reduced to the
 *   first letter of each run.
 */
function collapse(surname) {
  if (surname.length == 1) {
    return surname;
  }
  var right = collapse(surname.substring(1, surname.length));
  if (sndx[surname.charAt(0)] == sndx[right.charAt(0)]) {
    return surname.charAt(0) + right.substring(1, right.length);
  }
  return surname.charAt(0) + right;
}

/**
 * Drop the letters coded 0 (H and W), following the newer National Archives
 * method, so that the letters around them become adjacent.
 * 1. Assumes that surname.length >= 1
 * 2. Assumes that surname contains only lowercase letters
 * 3. H and W are completely ignored (the first letter is always kept)
 *
 * @param {string} surname
 * @returns {string} surname without its non-leading H/W letters.
 */
function omit(surname) {
  if (surname.length == 1) {
    return surname;
  }
  var right = omit(surname.substring(1, surname.length));
  // Only the value 0 is falsy here; -1 (vowels) is kept.
  if (!sndx[right.charAt(0)]) {
    return surname.charAt(0) + right.substring(1, right.length);
  }
  return surname.charAt(0) + right;
}

/**
 * Output the coded sequence.
 *
 * @param {string} seq - Lowercase, already collapsed letters.
 * @returns {string} Upper-cased first letter, a dash, then exactly three
 *   digits (padded with "0"), e.g. "R-163".
 */
function output_sequence(seq) {
  var output = seq.charAt(0).toUpperCase(); // Retain first letter
  output += "-"; // Separate letter with a dash
  var stage2 = seq.substring(1, seq.length);
  var count = 0;
  for (var i = 0; i < stage2.length && count < 3; i++) {
    if (sndx[stage2.charAt(i)] > 0) {
      output += sndx[stage2.charAt(i)];
      count++;
    }
  }
  for (; count < 3; count++) {
    output += "0";
  }
  return output;
}

/**
 * Compute the SOUNDEX code for a word.
 *
 * @param {?string} value - Word to encode.
 * @returns {?string} Code such as "A-261", or null when value is empty or
 *   contains anything other than ASCII letters.
 */
function soundex(value) {
  if (!isSurname(value)) {
    return null;
  }
  // The original also computed collapse(value) first and threw the result
  // away; only the omit -> collapse pipeline is actually used.
  var stage1 = omit(value.toLowerCase());
  var stage2 = collapse(stage1);
  return output_sequence(stage2);
}

/**
 * Normalise a raw user answer into a list of comparable words.
 *
 * Commas are removed, the text is lower-cased and split on single spaces,
 * "$" and "&" are stripped from every word, and empty or excluded words are
 * dropped.
 *
 * @param {string} u - Raw user answer.
 * @returns {string[]} Cleaned words, in their original order.
 */
function clean(u) {
  var words = u.replace(/,/g, "").toLowerCase().split(" ");
  var cw = ["ARRAY OF WORDS TO BE EXCLUDED FROM COMPARISON"];
  var n = [];
  for (var y = 0; y < words.length; y++) {
    // Don't use & or $ in comparison (every occurrence, not just the first).
    var val = words[y].replace(/[$&]/g, "");
    // Keep a word only if it is non-empty and absent from the exclusion
    // list. The original kept it as soon as it differed from ANY excluded
    // word, which disabled the list once it held two entries.
    if (val !== "" && cw.indexOf(words[y]) === -1) {
      n.push(val); // was n.Push(val): TypeError, arrays have no Push method
    }
  }
  return n;
}

overlord1234 · Answer

Voici une réponse basée sur la distance de Levenshtein https://en.wikipedia.org/wiki/Levenshtein_distance

/**
 * Similarity ratio of two strings derived from their edit distance.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} (L - d) / L, where L is the length of the longer string
 *   and d is editDistance(longer, shorter); 1.0 when both are empty.
 */
function similarity(s1, s2) {
  var firstIsShorter = s1.length < s2.length;
  var longer = firstIsShorter ? s2 : s1;
  var shorter = firstIsShorter ? s1 : s2;
  var longerLength = longer.length;

  // Two empty strings: avoid dividing by zero and report a perfect match.
  if (longerLength === 0) {
    return 1.0;
  }

  var distance = editDistance(longer, shorter);
  return (longerLength - distance) / longerLength;
}

Pour calculer la distance d'édition

/**
 * Case-insensitive Levenshtein distance between two strings, computed with
 * a single rolling row of the dynamic-programming table.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Minimum number of single-character insertions,
 *   deletions and substitutions turning s1 into s2 (ignoring case).
 */
function editDistance(s1, s2) {
  var a = s1.toLowerCase();
  var b = s2.toLowerCase();
  var row = [];
  var col;

  // Row 0: distance from the empty prefix of `a` to each prefix of `b`.
  for (col = 0; col <= b.length; col++) {
    row[col] = col;
  }

  for (var r = 1; r <= a.length; r++) {
    var diagonal = row[0]; // value of the previous row at col - 1
    row[0] = r;
    for (col = 1; col <= b.length; col++) {
      var above = row[col]; // previous row, same column
      if (a.charAt(r - 1) === b.charAt(col - 1)) {
        row[col] = diagonal;
      } else {
        row[col] = Math.min(diagonal, row[col - 1], above) + 1;
      }
      diagonal = above;
    }
  }

  return row[b.length];
}

Usage

similarity('Stack Overflow','Stack Ovrflw')

renvoie 0,8571428571428571

Vous pouvez jouer avec ci-dessous:

/**
 * Input handler for the demo: reads both text fields and writes their
 * similarity into the element with id "output".
 */
function checkSimilarity() {
  var left = document.getElementById("lhsInput").value;
  var right = document.getElementById("rhsInput").value;
  var target = document.getElementById("output");
  target.innerHTML = similarity(left, right);
}

/**
 * Similarity ratio of two strings derived from their edit distance.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} (L - d) / L, where L is the length of the longer string
 *   and d is the edit distance; 1.0 when both strings are empty.
 */
function similarity(s1, s2) {
  var firstIsShorter = s1.length < s2.length;
  var longer = firstIsShorter ? s2 : s1;
  var shorter = firstIsShorter ? s1 : s2;
  var longerLength = longer.length;

  // Two empty strings: avoid dividing by zero and report a perfect match.
  if (longerLength === 0) {
    return 1.0;
  }

  var distance = editDistance(longer, shorter);
  return (longerLength - distance) / longerLength;
}

/**
 * Case-insensitive Levenshtein distance between two strings, computed with
 * a single rolling row of the dynamic-programming table.
 *
 * @param {string} s1
 * @param {string} s2
 * @returns {number} Minimum number of single-character edits.
 */
function editDistance(s1, s2) {
  var a = s1.toLowerCase();
  var b = s2.toLowerCase();
  var row = [];
  var col;

  // Row 0: distance from the empty prefix of `a` to each prefix of `b`.
  for (col = 0; col <= b.length; col++) {
    row[col] = col;
  }

  for (var r = 1; r <= a.length; r++) {
    var diagonal = row[0]; // value of the previous row at col - 1
    row[0] = r;
    for (col = 1; col <= b.length; col++) {
      var above = row[col]; // previous row, same column
      if (a.charAt(r - 1) === b.charAt(col - 1)) {
        row[col] = diagonal;
      } else {
        row[col] = Math.min(diagonal, row[col - 1], above) + 1;
      }
      diagonal = above;
    }
  }

  return row[b.length];
}

<div><label for="lhsInput">String 1:</label> <input type="text" id="lhsInput" oninput="checkSimilarity()" /></div> <div><label for="rhsInput">String 2:</label> <input type="text" id="rhsInput" oninput="checkSimilarity()" /></div> <div>Match: <span id="output">No Input</span></div>

jmort253 · Answer

Voici une fonction très simple qui fait une comparaison et renvoie un pourcentage basé sur l'équivalence. Bien qu'elle n'ait pas été testée pour tous les scénarios possibles, elle peut vous aider à démarrer.

/**
 * Position-by-position comparison of two strings.
 *
 * Counts the indexes at which both strings hold the same character and
 * divides by the length of the longer string.
 *
 * @param {string} a
 * @param {string} b
 * @returns {string} Percentage followed by "%", e.g. "75%". Two empty
 *   strings are identical and yield "100%" (the original returned "NaN%").
 */
function similar(a, b) {
  var minLength = Math.min(a.length, b.length);
  var maxLength = Math.max(a.length, b.length);

  // Nothing to compare: avoid the 0 / 0 division.
  if (maxLength === 0) {
    return "100%";
  }

  var equivalency = 0;
  for (var i = 0; i < minLength; i++) {
    if (a[i] === b[i]) {
      equivalency++;
    }
  }

  var weight = equivalency / maxLength;
  return (weight * 100) + "%";
}

// Demo calls. Guarded so the snippet also loads where no global alert()
// exists (e.g. Node); in a browser the four dialogs appear as before.
if (typeof alert === "function") {
  alert(similar("test", "tes"));   // 75%
  alert(similar("test", "test"));  // 100%
  alert(similar("test", "testt")); // 80%
  alert(similar("test", "tess"));  // 75%
}

VisioN · Answer

Que diriez-vous de la fonction similar_text de la bibliothèque PHP.js ?

Elle est basée sur la fonction PHP du même nom.

/**
 * Calculates the similarity between two strings.
 * discuss at: http://phpjs.org/functions/similar_text
 *
 * Finds the longest common substring, then recurses on the parts to its
 * left and to its right and adds up the lengths found.
 *
 * @param {*} first - Value compared as a string.
 * @param {*} second - Value compared as a string.
 * @returns {number} Number of matching characters; 0 when either argument
 *   is null or undefined.
 */
function similar_text(first, second) {
  if (first === null || second === null ||
      typeof first === 'undefined' || typeof second === 'undefined') {
    return 0;
  }

  first += '';
  second += '';

  var pos1 = 0;
  var pos2 = 0;
  var max = 0;
  var firstLength = first.length;
  var secondLength = second.length;
  var p, q, l, sum;

  // Locate the first longest common substring: first[pos1..] / second[pos2..].
  for (p = 0; p < firstLength; p++) {
    for (q = 0; q < secondLength; q++) {
      for (l = 0;
           (p + l < firstLength) && (q + l < secondLength) &&
           (first.charAt(p + l) === second.charAt(q + l));
           l++);
      if (l > max) {
        max = l;
        pos1 = p;
        pos2 = q;
      }
    }
  }

  sum = max;
  if (sum) {
    // Direct recursive calls: `this.similar_text` breaks whenever `this`
    // is undefined (strict mode / ES modules).
    if (pos1 && pos2) {
      // Left parts. `first` must be cut at pos1; the original cut it at
      // pos2 and so compared the wrong prefix.
      sum += similar_text(first.slice(0, pos1), second.slice(0, pos2));
    }
    if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) {
      // Right parts, after the common substring.
      sum += similar_text(first.slice(pos1 + max), second.slice(pos2 + max));
    }
  }

  return sum;
}

Paulpro · Answer

En voici une que j'ai écrite rapidement et qui pourrait suffire à vos besoins :

/**
 * Position-by-position similarity score of two strings.
 *
 * Penalties: 1 for a character that differs only by case, 4 for a different
 * character, and 4 for every character of length difference. The total is
 * normalised by 2 * (strA.length + strB.length).
 *
 * @param {string} strA
 * @param {string} strB
 * @returns {number} Score between 0 and 1; 1 means identical strings.
 */
function Compare(strA, strB) {
  var totalLength = strA.length + strB.length;

  // Two empty strings are identical; also avoids the 0 / 0 division.
  if (totalLength === 0) {
    return 1;
  }

  var penalty = 0;
  for (var i = strA.length; i--;) {
    // Positions past the end of strB are covered by the length penalty.
    if (typeof strB[i] === 'undefined' || strA[i] === strB[i]) {
      continue;
    }
    penalty += strA[i].toLowerCase() === strB[i].toLowerCase() ? 1 : 4;
  }
  penalty += 4 * Math.abs(strA.length - strB.length);

  // Clamp: a large length difference could otherwise push the score below
  // 0, outside the advertised 0..1 range.
  return Math.max(0, 1 - penalty / (2 * totalLength));
}

Cela pondère les caractères identiques mais de casse différente à un quart du poids des caractères complètement différents ou manquants. La fonction renvoie un nombre compris entre 0 et 1 : 1 signifie que les chaînes sont identiques, 0 qu'elles n'ont aucune similitude. Exemples :

// Sample calls, one per line so that each trailing comment only covers its
// own result.
Compare("Apple", "Apple");   // 1
Compare("Apples", "Apple");  // 0.8181818181818181
// Lower-case "apple": the case-only mismatch on the first letter is what
// yields this lower score (the original listed ("Apples", "Apple") twice).
Compare("Apples", "apple");  // 0.7727272727272727
Compare("a", "A");           // 0.75
Compare("Apples", "appppp"); // 0.45833333333333337
Compare("a", "b");           // 0

Tushar Walzade · Answer

Utiliser cette bibliothèque (`stringSimilarity`) pour la similarité des chaînes a fonctionné à merveille pour moi !

Voici l'exemple -

// Scores "Apples" against "Apple" with compareTwoStrings.
// NOTE(review): `stringSimilarity` is not defined in this snippet; it is
// presumably the object exported by the third-party library this answer
// refers to, which must be loaded first. Confirm.
var similarity = stringSimilarity.compareTwoStrings("Apples","Apple"); // => 0.88

Octavio Díaz · Answer

fuzzyset - Un ensemble flou de chaînes (« fuzzy string set ») pour JavaScript. fuzzyset est une structure de données qui effectue quelque chose de semblable à une recherche plein texte sur des données pour déterminer les fautes d'orthographe probables et la correspondance approximative des chaînes. Notez qu'il s'agit d'un portage JavaScript d'une bibliothèque Python.