package org.molgenis.data.semanticsearch.string;

import ch.qos.logback.classic.joran.action.InsertFromJNDIAction;
import com.google.common.collect.Lists;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;
import org.eclipse.persistence.config.PersistenceUnitProperties;
import org.eclipse.persistence.internal.helper.Helper;
import org.eclipse.persistence.sdo.SDOConstants;
import org.elasticsearch.common.collect.Sets;
import org.elasticsearch.index.query.NotFilterParser;
import org.elasticsearch.threadpool.ThreadPool;
import org.semanticweb.owlapi.rdf.util.RDFConstants;
import org.springframework.beans.propertyeditors.CustomBooleanEditor;

/* loaded from: input_file:WEB-INF/lib/molgenis-data-semanticsearch-1.15.1-SNAPSHOT.jar:org/molgenis/data/semanticsearch/string/NGramDistanceAlgorithm.class */
/**
 * Computes a similarity score between two strings by comparing the character n-grams
 * (bigrams) of their stemmed words.
 *
 * <p>Each input is lower-cased, trimmed, cleaned with
 * {@code Stemmer.replaceIllegalCharacter}, split on single spaces, optionally stripped of
 * stop words, and stemmed. Every remaining non-empty word is padded as {@code ^word$} and
 * cut into overlapping n-grams. The score is
 * {@code 2 * sharedNGrams / (nGramsInFirst + nGramsInSecond) * 100}, rounded to three
 * decimals.
 */
public class NGramDistanceAlgorithm {
    /** Width of the character window used to cut words into n-grams. */
    private static final int nGrams = 2;

    /**
     * English stop words that are dropped before stemming when stop-word removal is
     * requested. The entries are plain literals on purpose: this list must not depend on
     * constants that merely happen to carry the same value in unrelated libraries.
     */
    public static final Set<String> STOPWORDSLIST = Sets.newHashSet(
            "a", "you", "about", "above", "after", "again", "against", "all", "am", "an",
            "and", "any", "are", "aren't", "as", "at", "be", "because", "been", "before",
            "being", "below", "between", "both", "but", "by", "can't", "cannot", "could",
            "couldn't", "did", "didn't", "do", "does", "doesn't", "doing", "don't", "down",
            "during", "each", "few", "for", "from", "further", "had", "hadn't", "has",
            "hasn't", "have", "haven't", "having", "he", "he'd", "he'll", "he's", "her",
            "here", "here's", "hers", "herself", "him", "himself", "his", "how", "how's",
            "i", "i'd", "i'll", "i'm", "i've", "if", "in", "into", "is", "isn't", "it",
            "it's", "its", "itself", "let's", "me", "more", "most", "mustn't", "my",
            "myself", "no", "nor", "not", "of", "off", "on", "once", "only", "or", "other",
            "ought", "our", "ours", "ourselves", "out", "over", "own", "same", "shan't",
            "she", "she'd", "she'll", "she's", "should", "shouldn't", "so", "some", "such",
            "than", "that", "that's", "the", "their", "theirs", "them", "themselves",
            "then", "there", "there's", "these", "they", "they'd", "they'll", "they're",
            "they've", "this", "those", "through", "to", "too", "under", "until", "up",
            "very", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "were",
            "weren't", "what", "what's", "when", "when's", "where", "where's", "which",
            "while", "who", "who's", "whom", "why", "why's", "with", "won't", "would",
            "wouldn't", "you", "you'd", "you'll", "you're", "you've", "your", "yours",
            "yourself", "yourselves", "many", ")", "(");

    /** Shared stemmer used both for cleaning illegal characters and for stemming words. */
    private static final Stemmer CUSTOM_STEMMER = new Stemmer();

    /**
     * Scores the similarity of two strings with stop words removed.
     *
     * @param queryOne first string to compare; must not be {@code null}
     * @param queryTwo second string to compare; must not be {@code null}
     * @return similarity percentage rounded to three decimals, {@code 0} when either
     *     string yields no n-grams
     */
    public static double stringMatching(String queryOne, String queryTwo) {
        return stringMatching(queryOne, queryTwo, true);
    }

    /**
     * Scores the similarity of two strings.
     *
     * @param queryOne first string to compare; must not be {@code null}
     * @param queryTwo second string to compare; must not be {@code null}
     * @param removeStopWords whether words contained in {@link #STOPWORDSLIST} are dropped
     *     before stemming
     * @return similarity percentage rounded to three decimals, {@code 0} when either
     *     string yields no n-grams
     */
    public static double stringMatching(String queryOne, String queryTwo, boolean removeStopWords) {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
        // (e.g. the Turkish dotless i), which would otherwise break the English matching.
        Map<String, Integer> tokensOne =
                createNGrams(queryOne.toLowerCase(Locale.ROOT).trim(), removeStopWords);
        Map<String, Integer> tokensTwo =
                createNGrams(queryTwo.toLowerCase(Locale.ROOT).trim(), removeStopWords);
        return calculateScore(tokensOne, tokensTwo);
    }

    /**
     * Breaks a string into padded character n-grams and counts how often each one occurs.
     *
     * @param inputQuery text to tokenize; must not be {@code null}
     * @param removeStopWords whether words contained in {@link #STOPWORDSLIST} are dropped
     *     before stemming
     * @return a new mutable map from n-gram to its number of occurrences; empty when no
     *     non-empty stemmed word remains
     */
    public static Map<String, Integer> createNGrams(String inputQuery, boolean removeStopWords) {
        List<String> words =
                Lists.newArrayList(CUSTOM_STEMMER.replaceIllegalCharacter(inputQuery).split(" "));
        if (removeStopWords) {
            words.removeAll(STOPWORDSLIST);
        }

        Map<String, Integer> tokenCounts = new HashMap<>();
        for (String word : words) {
            String stemmedWord = CUSTOM_STEMMER.stem(word);
            if (StringUtils.isEmpty(stemmedWord)) {
                continue;
            }
            // Pad with start/end markers so that word boundaries contribute n-grams too.
            StringBuilder padded = new StringBuilder(stemmedWord.length() + 2);
            padded.append('^').append(stemmedWord.toLowerCase(Locale.ROOT)).append('$');

            // Slide a window of nGrams characters over the padded word.
            int length = padded.length();
            for (int start = 0; start + nGrams <= length; start++) {
                tokenCounts.merge(padded.substring(start, start + nGrams), 1, Integer::sum);
            }
        }
        return tokenCounts;
    }

    /**
     * Computes {@code 2 * shared / (totalOne + totalTwo) * 100} for two n-gram count maps,
     * where {@code shared} sums, per common n-gram, the smaller of the two counts.
     *
     * @param tokensOne n-gram counts of the first string
     * @param tokensTwo n-gram counts of the second string
     * @return the score rounded to three decimals, or {@code 0} if either map is empty
     */
    private static double calculateScore(Map<String, Integer> tokensOne, Map<String, Integer> tokensTwo) {
        if (tokensOne.isEmpty() || tokensTwo.isEmpty()) {
            return 0.0d;
        }
        double totalTokens = getTotalNumTokens(tokensOne) + getTotalNumTokens(tokensTwo);

        int matchedTokens = 0;
        for (Map.Entry<String, Integer> entry : tokensOne.entrySet()) {
            Integer otherCount = tokensTwo.get(entry.getKey());
            if (otherCount != null) {
                matchedTokens += Math.min(entry.getValue(), otherCount);
            }
        }

        // English symbols guarantee a '.' decimal separator so the text parses back.
        DecimalFormat threeDecimals =
                new DecimalFormat("##.###", new DecimalFormatSymbols(Locale.ENGLISH));
        return Double.parseDouble(threeDecimals.format(((2 * matchedTokens) / totalTokens) * 100.0d));
    }

    /**
     * Sums all occurrence counts of an n-gram count map.
     *
     * @param tokens n-gram counts
     * @return total number of n-gram occurrences
     */
    private static int getTotalNumTokens(Map<String, Integer> tokens) {
        int total = 0;
        for (int count : tokens.values()) {
            total += count;
        }
        return total;
    }
}
