package org.molgenis.data.semanticsearch.utils;

import com.google.common.collect.Sets;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.molgenis.data.meta.model.AttributeMetaData;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.utils.NGramDistanceAlgorithm;
import org.molgenis.ontology.utils.Stemmer;

/* loaded from: input_file:WEB-INF/lib/molgenis-data-semanticsearch-2.0.0-SNAPSHOT.jar:org/molgenis/data/semanticsearch/utils/SemanticSearchServiceUtils.class */
/**
 * Static helpers for turning free text, attribute metadata and ontology terms into
 * lower-cased search terms.
 *
 * <p>Text is split on every run of characters that is neither alphabetic nor an ASCII digit
 * (see {@link #ILLEGAL_CHARS_REGEX}).
 */
public class SemanticSearchServiceUtils {
    private static final String ILLEGAL_CHARS_REGEX = "[^\\p{IsAlphabetic}0-9]+";

    /** Compiled once so that the splitting methods do not recompile the regex on every call. */
    private static final Pattern ILLEGAL_CHARS_PATTERN = Pattern.compile(ILLEGAL_CHARS_REGEX);

    /**
     * Finds the terms of {@code str} whose stem also occurs among the stemmed terms of
     * {@code str2}.
     *
     * @param str text whose terms are tested and returned; {@code null} is treated as empty
     * @param str2 text whose stemmed terms form the set to match against; {@code null} is
     *     treated as empty
     * @return the matching lower-cased terms of {@code str}, in order of first occurrence
     */
    public static Set<String> findMatchedWords(String str, String str2) {
        Set<String> stemmedTargets =
                splitIntoUniqueTerms(str2).stream().map(Stemmer::stem).collect(Collectors.toSet());
        Set<String> matchedWords = new LinkedHashSet<>();
        for (String term : splitIntoUniqueTerms(str)) {
            if (stemmedTargets.contains(Stemmer.stem(term))) {
                matchedWords.add(term);
            }
        }
        return matchedWords;
    }

    /**
     * Determines the query terms to use for an attribute.
     *
     * <p>If {@code set} is non-null and non-empty, a copy of it is returned and the attribute is
     * ignored. Otherwise the attribute's non-blank label and non-blank description are used.
     *
     * @param attributeMetaData attribute to fall back on; may be {@code null}
     * @param set explicitly supplied query terms; may be {@code null} or empty
     * @return a new, mutable set of query terms; empty when neither source provides any
     */
    public static Set<String> getQueryTermsFromAttribute(AttributeMetaData attributeMetaData, Set<String> set) {
        Set<String> queryTerms = new HashSet<>();
        if (set != null && !set.isEmpty()) {
            queryTerms.addAll(set);
        } else if (attributeMetaData != null) {
            String label = attributeMetaData.getLabel();
            if (StringUtils.isNotBlank(label)) {
                queryTerms.add(label);
            }
            String description = attributeMetaData.getDescription();
            if (StringUtils.isNotBlank(description)) {
                queryTerms.add(description);
            }
        }
        return queryTerms;
    }

    /**
     * Collects the lower-cased synonyms of an ontology term followed by its lower-cased label.
     *
     * @param ontologyTerm term to read synonyms and label from
     * @return a new, mutable, insertion-ordered set: synonyms first, then the label
     */
    public static Set<String> collectLowerCaseTerms(OntologyTerm ontologyTerm) {
        Set<String> lowerCaseTerms = new LinkedHashSet<>();
        for (String synonym : ontologyTerm.getSynonyms()) {
            lowerCaseTerms.add(StringUtils.lowerCase(synonym));
        }
        lowerCaseTerms.add(ontologyTerm.getLabel().toLowerCase());
        return lowerCaseTerms;
    }

    /**
     * Same result as {@link #collectLowerCaseTerms(OntologyTerm)}; kept as a separate entry
     * point for existing callers.
     *
     * @param ontologyTerm term to read synonyms and label from
     * @return a new, mutable, insertion-ordered set: synonyms first, then the label
     */
    public static Set<String> getLowerCaseTerms(OntologyTerm ontologyTerm) {
        return collectLowerCaseTerms(ontologyTerm);
    }

    /**
     * Splits text into distinct lower-cased terms, dropping blank fragments.
     *
     * @param str text to split; {@code null} yields an empty set
     * @return a new, mutable set of terms in order of first occurrence
     */
    public static Set<String> splitIntoUniqueTerms(String str) {
        return lowerCaseTerms(str).collect(Collectors.toCollection(LinkedHashSet::new));
    }

    /**
     * Splits text into lower-cased terms, dropping blank fragments and keeping duplicates.
     *
     * @param str text to split; {@code null} yields an empty list
     * @return the terms in order of occurrence
     */
    public static List<String> splitIntoTerms(String str) {
        return lowerCaseTerms(str).collect(Collectors.toList());
    }

    /**
     * Splits text into distinct lower-cased terms, dropping blank fragments and every term
     * contained in {@code NGramDistanceAlgorithm.STOPWORDSLIST}.
     *
     * @param str text to split; {@code null} yields an empty set
     * @return a new, mutable set of the remaining terms in order of first occurrence
     */
    public static Set<String> splitRemoveStopWords(String str) {
        return lowerCaseTerms(str)
                .filter(term -> !NGramDistanceAlgorithm.STOPWORDSLIST.contains(term))
                .collect(Collectors.toCollection(LinkedHashSet::new));
    }

    /** Splits on illegal characters, lower-cases each fragment and drops blank ones. */
    private static Stream<String> lowerCaseTerms(String str) {
        if (str == null) {
            return Stream.empty();
        }
        // A leading delimiter produces an empty first fragment; the blank filter removes it.
        return Arrays.stream(ILLEGAL_CHARS_PATTERN.split(str))
                .map(StringUtils::lowerCase)
                .filter(StringUtils::isNotBlank);
    }
}
