package org.molgenis.data.semanticsearch.service.impl;

import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.common.base.Joiner;
import org.molgenis.data.semanticsearch.explain.bean.OntologyTermHit;
import org.molgenis.data.semanticsearch.explain.criteria.MatchingCriterion;
import org.molgenis.data.semanticsearch.explain.criteria.impl.StrictMatchingCriterion;
import org.molgenis.data.semanticsearch.semantic.Hit;
import org.molgenis.data.semanticsearch.service.TagGroupGenerator;
import org.molgenis.data.semanticsearch.service.bean.TagGroup;
import org.molgenis.data.semanticsearch.utils.OntologyTermHitComparator;
import org.molgenis.data.semanticsearch.utils.SemanticSearchServiceUtils;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.model.SemanticType;
import org.molgenis.ontology.core.service.OntologyService;
import org.molgenis.ontology.utils.NGramDistanceAlgorithm;
import org.molgenis.ontology.utils.Stemmer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:WEB-INF/lib/molgenis-data-semanticsearch-2.0.0-SNAPSHOT.jar:org/molgenis/data/semanticsearch/service/impl/TagGroupGeneratorImpl.class */
/**
 * {@link TagGroupGenerator} implementation that looks up ontology terms for a free-text query via
 * the {@link OntologyService}, keeps the terms accepted by a {@link MatchingCriterion}, and
 * combines the resulting hits into scored {@link TagGroup}s.
 */
public class TagGroupGeneratorImpl implements TagGroupGenerator {
    /** Maximum number of ontology terms requested from the ontology service per query. */
    public static final int MAX_NUM_TAGS = 3000;

    /** Matching criterion applied by {@link #generateTagGroups(String, List, List)}. */
    public static final MatchingCriterion STRICT_MATCHING_CRITERION = new StrictMatchingCriterion();

    /** Maximum number of tag groups returned by {@link #combineTagGroups(Set, List)}. */
    private static final int MAX_NUM_TAG_GROUPS = 20;

    private static final String ILLEGAL_CHARS_REGEX = "[^\\p{L}'a-zA-Z0-9\\.~]+";

    /** Precompiled once; the splitter is invoked repeatedly inside nested loops. */
    private static final Pattern ILLEGAL_CHARS_PATTERN = Pattern.compile(ILLEGAL_CHARS_REGEX);

    private static final Logger LOG = LoggerFactory.getLogger(TagGroupGeneratorImpl.class);

    private final OntologyService ontologyService;
    private final Joiner termJoiner = Joiner.on(' ');

    /**
     * @param ontologyService service used to look up ontology terms and semantic types
     * @throws NullPointerException if {@code ontologyService} is {@code null}
     */
    @Autowired
    public TagGroupGeneratorImpl(OntologyService ontologyService) {
        this.ontologyService = Objects.requireNonNull(ontologyService);
    }

    /**
     * Generates tag groups using every semantic type flagged as a global key concept as the
     * key-concept filter.
     *
     * @param str the free-text query
     * @param list forwarded unchanged to {@code OntologyService.findOntologyTerms} as its first
     *     argument (presumably ontology identifiers; confirm against the service)
     * @return the generated tag groups
     */
    @Override
    public List<TagGroup> generateTagGroups(String str, List<String> list) {
        List<SemanticType> globalKeyConcepts = ontologyService.getAllSemanticTypes().stream()
                .filter(SemanticType::isGlobalKeyConcept)
                .collect(Collectors.toList());
        return generateTagGroups(str, list, globalKeyConcepts);
    }

    /**
     * Generates tag groups for a free-text query.
     *
     * <p>The query is split into search terms, at most {@link #MAX_NUM_TAGS} ontology terms are
     * requested from the ontology service, {@link #STRICT_MATCHING_CRITERION} is applied, hits are
     * filtered by the key-concept list and finally combined into tag groups.
     *
     * @param str the free-text query
     * @param list forwarded unchanged to {@code OntologyService.findOntologyTerms} as its first
     *     argument (presumably ontology identifiers; confirm against the service)
     * @param list2 key-concept filter; when empty no hit is filtered out, otherwise a hit is kept
     *     only if every semantic type of its ontology term is contained in this list
     * @return the generated tag groups
     */
    @Override
    public List<TagGroup> generateTagGroups(String str, List<String> list, List<SemanticType> list2) {
        Set<String> searchTerms = removeIllegalCharactersAndStopWords(str);
        LOG.debug("findTagGroups({},{},{})", list, searchTerms, MAX_NUM_TAGS);

        List<OntologyTerm> relevantTerms =
                ontologyService.findOntologyTerms(list, searchTerms, MAX_NUM_TAGS);
        List<OntologyTermHit> candidates =
                applyTagMatchingCriterion(relevantTerms, searchTerms, STRICT_MATCHING_CRITERION).stream()
                        .filter(hit -> containsKeyConcepts(list2, hit))
                        .collect(Collectors.toList());
        LOG.debug("Candidates: {}", candidates);

        List<TagGroup> tagGroups = combineTagGroups(searchTerms, candidates);
        LOG.debug("OntologyTermHit: {}", tagGroups);
        return tagGroups;
    }

    /**
     * Combines individual hits into tag groups.
     *
     * <p>Hits are ordered with {@link OntologyTermHitComparator} and grouped by their matched
     * words. Each not-yet-used group acts as a seed; any later group is merged into it when the
     * union of both groups' words scores strictly higher against the search terms than the seed
     * alone. For every merged set, one tag group is emitted per combination of one ontology term
     * from each member group.
     *
     * @param set the search terms the hits are scored against
     * @param list the hits to combine; this list is not modified
     * @return at most 20 tag groups, in the order they were produced
     */
    @Override
    public List<TagGroup> combineTagGroups(Set<String> set, List<OntologyTermHit> list) {
        // Sort a private copy: the caller's list may be unmodifiable and must not be reordered
        // as a side effect of this call.
        List<OntologyTermHit> rankedHits = new ArrayList<>(list);
        rankedHits.sort(new OntologyTermHitComparator());
        LOG.debug("Hits: {}", rankedHits);

        // Insertion-ordered, so the key order follows the ranking of the first hit of each group.
        Multimap<String, OntologyTermHit> hitsByMatchedWords = LinkedHashMultimap.create();
        for (OntologyTermHit hit : rankedHits) {
            hitsByMatchedWords.put(hit.getMatchedWords(), hit);
        }

        List<String> matchedWordGroups = Lists.newArrayList(hitsByMatchedWords.keySet());
        Set<String> consumedGroups = new HashSet<>();
        List<TagGroup> tagGroups = new ArrayList<>();

        for (int seedIndex = 0; seedIndex < matchedWordGroups.size(); seedIndex++) {
            // Only the first MAX_NUM_TAG_GROUPS results are returned, so further work is wasted.
            if (tagGroups.size() >= MAX_NUM_TAG_GROUPS) {
                break;
            }
            String seed = matchedWordGroups.get(seedIndex);
            if (consumedGroups.contains(seed)) {
                continue;
            }

            // Loop-invariant for the inner scan below.
            Set<String> seedTerms = removeIllegalCharactersAndStopWords(seed);
            float seedScore = round(distanceFrom(seed, set));

            Multimap<String, OntologyTermHit> combined = LinkedHashMultimap.create();
            combined.putAll(seed, hitsByMatchedWords.get(seed));
            for (int otherIndex = seedIndex + 1; otherIndex < matchedWordGroups.size(); otherIndex++) {
                String other = matchedWordGroups.get(otherIndex);
                String joinedWords =
                        termJoiner.join(Sets.union(seedTerms, removeIllegalCharactersAndStopWords(other)));
                if (round(distanceFrom(joinedWords, set)) > seedScore) {
                    combined.putAll(other, hitsByMatchedWords.get(other));
                }
            }
            consumedGroups.addAll(combined.keySet());

            String matchedWords = termJoiner.join(
                    SemanticSearchServiceUtils.splitIntoUniqueTerms(termJoiner.join(combined.keySet())));
            float score = round(distanceFrom(matchedWords, set));
            for (List<OntologyTerm> ontologyTerms : createTagGroups(combined)) {
                tagGroups.add(TagGroup.create(ontologyTerms, matchedWords, score));
            }
        }

        return tagGroups.stream().limit(MAX_NUM_TAG_GROUPS).collect(Collectors.toList());
    }

    /**
     * Keeps the ontology terms accepted by the matching criterion, turns them into hits ordered
     * with {@link OntologyTermHitComparator}, and drops every hit for which
     * {@code OntologyService.isDescendant(earlierTerm, thisTerm)} holds for some hit that sorts
     * before it.
     *
     * @param list candidate ontology terms
     * @param set the search terms; they are stemmed before matching
     * @param matchingCriterion decides whether a term matches the stemmed search terms
     * @return the remaining hits in comparator order; an empty list when {@code list} is empty
     */
    @Override
    public List<OntologyTermHit> applyTagMatchingCriterion(
            List<OntologyTerm> list, Set<String> set, MatchingCriterion matchingCriterion) {
        if (list.isEmpty()) {
            return Collections.emptyList();
        }

        Set<String> stemmedSearchTerms = set.stream().map(Stemmer::stem).collect(Collectors.toSet());
        List<OntologyTermHit> rankedHits = list.stream()
                .filter(ontologyTerm -> matchingCriterion.apply(stemmedSearchTerms, ontologyTerm))
                .map(ontologyTerm -> createTagGroup(stemmedSearchTerms, ontologyTerm))
                .sorted(new OntologyTermHitComparator())
                .collect(Collectors.toList());

        // Every hit is compared against ALL hits sorted before it, including the one at index 0.
        // The previous bounds (> 1 and > 0) never used index 0 as a comparison partner and never
        // considered index 1 for removal.
        // NOTE(review): presumably isDescendant(a, b) means "a is a descendant of b", i.e. the
        // later, more general term is dropped in favour of the earlier, more specific one - confirm.
        List<OntologyTermHit> remainingHits = Lists.newArrayList(rankedHits);
        for (int later = rankedHits.size() - 1; later > 0; later--) {
            OntologyTermHit laterHit = rankedHits.get(later);
            for (int earlier = later - 1; earlier >= 0; earlier--) {
                OntologyTerm earlierTerm = rankedHits.get(earlier).getOntologyTerm();
                if (ontologyService.isDescendant(earlierTerm, laterHit.getOntologyTerm())) {
                    remainingHits.remove(laterHit);
                    // One removal per hit: stop so remove(Object) cannot delete a second, equal entry.
                    break;
                }
            }
        }
        return remainingHits;
    }

    /**
     * Builds the hit for one ontology term from its best matching synonym (despite the name, the
     * result is an {@link OntologyTermHit}, not a tag group).
     */
    private OntologyTermHit createTagGroup(Set<String> stemmedSearchTerms, OntologyTerm ontologyTerm) {
        Hit<String> bestSynonym = bestMatchingSynonym(ontologyTerm, stemmedSearchTerms);
        return OntologyTermHit.create(
                ontologyTerm, Stemmer.cleanStemPhrase(bestSynonym.getResult()), bestSynonym.getScore());
    }

    /**
     * Builds every combination that takes exactly one ontology term from each key of the multimap.
     *
     * <p>Keys are processed in the multimap's iteration order. For each further key, the existing
     * combinations are copied once per term of that key and the term is appended to each copy.
     *
     * @param multimap hits grouped by matched words
     * @return a mutable list of combinations; empty when the multimap is empty
     */
    List<List<OntologyTerm>> createTagGroups(Multimap<String, OntologyTermHit> multimap) {
        List<List<OntologyTerm>> combinations = new ArrayList<>();
        for (Collection<OntologyTermHit> hits : multimap.asMap().values()) {
            List<OntologyTerm> ontologyTerms =
                    hits.stream().map(OntologyTermHit::getOntologyTerm).collect(Collectors.toList());
            if (combinations.isEmpty()) {
                for (OntologyTerm ontologyTerm : ontologyTerms) {
                    combinations.add(Lists.newArrayList(ontologyTerm));
                }
            } else {
                List<List<OntologyTerm>> extended = new ArrayList<>();
                for (OntologyTerm ontologyTerm : ontologyTerms) {
                    for (List<OntologyTerm> combination : combinations) {
                        List<OntologyTerm> copy = new ArrayList<>(combination);
                        copy.add(ontologyTerm);
                        extended.add(copy);
                    }
                }
                combinations = extended;
            }
        }
        return combinations;
    }

    /**
     * Finds the best scoring synonym of an ontology term.
     *
     * <p>Only synonyms whose stemmed words are all contained in {@code set} are considered. Each
     * is scored with {@link #distanceFrom(String, Set)} and the greatest {@link Hit} by natural
     * ordering is returned.
     *
     * @param ontologyTerm the term whose lower-cased terms are examined
     * @param set the (stemmed) search terms
     * @return the best matching synonym together with its score
     * @throws IndexOutOfBoundsException if no synonym is fully covered by {@code set}; the
     *     exception type matches the earlier {@code get(0)}-on-empty-list behaviour
     */
    Hit<String> bestMatchingSynonym(OntologyTerm ontologyTerm, Set<String> set) {
        return SemanticSearchServiceUtils.collectLowerCaseTerms(ontologyTerm).stream()
                .map(synonym -> termJoiner.join(removeIllegalCharactersAndStopWords(synonym)))
                .filter(synonym -> set.containsAll(Stemmer.splitAndStem(synonym)))
                .map(synonym -> Hit.create(synonym, distanceFrom(synonym, set)))
                .max(Comparator.naturalOrder())
                .orElseThrow(() -> new IndexOutOfBoundsException(
                        "No synonym of ontology term " + ontologyTerm
                                + " is fully covered by the search terms " + set));
    }

    /**
     * Scores a phrase against the search terms: both sides are stemmed and joined, compared with
     * {@link NGramDistanceAlgorithm#stringMatching}, and the result is divided by 100.
     *
     * @param str the phrase to score; illegal characters and stop words are removed first
     * @param set the search terms
     * @return the n-gram matching result divided by 100
     */
    float distanceFrom(String str, Set<String> set) {
        String stemmedPhrase = Stemmer.stemAndJoin(removeIllegalCharactersAndStopWords(str));
        String stemmedSearchTerms = Stemmer.stemAndJoin(set);
        float similarity =
                ((float) NGramDistanceAlgorithm.stringMatching(stemmedPhrase, stemmedSearchTerms)) / 100.0f;
        LOG.debug("Similarity between: {} and {} is {}", stemmedPhrase, stemmedSearchTerms, similarity);
        return similarity;
    }

    /**
     * Splits a phrase on runs of illegal characters, lower-cases the parts, and drops stop words
     * and blank parts.
     *
     * @param str the phrase to split; must not be {@code null}
     * @return the remaining words, de-duplicated, in order of first occurrence
     */
    Set<String> removeIllegalCharactersAndStopWords(String str) {
        return Arrays.stream(ILLEGAL_CHARS_PATTERN.split(str))
                .map(StringUtils::lowerCase)
                .filter(word -> !NGramDistanceAlgorithm.STOPWORDSLIST.contains(word))
                .filter(StringUtils::isNotBlank)
                .collect(Collectors.toCollection(LinkedHashSet::new));
    }

    /** Rounds a score to five decimal places. */
    float round(float f) {
        return Math.round(f * 100000.0f) / 100000.0f;
    }

    /**
     * Returns {@code true} when the key-concept list is empty, or when every semantic type of the
     * hit's ontology term is contained in it (which also holds for a term without semantic types).
     */
    private boolean containsKeyConcepts(List<SemanticType> keyConcepts, OntologyTermHit ontologyTermHit) {
        if (keyConcepts.isEmpty()) {
            return true;
        }
        return ontologyTermHit.getOntologyTerm().getSemanticTypes().stream()
                .allMatch(keyConcepts::contains);
    }
}
