package org.molgenis.data.discovery.service.impl;

import com.google.common.base.Joiner;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.molgenis.data.discovery.filters.DataTypePostFilter;
import org.molgenis.data.discovery.model.biobank.BiobankSampleAttribute;
import org.molgenis.data.discovery.model.biobank.BiobankUniverse;
import org.molgenis.data.discovery.model.matching.AttributeMappingCandidate;
import org.molgenis.data.discovery.model.matching.MatchingExplanation;
import org.molgenis.data.discovery.service.OntologyBasedExplainService;
import org.molgenis.data.discovery.utils.MatchingExplanationHit;
import org.molgenis.data.populate.IdGenerator;
import org.molgenis.data.semanticsearch.service.bean.SearchParam;
import org.molgenis.data.semanticsearch.utils.SemanticSearchServiceUtils;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.model.SemanticType;
import org.molgenis.ontology.core.service.OntologyService;
import org.molgenis.ontology.utils.NGramDistanceAlgorithm;
import org.molgenis.ontology.utils.Stemmer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:WEB-INF/lib/molgenis-data-discovery-2.0.0-SNAPSHOT.jar:org/molgenis/data/discovery/service/impl/OntologyBasedExplainServiceImpl.class */
public class OntologyBasedExplainServiceImpl implements OntologyBasedExplainService {
    private static final Logger LOG = LoggerFactory.getLogger((Class<?>) OntologyBasedExplainServiceImpl.class);
    private static final String DIGIT_PATTERN = "\\d+";
    private final Joiner termJoiner = Joiner.on(' ');
    private final DataTypePostFilter dataTypePostFilter = new DataTypePostFilter();
    private final IdGenerator idGenerator;
    private final OntologyService ontologyService;

    @Autowired
    public OntologyBasedExplainServiceImpl(IdGenerator idGenerator, OntologyService ontologyService) {
        this.idGenerator = (IdGenerator) Objects.requireNonNull(idGenerator);
        this.ontologyService = (OntologyService) Objects.requireNonNull(ontologyService);
    }

    @Override // org.molgenis.data.discovery.service.OntologyBasedExplainService
    public List<AttributeMappingCandidate> explain(BiobankUniverse biobankUniverse, SearchParam searchParam, BiobankSampleAttribute biobankSampleAttribute, List<BiobankSampleAttribute> list, AttributeCandidateScoringImpl attributeCandidateScoringImpl) {
        HashMap hashMap = new HashMap();
        ArrayList arrayList = new ArrayList();
        LOG.trace("Started explaining the matched source attributes");
        for (BiobankSampleAttribute biobankSampleAttribute2 : list) {
            MatchingExplanationHit score = attributeCandidateScoringImpl.score(biobankSampleAttribute, biobankSampleAttribute2, findAllRelatedOntologyTerms(biobankSampleAttribute, biobankSampleAttribute2, biobankUniverse), searchParam.getMatchParam().isStrictMatch());
            MatchingExplanation create = MatchingExplanation.create(this.idGenerator.generateId(), (List) score.getMatchedOntologyTermHits().stream().map((v0) -> {
                return v0.getTarget();
            }).map((v0) -> {
                return v0.getOntologyTerm();
            }).collect(Collectors.toList()), (List) score.getMatchedOntologyTermHits().stream().map((v0) -> {
                return v0.getSource();
            }).map((v0) -> {
                return v0.getOntologyTerm();
            }).collect(Collectors.toList()), score.getMatchedWords(), this.termJoiner.join(SemanticSearchServiceUtils.findMatchedWords(score.getMatchedWords(), biobankSampleAttribute.getLabel())), this.termJoiner.join(SemanticSearchServiceUtils.findMatchedWords(score.getMatchedWords(), biobankSampleAttribute2.getLabel())), score.getVsmScore(), score.getNgramScore());
            String matchedWords = create.getMatchedWords();
            if (hashMap.containsKey(matchedWords)) {
                if (((Boolean) hashMap.get(matchedWords)).booleanValue()) {
                    arrayList.add(AttributeMappingCandidate.create(this.idGenerator.generateId(), biobankUniverse, biobankSampleAttribute, biobankSampleAttribute2, create));
                }
            } else if (isMatchHighQuality(create, searchParam, biobankUniverse)) {
                arrayList.add(AttributeMappingCandidate.create(this.idGenerator.generateId(), biobankUniverse, biobankSampleAttribute, biobankSampleAttribute2, create));
                hashMap.put(matchedWords, true);
            } else {
                hashMap.put(matchedWords, false);
            }
        }
        LOG.trace("Finished explaining the matched source attributes");
        return (List) arrayList.stream().sorted().collect(Collectors.toList());
    }

    private boolean isMatchHighQuality(MatchingExplanation matchingExplanation, SearchParam searchParam, BiobankUniverse biobankUniverse) {
        if (matchingExplanation.getNgramScore() > searchParam.getMatchParam().getHighQualityThreshold()) {
            return true;
        }
        List<OntologyTerm> ontologyTerms = matchingExplanation.getOntologyTerms();
        if (ontologyTerms.isEmpty()) {
            ontologyTerms = this.ontologyService.findExactOntologyTerms(this.ontologyService.getAllOntologyIds(), SemanticSearchServiceUtils.splitIntoUniqueTerms(matchingExplanation.getMatchedWords()), 10);
        }
        List<SemanticType> keyConcepts = biobankUniverse.getKeyConcepts();
        LinkedHashMultimap create = LinkedHashMultimap.create();
        Set<String> splitAndStem = Stemmer.splitAndStem(matchingExplanation.getMatchedWords());
        for (OntologyTerm ontologyTerm : ontologyTerms) {
            Optional findFirst = ontologyTerm.getSynonyms().stream().map(Stemmer::splitAndStem).filter(set -> {
                return splitAndStem.containsAll(set);
            }).map(set2 -> {
                return (String) set2.stream().sorted().collect(Collectors.joining(" "));
            }).findFirst();
            if (findFirst.isPresent()) {
                create.put(findFirst.get(), ontologyTerm);
            }
        }
        return !((List) create.asMap().values().stream().filter(collection -> {
            return areOntologyTermsImportant(keyConcepts, collection);
        }).collect(Collectors.toList())).isEmpty() && ((String) SemanticSearchServiceUtils.splitIntoUniqueTerms(matchingExplanation.getMatchedTargetWords()).stream().map((v0) -> {
            return v0.toLowerCase();
        }).filter(str -> {
            return !NGramDistanceAlgorithm.STOPWORDSLIST.contains(str);
        }).filter(str2 -> {
            return !str2.matches(DIGIT_PATTERN);
        }).collect(Collectors.joining(" "))).length() >= 3 && ((String) SemanticSearchServiceUtils.splitIntoUniqueTerms(matchingExplanation.getMatchedSourceWords()).stream().map((v0) -> {
            return v0.toLowerCase();
        }).filter(str3 -> {
            return !NGramDistanceAlgorithm.STOPWORDSLIST.contains(str3);
        }).filter(str4 -> {
            return !str4.matches(DIGIT_PATTERN);
        }).collect(Collectors.joining(" "))).length() >= 3;
    }

    private boolean areOntologyTermsImportant(List<SemanticType> list, Collection<OntologyTerm> collection) {
        return collection.stream().filter(ontologyTerm -> {
            return ontologyTerm.getSemanticTypes().isEmpty() || ontologyTerm.getSemanticTypes().stream().allMatch(semanticType -> {
                return semanticType.isGlobalKeyConcept() && !list.contains(semanticType);
            });
        }).count() >= collection.stream().filter(ontologyTerm2 -> {
            return !ontologyTerm2.getSemanticTypes().isEmpty() && ontologyTerm2.getSemanticTypes().stream().anyMatch(semanticType -> {
                return !semanticType.isGlobalKeyConcept() || list.contains(semanticType);
            });
        }).count();
    }

    private Multimap<OntologyTerm, OntologyTerm> findAllRelatedOntologyTerms(BiobankSampleAttribute biobankSampleAttribute, BiobankSampleAttribute biobankSampleAttribute2, BiobankUniverse biobankUniverse) {
        LinkedHashMultimap create = LinkedHashMultimap.create();
        Set<OntologyTerm> allOntologyTerms = getAllOntologyTerms(biobankSampleAttribute, biobankUniverse);
        Set<OntologyTerm> allOntologyTerms2 = getAllOntologyTerms(biobankSampleAttribute2, biobankUniverse);
        for (OntologyTerm ontologyTerm : allOntologyTerms) {
            for (OntologyTerm ontologyTerm2 : allOntologyTerms2) {
                if (this.ontologyService.related(ontologyTerm, ontologyTerm2, 4) && this.ontologyService.areWithinDistance(ontologyTerm, ontologyTerm2, 5)) {
                    create.put(ontologyTerm, ontologyTerm2);
                }
            }
        }
        return create;
    }

    private Set<OntologyTerm> getAllOntologyTerms(BiobankSampleAttribute biobankSampleAttribute, BiobankUniverse biobankUniverse) {
        List<SemanticType> keyConcepts = biobankUniverse.getKeyConcepts();
        return (Set) biobankSampleAttribute.getTagGroups().stream().flatMap(identifiableTagGroup -> {
            return identifiableTagGroup.getOntologyTerms().stream();
        }).filter(ontologyTerm -> {
            return areSemanticTypesImportant(ontologyTerm, keyConcepts);
        }).collect(Collectors.toSet());
    }

    private boolean areSemanticTypesImportant(OntologyTerm ontologyTerm, List<SemanticType> list) {
        Iterator<SemanticType> it = ontologyTerm.getSemanticTypes().iterator();
        while (it.hasNext()) {
            if (list.contains(it.next())) {
                return false;
            }
        }
        return true;
    }
}
