package org.molgenis.ontology.sorta.service.impl;

import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.QueryRule;
import org.molgenis.data.semanticsearch.semantic.Hit;
import org.molgenis.data.semanticsearch.string.NGramDistanceAlgorithm;
import org.molgenis.data.semanticsearch.string.Stemmer;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.ontology.core.meta.OntologyTermDynamicAnnotationMetaData;
import org.molgenis.ontology.core.meta.OntologyTermEntity;
import org.molgenis.ontology.core.meta.OntologyTermMetaData;
import org.molgenis.ontology.core.model.Ontology;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.model.OntologyTermAnnotation;
import org.molgenis.ontology.core.repository.OntologyTermRepository;
import org.molgenis.ontology.core.service.OntologyService;
import org.molgenis.ontology.roc.InformationContentService;
import org.molgenis.ontology.sorta.bean.SortaHit;
import org.molgenis.ontology.sorta.bean.SortaInput;
import org.molgenis.ontology.sorta.service.SortaService;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:WEB-INF/lib/molgenis-ontology-2.0.0-SNAPSHOT.jar:org/molgenis/ontology/sorta/service/impl/SortaServiceImpl.class */
/**
 * {@link SortaService} implementation that looks up ontology terms for an input entity.
 *
 * <p>Candidates are collected from three queries against the data service: an exact match on
 * ontology term annotations, a fuzzy match on term synonyms and an n-gram match on term synonyms.
 * Every lexical candidate is then re-scored locally with {@link NGramDistanceAlgorithm}, and the
 * combined result is sorted by the natural ordering of {@link SortaHit}.
 */
public class SortaServiceImpl implements SortaService {
    /** Regex used to split a query string into words. */
    private static final String NON_WORD_SEPARATOR = "[^a-zA-Z0-9]";

    /** Regex matching every character that is neither an ASCII letter, an ASCII digit nor a space. */
    private static final String ILLEGAL_CHARACTERS_PATTERN = "[^a-zA-Z0-9 ]";

    /** Suffix appended to every word of a fuzzy match query. */
    private static final String FUZZY_MATCH_SIMILARITY = "~0.8";

    private static final String SINGLE_WHITESPACE = " ";

    /** Budget shared by the annotation match and the fuzzy synonym match. */
    private static final int MAX_NUMBER_MATCHES = 50;

    /** Page size of the n-gram synonym match. */
    private static final int NUMBER_NGRAM_MATCHES = 10;

    public static final String SIGNIFICANT_VALUE = "Significant";
    public static final String DEFAULT_MATCHING_NAME_FIELD = "Name";
    public static final String DEFAULT_MATCHING_SYNONYM_PREFIX_FIELD = "Synonym";
    public static final String DEFAULT_MATCHING_IDENTIFIER = "Identifier";
    public static final String COMBINED_SCORE = "Combined_Score";
    public static final String SCORE = "Score";
    private final DataService dataService;
    private final OntologyService ontologyService;
    private final InformationContentService informationContentService;

    /** Words that are dropped from a query before it is stemmed, in addition to the stop words. */
    private static final Set<String> ELASTICSEARCH_RESERVED_WORDS = Sets.newHashSet("or", "and", "if");

    public static final Character DEFAULT_SEPARATOR = ';';

    /**
     * Creates the service.
     *
     * @param dataService used to query ontology terms and their dynamic annotations
     * @param ontologyService used to resolve an ontology by IRI
     * @param informationContentService used to weight matched words
     * @throws NullPointerException if any argument is {@code null}
     */
    @Autowired
    public SortaServiceImpl(DataService dataService, OntologyService ontologyService, InformationContentService informationContentService) {
        this.dataService = Objects.requireNonNull(dataService);
        this.ontologyService = Objects.requireNonNull(ontologyService);
        this.informationContentService = Objects.requireNonNull(informationContentService);
    }

    /**
     * Finds the ontology terms of one ontology that match the given input entity.
     *
     * @param ontologyIri IRI of the ontology to search in
     * @param inputEntity entity holding the values to match, wrapped in a {@link SortaInput}
     * @return the de-duplicated hits, sorted by the natural ordering of {@link SortaHit}
     * @throws IllegalArgumentException if no ontology is registered under {@code ontologyIri}
     */
    @Override
    public List<SortaHit> findOntologyTermEntities(String ontologyIri, Entity inputEntity) {
        Ontology ontology = ontologyService.getOntology(ontologyIri);
        if (ontology == null) {
            throw new IllegalArgumentException("Ontology IRI " + ontologyIri + " does not exist in the database!");
        }
        SortaInput sortaInput = new SortaInput(inputEntity);

        // One fuzzy rule and one n-gram rule per non-blank lexical attribute value.
        List<QueryRule> fuzzyRules = new ArrayList<>();
        List<QueryRule> ngramRules = new ArrayList<>();
        for (String attribute : sortaInput.getLexicalMatchAttributes()) {
            String value = sortaInput.getValue(attribute);
            if (StringUtils.isBlank(value)) {
                continue;
            }
            String stemmedQuery = stemQuery(value);
            if (StringUtils.isBlank(stemmedQuery)) {
                // Only stop words / reserved words were supplied: there is nothing left to search for,
                // and the fuzzy syntax would otherwise degenerate to a lone "~0.8".
                continue;
            }
            fuzzyRules.add(new QueryRule("ontologyTermSynonym", QueryRule.Operator.FUZZY_MATCH, fuzzyMatchQuerySyntax(stemmedQuery)));
            ngramRules.add(new QueryRule("ontologyTermSynonym", QueryRule.Operator.FUZZY_MATCH_NGRAM, stemmedQuery));
        }

        // (name = attribute AND value = input) clauses, joined with OR.
        List<QueryRule> annotationRules = new ArrayList<>();
        for (String attribute : sortaInput.getAnnotationMatchAttributes()) {
            String value = sortaInput.getValue(attribute);
            if (StringUtils.isBlank(value)) {
                continue;
            }
            if (!annotationRules.isEmpty()) {
                annotationRules.add(new QueryRule(QueryRule.Operator.OR));
            }
            List<QueryRule> nameAndValue = Arrays.asList(
                    new QueryRule("name", QueryRule.Operator.EQUALS, attribute),
                    new QueryRule(QueryRule.Operator.AND),
                    new QueryRule("value", QueryRule.Operator.EQUALS, value));
            annotationRules.add(new QueryRule(nameAndValue));
        }

        // Insertion-ordered set: a term found by more than one query is kept once.
        Set<SortaHit> hits = new LinkedHashSet<>();
        if (!annotationRules.isEmpty()) {
            hits.addAll(annotationMatchOntologyTerms(sortaInput, ontology, annotationRules));
        }
        // The fuzzy match only gets what the annotation match left of the budget; skip it entirely
        // when nothing is left so that a zero or negative page size is never passed to the query.
        int remainingMatches = MAX_NUMBER_MATCHES - hits.size();
        if (!fuzzyRules.isEmpty() && remainingMatches > 0) {
            hits.addAll(lexicalMatchOntologyTerms(sortaInput, ontology, remainingMatches, fuzzyRules));
        }
        if (!ngramRules.isEmpty()) {
            hits.addAll(lexicalMatchOntologyTerms(sortaInput, ontology, NUMBER_NGRAM_MATCHES, ngramRules));
        }

        List<SortaHit> sortedHits = Lists.newArrayList(hits);
        Collections.sort(sortedHits);
        return sortedHits;
    }

    /**
     * Finds the terms of {@code ontology} that carry a dynamic annotation selected by
     * {@code annotationRules} and that match the input on that annotation.
     *
     * @return the matching hits, or an empty list when no annotation entity is found
     */
    private List<SortaHit> annotationMatchOntologyTerms(SortaInput sortaInput, Ontology ontology, List<QueryRule> annotationRules) {
        List<Entity> annotationEntities = dataService
                .findAll(OntologyTermDynamicAnnotationMetaData.ONTOLOGY_TERM_DYNAMIC_ANNOTATION,
                        new QueryImpl<Entity>(annotationRules).pageSize(Integer.MAX_VALUE))
                .collect(Collectors.toList());
        if (annotationEntities.isEmpty()) {
            return Collections.emptyList();
        }

        List<QueryRule> termRules = Arrays.asList(
                new QueryRule(OntologyTermMetaData.ONTOLOGY, QueryRule.Operator.EQUALS, ontology.getId()),
                new QueryRule(QueryRule.Operator.AND),
                new QueryRule(OntologyTermMetaData.ONTOLOGY_TERM_DYNAMIC_ANNOTATION, QueryRule.Operator.IN, annotationEntities));
        return dataService
                .findAll(OntologyTermMetaData.ONTOLOGY_TERM,
                        new QueryImpl<OntologyTermEntity>(termRules).pageSize(Integer.MAX_VALUE),
                        OntologyTermEntity.class)
                .map(OntologyTermRepository::toOntologyTerm)
                .map(ontologyTerm -> matchOntologyTermAnnotations(sortaInput, ontologyTerm))
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
    }

    /**
     * Runs {@code synonymRules} as one DIS_MAX rule restricted to {@code ontology} and scores every
     * returned term against the input.
     *
     * @param pageSize maximum number of terms requested from the data service
     * @return the hits for the returned terms; terms that yield no score are left out
     */
    private List<SortaHit> lexicalMatchOntologyTerms(SortaInput sortaInput, Ontology ontology, int pageSize, List<QueryRule> synonymRules) {
        QueryRule disMaxRule = new QueryRule(synonymRules);
        disMaxRule.setOperator(QueryRule.Operator.DIS_MAX);

        List<QueryRule> termRules = Arrays.asList(
                new QueryRule(OntologyTermMetaData.ONTOLOGY, QueryRule.Operator.EQUALS, ontology.getId()),
                new QueryRule(QueryRule.Operator.AND),
                disMaxRule);
        return dataService
                .findAll(OntologyTermMetaData.ONTOLOGY_TERM,
                        new QueryImpl<OntologyTermEntity>(termRules).pageSize(pageSize),
                        OntologyTermEntity.class)
                .map(OntologyTermRepository::toOntologyTerm)
                .map(ontologyTerm -> computeLexicalSimilarity(sortaInput, ontologyTerm, ontology))
                // computeLexicalSimilarity returns null when no score could be computed; a null
                // element would make the final Collections.sort fail.
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
    }

    /**
     * Scores {@code ontologyTerm} against every non-blank lexical attribute value of the input and
     * keeps the hit with the highest weighted score.
     *
     * @return the best hit, or {@code null} when no attribute value produced a hit
     */
    SortaHit computeLexicalSimilarity(SortaInput sortaInput, OntologyTerm ontologyTerm, Ontology ontology) {
        SortaHit bestHit = null;
        for (String attribute : sortaInput.getLexicalMatchAttributes()) {
            String value = sortaInput.getValue(attribute);
            if (StringUtils.isBlank(value)) {
                continue;
            }
            SortaHit candidate = findSynonymWithHighestNgramScore(value, ontology, ontologyTerm);
            if (candidate != null && (bestHit == null || bestHit.getWeightedScore() < candidate.getWeightedScore())) {
                bestHit = candidate;
            }
        }
        return bestHit;
    }

    /**
     * Checks whether one of the term's annotations equals, ignoring case, an annotation attribute
     * of the input in both name and value.
     *
     * @return a hit with score and weighted score 100, or {@code null} when nothing matches
     */
    private SortaHit matchOntologyTermAnnotations(SortaInput sortaInput, OntologyTerm ontologyTerm) {
        for (String attribute : sortaInput.getAnnotationMatchAttributes()) {
            String inputValue = sortaInput.getValue(attribute);
            if (inputValue == null) {
                // An attribute without a value cannot match; avoids a NullPointerException below.
                continue;
            }
            for (OntologyTermAnnotation annotation : ontologyTerm.getAnnotations()) {
                if (attribute.equalsIgnoreCase(annotation.getName()) && inputValue.equalsIgnoreCase(annotation.getValue())) {
                    return SortaHit.create(ontologyTerm, 100.0d, 100.0d);
                }
            }
        }
        return null;
    }

    /**
     * Scores {@code queryString} against the synonyms of {@code ontologyTerm}.
     *
     * <p>The best scoring synonym is the starting point; each remaining synonym, in descending
     * score order, is appended to it and kept whenever the concatenation scores strictly higher.
     * The weighted score adds, for every stemmed query word that also occurs in the chosen synonym
     * text, the value that the information content service reports for that word.
     *
     * @return the hit for the term, or {@code null} when the term has no synonyms
     */
    private SortaHit findSynonymWithHighestNgramScore(String queryString, Ontology ontology, OntologyTerm ontologyTerm) {
        List<Hit<String>> synonymHits = ontologyTerm.getSynonyms().stream()
                .map(synonym -> calculateLexicalSimilarity(queryString, synonym))
                .sorted(Ordering.natural().reverse())
                .collect(Collectors.toList());
        if (synonymHits.isEmpty()) {
            return null;
        }

        Hit<String> topHit = synonymHits.get(0);
        float bestScore = topHit.getScore();
        String bestSynonym = topHit.getResult();
        for (Hit<String> nextHit : Iterables.skip(synonymHits, 1)) {
            String combinedSynonym = bestSynonym + SINGLE_WHITESPACE + nextHit.getResult();
            Hit<String> combinedHit = calculateLexicalSimilarity(queryString, combinedSynonym);
            if (combinedHit.getScore() > bestScore) {
                bestScore = combinedHit.getScore();
                bestSynonym = combinedSynonym;
            }
        }

        String cleanedQuery = removeIllegalCharWithSingleWhiteSpace(queryString);
        Map<String, Double> wordScores = informationContentService.redistributedNGramScore(cleanedQuery, ontology.getIRI());
        Set<String> synonymWords = informationContentService.createStemmedWordSet(bestSynonym);
        double wordBoost = informationContentService.createStemmedWordSet(cleanedQuery).stream()
                .filter(word -> synonymWords.contains(word) && wordScores.containsKey(word))
                .mapToDouble(wordScores::get)
                .sum();

        float ngramScore = bestScore * 100.0f;
        return SortaHit.create(ontologyTerm, ngramScore, ngramScore + wordBoost);
    }

    /**
     * Compares two strings with {@link NGramDistanceAlgorithm#stringMatching} after replacing
     * their illegal characters with spaces.
     *
     * @return a hit holding {@code synonym} and the algorithm's result divided by 100
     */
    private Hit<String> calculateLexicalSimilarity(String queryString, String synonym) {
        double similarity = NGramDistanceAlgorithm.stringMatching(
                removeIllegalCharWithSingleWhiteSpace(queryString), removeIllegalCharWithSingleWhiteSpace(synonym));
        return Hit.create(synonym, ((float) similarity) / 100.0f);
    }

    /**
     * Lower-cases and splits the query into words, drops stop words and reserved words, stems the
     * rest and joins the distinct non-blank stems with single spaces, keeping their first-seen
     * order.
     */
    private String stemQuery(String queryString) {
        // Locale.ROOT: the split pattern is ASCII-only, so lower-casing must not depend on the
        // default locale (e.g. Turkish maps 'I' to a dotless 'i', which would split the word).
        String[] words = queryString.toLowerCase(Locale.ROOT).trim().split(NON_WORD_SEPARATOR);
        return Arrays.stream(words)
                .filter(word -> !NGramDistanceAlgorithm.STOPWORDSLIST.contains(word) && !ELASTICSEARCH_RESERVED_WORDS.contains(word))
                .map(Stemmer::stem)
                .filter(StringUtils::isNotBlank)
                .distinct()
                .collect(Collectors.joining(SINGLE_WHITESPACE));
    }

    /**
     * Appends the fuzzy similarity suffix to every word of a space separated query.
     *
     * @return the suffixed words joined with single spaces; empty when the query has no words
     */
    private String fuzzyMatchQuerySyntax(String queryString) {
        return Stream.of(queryString.split(SINGLE_WHITESPACE))
                // A blank token would otherwise turn into a lone "~0.8".
                .filter(StringUtils::isNotBlank)
                .map(word -> word + FUZZY_MATCH_SIMILARITY)
                .collect(Collectors.joining(SINGLE_WHITESPACE));
    }

    /**
     * Replaces every character that is not an ASCII letter, an ASCII digit or a space with a
     * single space.
     *
     * @param string the text to clean, must not be {@code null}
     * @return the cleaned text, same length as the input
     */
    public String removeIllegalCharWithSingleWhiteSpace(String string) {
        return string.replaceAll(ILLEGAL_CHARACTERS_PATTERN, SINGLE_WHITESPACE);
    }

    /**
     * Removes every character that is not an ASCII letter, an ASCII digit or a space.
     *
     * @param string the text to clean, must not be {@code null}
     * @return the cleaned text
     */
    public String removeIllegalCharWithEmptyString(String string) {
        return string.replaceAll(ILLEGAL_CHARACTERS_PATTERN, "");
    }
}
