package org.molgenis.data.discovery.scoring.collections;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;
import java.util.stream.Stream;
import org.apache.commons.math3.optimization.direct.CMAESOptimizer;
import org.apache.lucene.util.packed.PackedInts;
import org.molgenis.data.discovery.model.biobank.BiobankSampleCollection;
import org.molgenis.data.discovery.model.biobank.BiobankUniverse;
import org.molgenis.data.discovery.model.biobank.BiobankUniverseMemberVector;
import org.molgenis.data.discovery.model.matching.BiobankSampleCollectionSimilarity;
import org.molgenis.data.discovery.model.matching.OntologyTermMatch;
import org.molgenis.data.discovery.repo.BiobankUniverseRepository;
import org.molgenis.data.populate.IdGenerator;
import org.molgenis.data.semanticsearch.semantic.Hit;
import org.molgenis.ontology.core.model.OntologyTerm;
import org.molgenis.ontology.core.model.SemanticType;
import org.molgenis.ontology.core.service.OntologyService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.aop.framework.autoproxy.target.QuickTargetSourceCreator;

/* loaded from: input_file:WEB-INF/lib/molgenis-data-discovery-2.0.0-SNAPSHOT.jar:org/molgenis/data/discovery/scoring/collections/VectorSpaceModelCollectionSimilarity.class */
/**
 * Compares the members of a {@link BiobankUniverse} using a vector space model.
 *
 * <p>Every member collection is turned into a numeric vector with one dimension per distinct
 * ontology term found across all members of the universe. The value of a dimension is the sum of
 * the semantic relatedness scores between the collection's own ontology terms and the term of
 * that dimension. Two such vectors can then be compared with {@link #cosineValue}.
 *
 * <p>Relatedness scores are looked up through a bounded, time-limited cache (at most 2000
 * entries, expiring one hour after being written).
 */
public class VectorSpaceModelCollectionSimilarity {
    private static final Logger LOG = LoggerFactory.getLogger(VectorSpaceModelCollectionSimilarity.class);

    /** Stop level handed to {@link OntologyTermMatch#create} when pairing two ontology terms. */
    static final int DISTANCE = 5;

    /** Score stored in the cache for a pair of ontology terms that are not related. */
    private static final double NOT_RELATED = 0.0d;

    /** Similarity reported when two vectors cannot be compared meaningfully. */
    private static final float NO_SIMILARITY = 0.0f;

    /** Suffix appended to the rounded percentage label of a similarity. */
    private static final String PERCENT_SUFFIX = "%";

    private final IdGenerator idGenerator;
    private final BiobankUniverseRepository biobankUniverseRepository;
    private final OntologyService ontologyService;

    // The loader only runs on the first cache miss, i.e. after the constructor has assigned
    // ontologyService, so building the cache in a field initializer is safe.
    private final LoadingCache<OntologyTermMatch, Double> cachedOntologyTermSemanticRelateness = CacheBuilder.newBuilder()
            .maximumSize(2000)
            .expireAfterWrite(1, TimeUnit.HOURS)
            .build(new CacheLoader<OntologyTermMatch, Double>() {
                @Override
                public Double load(OntologyTermMatch ontologyTermMatch) {
                    return computeRelatedness(ontologyTermMatch);
                }
            });

    /**
     * Creates the similarity calculator.
     *
     * @param biobankUniverseRepository used to fetch the attributes of a sample collection
     * @param ontologyService used to test and score the relatedness of two ontology terms
     * @param idGenerator used to generate the identifier of every created member vector
     * @throws NullPointerException if any argument is {@code null}
     */
    public VectorSpaceModelCollectionSimilarity(BiobankUniverseRepository biobankUniverseRepository, OntologyService ontologyService, IdGenerator idGenerator) {
        this.biobankUniverseRepository = Objects.requireNonNull(biobankUniverseRepository);
        this.ontologyService = Objects.requireNonNull(ontologyService);
        this.idGenerator = Objects.requireNonNull(idGenerator);
    }

    /**
     * Creates one vector per member of the given universe.
     *
     * <p>All vectors share the same dimensions: the distinct ontology terms collected over all
     * members, in first-encounter order.
     *
     * @param biobankUniverse the universe whose members are vectorised
     * @return the member vectors, in the same order as {@code biobankUniverse.getMembers()}
     */
    public List<BiobankUniverseMemberVector> createBiobankUniverseMemberVectors(BiobankUniverse biobankUniverse) {
        List<BiobankSampleCollection> members = biobankUniverse.getMembers();

        List<Map<OntologyTerm, Integer>> termFrequencies = members.stream()
                .map(member -> getOntologyTermFrequency(member, biobankUniverse))
                .collect(Collectors.toList());

        List<OntologyTerm> dimensions = termFrequencies.stream()
                .flatMap(frequency -> frequency.keySet().stream())
                .distinct()
                .collect(Collectors.toList());

        List<double[]> points = termFrequencies.stream()
                .map(frequency -> createVector(frequency, dimensions))
                .collect(Collectors.toList());

        // Pair every member with the vector computed for it; identifiers are generated in member order.
        List<BiobankUniverseMemberVector> memberVectors = new ArrayList<>(members.size());
        for (int i = 0; i < members.size(); i++) {
            memberVectors.add(BiobankUniverseMemberVector.create(this.idGenerator.generateId(), members.get(i), points.get(i)));
        }
        return memberVectors;
    }

    /**
     * Computes the cosine similarity between the points of two member vectors.
     *
     * @param biobankUniverseMemberVector the first member vector
     * @param biobankUniverseMemberVector2 the second member vector
     * @return the similarity of the two underlying collections. When the vectors differ in length
     *     the similarity is {@code 0} with an empty label; otherwise the label is the similarity
     *     as a rounded percentage followed by {@code %}. When either vector has a zero norm the
     *     similarity is {@code 0} with the label {@code 0%}.
     */
    public BiobankSampleCollectionSimilarity cosineValue(BiobankUniverseMemberVector biobankUniverseMemberVector, BiobankUniverseMemberVector biobankUniverseMemberVector2) {
        double[] first = biobankUniverseMemberVector.getPoint();
        double[] second = biobankUniverseMemberVector2.getPoint();
        if (first.length != second.length) {
            return BiobankSampleCollectionSimilarity.create(biobankUniverseMemberVector.getBiobankSampleCollection(), biobankUniverseMemberVector2.getBiobankSampleCollection(), NO_SIMILARITY, "");
        }

        float dotProduct = 0.0f;
        for (int i = 0; i < first.length; i++) {
            dotProduct = (float) (dotProduct + (first[i] * second[i]));
        }

        // A zero (or empty) vector has no direction; dividing by its norm would yield NaN.
        float normProduct = euclideanNorms(first) * euclideanNorms(second);
        float cosine = normProduct == 0.0f ? NO_SIMILARITY : dotProduct / normProduct;
        return BiobankSampleCollectionSimilarity.create(biobankUniverseMemberVector.getBiobankSampleCollection(), biobankUniverseMemberVector2.getBiobankSampleCollection(), cosine, Math.round(cosine * 100.0f) + PERCENT_SUFFIX);
    }

    /**
     * Builds the vector of one collection over the given dimensions.
     *
     * <p>Only the keys of {@code map} are used; the frequency values are not read here.
     *
     * @param map the ontology terms of the collection (keys) with their frequencies
     * @param list the ontology terms that define the dimensions of the vector
     * @return an array of {@code list.size()} values where position {@code i} holds the summed
     *     relatedness scores of all matches whose source is {@code list.get(i)}
     */
    double[] createVector(Map<OntologyTerm, Integer> map, List<OntologyTerm> list) {
        double[] vector = new double[list.size()];
        for (OntologyTerm ontologyTerm : map.keySet()) {
            for (Hit<OntologyTermMatch> hit : findRelatedOntologyTerms(ontologyTerm, list)) {
                // The source of every match is taken from 'list', so the lookup always succeeds.
                int index = list.indexOf(hit.getResult().getSource());
                vector[index] += hit.getScore();
            }
        }
        return vector;
    }

    /**
     * Computes the Euclidean (L2) norm of a vector, narrowed to {@code float}.
     *
     * @param dArr the vector
     * @return the square root of the sum of the squared components
     */
    private float euclideanNorms(double[] dArr) {
        return (float) Math.sqrt(DoubleStream.of(dArr).map(value -> Math.pow(value, 2.0d)).sum());
    }

    /**
     * Loads the relatedness score of a match; used by the cache on a miss.
     *
     * @param ontologyTermMatch the pair of ontology terms and the stop level to evaluate
     * @return the semantic relatedness reported by the ontology service when the two terms are
     *     related within the stop level, otherwise {@code 0.0}
     */
    private Double computeRelatedness(OntologyTermMatch ontologyTermMatch) {
        if (this.ontologyService.related(ontologyTermMatch.getTarget(), ontologyTermMatch.getSource(), ontologyTermMatch.getStopLevel())) {
            return this.ontologyService.getOntologyTermSemanticRelatedness(ontologyTermMatch.getTarget(), ontologyTermMatch.getSource());
        }
        return NOT_RELATED;
    }

    /**
     * Finds the candidates that are related to the given ontology term.
     *
     * @param ontologyTerm the term to compare against every candidate
     * @param collection the candidate ontology terms
     * @return one hit per candidate with a non-zero relatedness score, in iteration order of
     *     {@code collection}; candidates whose score could not be loaded are logged and skipped
     */
    private List<Hit<OntologyTermMatch>> findRelatedOntologyTerms(OntologyTerm ontologyTerm, Collection<OntologyTerm> collection) {
        List<Hit<OntologyTermMatch>> hits = new ArrayList<>();
        for (OntologyTerm candidate : collection) {
            try {
                OntologyTermMatch match = OntologyTermMatch.create(ontologyTerm, candidate, DISTANCE);
                Double relatedness = this.cachedOntologyTermSemanticRelateness.get(match);
                if (relatedness.doubleValue() != NOT_RELATED) {
                    hits.add(Hit.create(match, relatedness.floatValue()));
                }
            } catch (ExecutionException e) {
                // Best effort: a failing lookup must not abort the whole vector; keep the cause in the log.
                LOG.error(e.getMessage(), e);
            }
        }
        return hits;
    }

    /**
     * Collects the ontology terms tagged on the attributes of a collection.
     *
     * <p>Terms are de-duplicated within a single tag group only, so a term can occur several
     * times in the result. Terms that carry any semantic type listed in the key concepts of the
     * universe are left out.
     *
     * @param biobankSampleCollection the collection whose attributes are inspected
     * @param biobankUniverse the universe providing the key concepts used for filtering
     * @return the remaining ontology terms, possibly with repetitions
     */
    private List<OntologyTerm> getAllOntologyTerms(BiobankSampleCollection biobankSampleCollection, BiobankUniverse biobankUniverse) {
        List<SemanticType> keyConcepts = biobankUniverse.getKeyConcepts();
        return this.biobankUniverseRepository.getBiobankSampleAttributes(biobankSampleCollection).stream()
                .flatMap(attribute -> attribute.getTagGroups().stream())
                .flatMap(tagGroup -> tagGroup.getOntologyTerms().stream().distinct())
                .filter(term -> term.getSemanticTypes().stream().noneMatch(keyConcepts::contains))
                .collect(Collectors.toList());
    }

    /**
     * Counts how often every ontology term occurs in a collection.
     *
     * @param biobankSampleCollection the collection whose ontology terms are counted
     * @param biobankUniverse the universe providing the key concepts used for filtering
     * @return a map from ontology term to its number of occurrences
     */
    private Map<OntologyTerm, Integer> getOntologyTermFrequency(BiobankSampleCollection biobankSampleCollection, BiobankUniverse biobankUniverse) {
        return getAllOntologyTerms(biobankSampleCollection, biobankUniverse).stream()
                .collect(Collectors.toMap(term -> term, term -> 1, Integer::sum));
    }
}
