package org.molgenis.data.discovery.scoring.attributes;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;
import java.util.stream.Stream;
import org.molgenis.data.discovery.scoring.attributes.AttributeSimilarity;
import org.molgenis.ontology.ic.TermFrequencyService;

/* loaded from: input_file:WEB-INF/lib/molgenis-data-discovery-2.0.0-SNAPSHOT.jar:org/molgenis/data/discovery/scoring/attributes/VectorSpaceModelAttributeSimilarity.class */
/**
 * Attribute similarity based on a vector space model: both input strings are tokenized, each
 * token occurrence contributes a weight to its dimension, and the score is the cosine of the
 * angle between the two resulting weight vectors, rounded to three decimals.
 */
public class VectorSpaceModelAttributeSimilarity extends AttributeSimilarity {
    /** Matches tokens made up solely of digits; compiled once rather than once per token. */
    private static final Pattern DIGIT_PATTERN = Pattern.compile("\\d+");

    /**
     * Creates a similarity function registered under {@code SimilarityFunctionName.VSM}.
     *
     * @param termFrequencyService source of the weights for non-numeric tokens
     */
    public VectorSpaceModelAttributeSimilarity(TermFrequencyService termFrequencyService) {
        super(AttributeSimilarity.SimilarityFunctionName.VSM, termFrequencyService);
    }

    /**
     * Scores two strings by the cosine similarity of their token weight vectors.
     *
     * @param str first string to compare
     * @param str2 second string to compare
     * @param z flag whose negation is forwarded to {@code createTermTokens}
     *     (NOTE(review): its exact meaning is defined by the superclass - confirm there)
     * @return the cosine similarity rounded to three decimals, or {@code 0} when either vector
     *     has zero length (for example when a string yields no tokens)
     */
    @Override
    public float score(String str, String str2, boolean z) {
        boolean tokenizerFlag = !z;
        List<String> firstTokens = createTermTokens(str, tokenizerFlag);
        List<String> secondTokens = createTermTokens(str2, tokenizerFlag);

        Map<String, Integer> dimensionByTerm = indexTerms(firstTokens, secondTokens);
        double[] firstVector = createVector(firstTokens, dimensionByTerm);
        double[] secondVector = createVector(secondTokens, dimensionByTerm);

        double dotProduct = 0.0d;
        for (int i = 0; i < firstVector.length; i++) {
            dotProduct += firstVector[i] * secondVector[i];
        }

        double normProduct = euclideanNorms(firstVector) * euclideanNorms(secondVector);
        if (normProduct == 0.0d) {
            // Previously this case produced 0/0 = NaN and only returned 0 because
            // Math.round(NaN) is 0; make the "nothing to compare" result explicit.
            return 0.0f;
        }
        return ((float) Math.round((dotProduct / normProduct) * 1000.0d)) / 1000.0f;
    }

    /**
     * Assigns every distinct term of both token lists a vector dimension, numbered in order of
     * first occurrence (first list before second list).
     */
    private static Map<String, Integer> indexTerms(List<String> first, List<String> second) {
        Map<String, Integer> dimensionByTerm = new HashMap<>();
        for (String term : first) {
            dimensionByTerm.putIfAbsent(term, dimensionByTerm.size());
        }
        for (String term : second) {
            dimensionByTerm.putIfAbsent(term, dimensionByTerm.size());
        }
        return dimensionByTerm;
    }

    /** Returns the Euclidean (L2) length of the given vector. */
    private double euclideanNorms(double[] vector) {
        // DoubleStream.sum() is kept on purpose so the summation behaves as before.
        return Math.sqrt(DoubleStream.of(vector).map(component -> component * component).sum());
    }

    /**
     * Builds the weight vector for one token list.
     *
     * <p>Every occurrence of a token adds to its dimension: digit-only tokens add {@code 1.0},
     * all other tokens add the value reported by the term frequency service.
     *
     * @param tokens tokens of one input string; each must be a key of {@code dimensionByTerm}
     * @param dimensionByTerm term-to-dimension lookup covering both input strings
     * @return a vector with one component per entry of {@code dimensionByTerm}
     */
    private double[] createVector(List<String> tokens, Map<String, Integer> dimensionByTerm) {
        double[] vector = new double[dimensionByTerm.size()];
        for (String token : tokens) {
            int dimension = dimensionByTerm.get(token);
            double weight = DIGIT_PATTERN.matcher(token).matches()
                    ? 1.0d
                    : 1.0f * this.termFrequencyService.getTermFrequency(token);
            vector[dimension] += weight;
        }
        return vector;
    }
}
