package de.fau.cs.jstk.lm;

import de.fau.cs.jstk.arch.TokenHierarchy;
import de.fau.cs.jstk.arch.TokenTree;
import de.fau.cs.jstk.arch.Tokenization;
import de.fau.cs.jstk.arch.Tokenizer;
import de.fau.cs.jstk.arch.TreeNode;
import de.fau.cs.jstk.exceptions.OutOfVocabularyException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

/* loaded from: input_file:de/fau/cs/jstk/lm/Unigram.class */
/**
 * Unigram language model: each tokenization carries a single, context-free
 * probability.
 *
 * <p>Two probability tables are kept: {@code sils}, whose entries are used
 * verbatim when the network is generated (presumably silence/filler tokens,
 * going by the field name), and {@code probs}, whose entries are rescaled in
 * {@link #generateNetwork()} so that they sum to one minus the total mass of
 * {@code sils}.
 */
public class Unigram implements LanguageModel {
    /** Probability assigned to tokenizations that have no explicit entry. */
    public static final float DEFAULT_OOV = 0.001f;

    private final Tokenizer tok;
    private final TokenHierarchy th;

    /** Fixed probabilities; never rescaled by this class. */
    private final HashMap<Tokenization, Float> sils;

    private float oovProb = DEFAULT_OOV;

    /** Probabilities that are renormalised by {@link #generateNetwork()}. */
    private final HashMap<Tokenization, Float> probs = new HashMap<>();

    /**
     * Creates a unigram model.
     *
     * @param tokenizer      tokenizer providing the tokenizations to model
     * @param tokenHierarchy hierarchy used to tokenize each word sequence when
     *                       the network is built
     * @param hashMap        fixed (non-rescaled) probabilities; the map is
     *                       stored by reference, not copied. {@code null} is
     *                       treated as an empty map.
     */
    public Unigram(Tokenizer tokenizer, TokenHierarchy tokenHierarchy, HashMap<Tokenization, Float> hashMap) {
        this.tok = tokenizer;
        this.th = tokenHierarchy;
        // Fall back to an empty table so generateNetwork() does not fail on null.
        this.sils = (hashMap != null) ? hashMap : new HashMap<Tokenization, Float>();
    }

    /**
     * Sets (or overwrites) the probability of a tokenization.
     *
     * @param tokenization the tokenization to assign a probability to
     * @param f            the (unnormalised) probability
     */
    public void setProb(Tokenization tokenization, float f) {
        this.probs.put(tokenization, Float.valueOf(f));
    }

    /**
     * Sets the probability used for tokenizations without an explicit entry.
     *
     * @param f the new out-of-vocabulary probability
     */
    public void setOovProb(float f) {
        this.oovProb = f;
    }

    /**
     * Returns the probability used for tokenizations without an explicit entry.
     *
     * @return the out-of-vocabulary probability
     */
    public double getOovProb() {
        return this.oovProb;
    }

    /**
     * Reads the {@code \1-grams:} section of the given file and stores the
     * probability of every word accepted by the tokenizer.
     *
     * <p>Each data line is expected to hold at least two whitespace-separated
     * fields: a base-10 log probability followed by the word. Lines shorter
     * than three characters or with fewer than two fields are skipped. Reading
     * stops at the next line starting with a backslash or at end of file.
     *
     * @param file the language model file to read
     * @throws IOException              if the file cannot be read
     * @throws OutOfVocabularyException declared for the tokenizer lookup
     * @throws NumberFormatException    if the first field of a data line is not
     *                                  a parsable float
     */
    public void loadSrilm(File file) throws IOException, OutOfVocabularyException {
        // try-with-resources: the reader is closed on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            // Skip everything up to and including the unigram section header.
            String line = reader.readLine();
            while (line != null && !line.equals("\\1-grams:")) {
                line = reader.readLine();
            }
            if (line == null) {
                return; // no unigram section in this file
            }

            while ((line = reader.readLine()) != null && !line.startsWith("\\")) {
                String trimmed = line.trim();
                if (trimmed.length() < 3) {
                    continue;
                }
                String[] fields = trimmed.split("\\s+");
                if (fields.length < 2) {
                    continue; // malformed line without a word field
                }
                String word = fields[1];
                if (this.tok.validate(word)) {
                    // The file stores log10 probabilities; convert to linear scale.
                    float prob = (float) Math.pow(10.0d, Float.parseFloat(fields[0]));
                    this.probs.put(this.tok.getWordTokenization(word), Float.valueOf(prob));
                }
            }
        }
    }

    /**
     * Builds the token tree for this model and returns its root.
     *
     * <p>Tokenizations of the tokenizer that have neither a fixed nor an
     * explicit probability are first entered into {@code probs} with the
     * out-of-vocabulary probability. All entries of {@code probs} are then
     * scaled by {@code (1 - sum(sils)) / sum(non-fixed tokenizations)}. This
     * modifies the stored probabilities in place. Finally every tokenization is
     * added to a {@link TokenTree} with its fixed or rescaled probability, the
     * tree is factored, and each leaf gets the root set via {@code setLst}.
     *
     * @return the root node of the generated tree
     */
    @Override
    public TreeNode generateNetwork() {
        // Total mass reserved for the fixed-probability entries.
        float fixedMass = 0.0f;
        for (Float p : this.sils.values()) {
            fixedMass += p.floatValue();
        }

        // Accumulate the mass of all remaining tokenizations, filling in the
        // out-of-vocabulary probability where no explicit value exists.
        float freeMass = 0.0f;
        for (Iterator<Tokenization> it = this.tok.tokenizations.iterator(); it.hasNext();) {
            Tokenization t = it.next();
            if (this.sils.containsKey(t)) {
                continue;
            }
            Float p = this.probs.get(t);
            if (p == null) {
                p = Float.valueOf(this.oovProb);
                this.probs.put(t, p);
            }
            freeMass += p.floatValue();
        }

        // Skip the rescale when there is no free mass: dividing by zero would
        // turn every stored probability into Infinity or NaN.
        if (freeMass != 0.0f) {
            float scale = (1.0f - fixedMass) / freeMass;
            for (Map.Entry<Tokenization, Float> entry : this.probs.entrySet()) {
                entry.setValue(Float.valueOf(entry.getValue().floatValue() * scale));
            }
        }

        TokenTree tokenTree = new TokenTree(0);
        for (Iterator<Tokenization> it = this.tok.tokenizations.iterator(); it.hasNext();) {
            Tokenization t = it.next();
            // Fixed entries take precedence over the rescaled ones.
            Map<Tokenization, Float> table = this.sils.containsKey(t) ? this.sils : this.probs;
            tokenTree.addToTree(t, this.th.tokenizeWord(t.sequence), table.get(t).floatValue());
        }
        tokenTree.factor();

        for (Iterator<TreeNode> it = tokenTree.leaves().iterator(); it.hasNext();) {
            it.next().setLst(tokenTree.root);
        }
        return tokenTree.root;
    }
}
