package de.fau.cs.jstk.lm;

import de.fau.cs.jstk.arch.TokenHierarchy;
import de.fau.cs.jstk.arch.TokenTree;
import de.fau.cs.jstk.arch.Tokenization;
import de.fau.cs.jstk.arch.Tokenizer;
import de.fau.cs.jstk.arch.TreeNode;
import de.fau.cs.jstk.exceptions.OutOfVocabularyException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.log4j.Logger;
import org.xiph.speex.NbCodec;

/* loaded from: input_file:de/fau/cs/jstk/lm/Bigram.class */
/**
 * Bi-gram language model with uni-gram back-off that is loaded from an
 * ARPA/SRILM style text file and turned into a network of {@link TokenTree}s.
 *
 * <p>Uni-grams are stored per {@link Tokenization}; bi-grams are stored per
 * ordered pair of tokenizations. Both maps rely on {@code Tokenization}
 * providing consistent {@code equals}/{@code hashCode}, which this class
 * already assumes by using {@code Tokenization} as the key of the
 * {@code sils} map.
 */
public class Bigram implements LanguageModel {
    private static final Logger logger = Logger.getLogger(Bigram.class);

    /** Default probability for tokens without a usable uni-gram entry. */
    public static final float DEFAULT_OOV = 0.001f;

    /** Section header that precedes the uni-gram entries. */
    private static final String UNIGRAM_HEADER = "\\1-grams:";

    /** Section header that precedes the bi-gram entries. */
    private static final String BIGRAM_HEADER = "\\2-grams:";

    /**
     * Back-off weight stored when a uni-gram line carries no third column.
     * NOTE(review): NbCodec.VERY_SMALL looks like a decompiler substitution
     * for the literal 0.0f -- confirm. ARPA files conventionally treat a
     * missing back-off weight as log10 = 0 (weight 1.0); verify whether a
     * zero weight is really intended here.
     */
    private static final float MISSING_BACKOFF = NbCodec.VERY_SMALL;

    private Tokenizer tok;
    private TokenHierarchy th;

    /** Tokens whose weight is taken from this map instead of the n-gram statistics. */
    private HashMap<Tokenization, Float> sils;

    private float oovProb = DEFAULT_OOV;

    /** Uni-gram statistics, keyed by the token they belong to. */
    private final Map<Tokenization, N1gram> p1 = new HashMap<>();

    /** Bi-gram probabilities, keyed by the (first, second) token pair. */
    private final Map<N2gram, Float> p2 = new HashMap<>();

    /** Probability and back-off weight of a single token (both in the linear domain). */
    private static final class N1gram {
        final float p;
        final float backoff;

        N1gram(float p, float backoff) {
            this.p = p;
            this.backoff = backoff;
        }
    }

    /** Hash key identifying an ordered pair of tokens. */
    private static final class N2gram {
        final Tokenization ta;
        final Tokenization tb;

        N2gram(Tokenization ta, Tokenization tb) {
            this.ta = ta;
            this.tb = tb;
        }

        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof N2gram)) {
                return false;
            }
            N2gram other = (N2gram) obj;
            return this.ta.equals(other.ta) && this.tb.equals(other.tb);
        }

        // Required alongside equals(): without it a freshly built key never
        // finds the entry stored under an equal key in a HashMap.
        @Override
        public int hashCode() {
            return 31 * this.ta.hashCode() + this.tb.hashCode();
        }
    }

    /**
     * Creates an empty model; call {@link #loadSrilm(File)} to fill it.
     *
     * @param tokenizer      used to validate words and to map them to tokenizations
     * @param tokenHierarchy used to tokenize a word's sequence when adding it to a tree
     * @param hashMap        tokens with fixed weights that bypass the n-gram lookup
     *                       (presumably silences/fillers, judging by the field name)
     */
    public Bigram(Tokenizer tokenizer, TokenHierarchy tokenHierarchy, HashMap<Tokenization, Float> hashMap) {
        this.tok = tokenizer;
        this.th = tokenHierarchy;
        this.sils = hashMap;
    }

    /**
     * Reads the {@code \1-grams:} and {@code \2-grams:} sections of the given
     * file. Log10 values are converted to linear probabilities. Lines that are
     * too short or refer to words rejected by the tokenizer are skipped with a
     * warning. A missing section simply yields no entries.
     *
     * @param file language model file to read
     * @throws IOException              if the file cannot be read
     * @throws OutOfVocabularyException propagated from the tokenizer lookup
     */
    public void loadSrilm(File file) throws IOException, OutOfVocabularyException {
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            seekHeader(reader, reader.readLine(), UNIGRAM_HEADER);
            String terminator = readUnigrams(reader);
            logger.info("loaded " + this.p1.size() + " uni-grams");

            // The line that ended the uni-gram section may already be the bi-gram header.
            seekHeader(reader, terminator, BIGRAM_HEADER);
            readBigrams(reader);
            logger.info("loaded " + this.p2.size() + " bi-grams");
        }
    }

    /** Advances from {@code current} until a line equals {@code header}; returns that line or null at EOF. */
    private static String seekHeader(BufferedReader reader, String current, String header) throws IOException {
        String line = current;
        while (line != null && !line.equals(header)) {
            line = reader.readLine();
        }
        return line;
    }

    /** Parses uni-gram entries up to EOF or the next section header; returns the terminating line (null at EOF). */
    private String readUnigrams(BufferedReader reader) throws IOException, OutOfVocabularyException {
        String line;
        while ((line = reader.readLine()) != null && !line.startsWith("\\")) {
            String[] fields = splitEntry(line, 2);
            if (fields == null) {
                continue;
            }
            String word = fields[1];
            if (!this.tok.validate(word)) {
                logger.warn("ignoring unknown token: " + word);
                continue;
            }
            float backoff = fields.length > 2 ? pow10(fields[2]) : MISSING_BACKOFF;
            this.p1.put(this.tok.getWordTokenization(word), new N1gram(pow10(fields[0]), backoff));
        }
        return line;
    }

    /** Parses bi-gram entries up to EOF or the next section header. */
    private void readBigrams(BufferedReader reader) throws IOException, OutOfVocabularyException {
        String line;
        while ((line = reader.readLine()) != null && !line.startsWith("\\")) {
            String[] fields = splitEntry(line, 3);
            if (fields == null) {
                continue;
            }
            if (!this.tok.validate(fields[1])) {
                logger.warn("ignoring unknown token: " + fields[1]);
            } else if (!this.tok.validate(fields[2])) {
                logger.warn("ignoring unknown token: " + fields[2]);
            } else {
                N2gram key = new N2gram(this.tok.getWordTokenization(fields[1]), this.tok.getWordTokenization(fields[2]));
                this.p2.put(key, Float.valueOf(pow10(fields[0])));
            }
        }
    }

    /** Splits an entry on whitespace; warns and returns null if it is too short to hold {@code minFields} columns. */
    private static String[] splitEntry(String line, int minFields) {
        String trimmed = line.trim();
        if (trimmed.length() >= 3) {
            String[] fields = trimmed.split("\\s+");
            if (fields.length >= minFields) {
                return fields;
            }
        }
        logger.warn("ignoring wrong formatted line: " + line);
        return null;
    }

    /** Converts a textual log10 value to the linear domain. */
    private static float pow10(String log10Value) {
        return (float) Math.pow(10.0d, Float.parseFloat(log10Value));
    }

    /**
     * Builds the decoding network: one uni-gram tree (id 0) used as entry
     * point plus one tree per tokenization (ids 1..n) holding the
     * probabilities of all successors of that token. Every leaf is linked via
     * {@code setLst} to the root of the tree belonging to its word; leaves of
     * tokens contained in {@code sils} loop back to the root of the tree they
     * are in.
     *
     * @return root of the uni-gram tree
     */
    @Override // de.fau.cs.jstk.lm.LanguageModel
    public TreeNode generateNetwork() {
        logger.info("building initial uni-gram tree");
        int nextTreeId = 0;
        TokenTree unigramTree = new TokenTree(nextTreeId++);
        for (Map.Entry<Tokenization, Float> sil : this.sils.entrySet()) {
            unigramTree.addToTree(sil.getKey(), this.th.tokenizeWord(sil.getKey().sequence), sil.getValue().floatValue());
        }
        for (Tokenization token : this.tok.tokenizations) {
            N1gram unigram = this.p1.get(token);
            float p = unigram == null ? this.oovProb : unigram.p;
            unigramTree.addToTree(token, this.th.tokenizeWord(token.sequence), p);
        }
        System.err.println(TokenTree.traverseNetwork(unigramTree.root, "  "));
        unigramTree.factor();

        logger.info("building bi-gram LSTs");
        // Allocate all successor trees up front so leaves can be linked to
        // trees that have not been populated yet.
        Map<Object, TokenTree> successorTrees = new HashMap<>();
        for (Tokenization token : this.tok.tokenizations) {
            successorTrees.put(token.word, new TokenTree(nextTreeId++));
        }

        int built = 0;
        for (Tokenization context : this.tok.tokenizations) {
            if (this.sils.containsKey(context)) {
                // Tokens from sils get no successor statistics of their own.
                continue;
            }
            TokenTree tree = successorTrees.get(context.word);
            for (Tokenization successor : this.tok.tokenizations) {
                Float fixed = this.sils.get(successor);
                float p = this.sils.containsKey(successor) ? fixed.floatValue() : bigramProbability(context, successor);
                tree.addToTree(successor, this.th.tokenizeWord(successor.sequence), p);
            }
            tree.factor();
            linkLeaves(tree, successorTrees);

            built++;
            if (built % 100 == 0) {
                // Request a collection so the reported figure reflects live data.
                System.gc();
                long usedBytes = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
                logger.info(String.valueOf(built) + " mem usage=" + ((usedBytes / 1024) / 1024) + "M");
            }
        }

        linkLeaves(unigramTree, successorTrees);
        return unigramTree.root;
    }

    /**
     * Returns P(successor | context): the stored bi-gram probability if there
     * is one, otherwise back-off weight of the context times the uni-gram
     * probability of the successor. A missing or zero-probability uni-gram
     * entry contributes {@code oovProb} instead.
     */
    private float bigramProbability(Tokenization context, Tokenization successor) {
        Float direct = this.p2.get(new N2gram(context, successor));
        if (direct != null) {
            return direct.floatValue();
        }
        N1gram contextGram = this.p1.get(context);
        N1gram successorGram = this.p1.get(successor);
        float backoff = (contextGram == null || contextGram.p == 0.0f) ? this.oovProb : contextGram.backoff;
        float p = (successorGram == null || successorGram.p == 0.0f) ? this.oovProb : successorGram.p;
        return backoff * p;
    }

    /** Links every leaf of {@code tree} to the tree that follows its word (or back to {@code tree} for sils tokens). */
    private void linkLeaves(TokenTree tree, Map<Object, TokenTree> successorTrees) {
        for (TreeNode leaf : tree.leaves()) {
            if (this.sils.containsKey(leaf.word)) {
                leaf.setLst(tree.root);
            } else {
                leaf.setLst(successorTrees.get(leaf.word.word).root);
            }
        }
    }
}
