package com.google.android.apps.translatedecoder.preprocess;

import com.google.android.apps.translatedecoder.decoder.g;
import com.google.android.apps.translatedecoder.util.DecoderRuntimeException;
import com.google.android.apps.translatedecoder.util.StringMap;
import com.google.android.apps.translatedecoder.util.StringSet;
import com.google.android.libraries.translate.offline.OfflineTranslationException;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.RandomAccessFile;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
public class Tokenizer implements Serializable {
    /**
     * Anchored pattern for a one-character string that is either {@code \p{Punct}} or one of the
     * full-width characters "，", "、", "。".
     */
    public static final Pattern CJ_PUNCTUATION_REGEX;
    /**
     * Pattern for a decimal literal: any number of leading '-' signs, digits, a '.', then digits.
     * A plain integer such as "42" does not match because the fractional part is mandatory.
     */
    public static final Pattern NUMBER_REGEX;
    /** Anchored pattern for a one-character string in the {@code \p{Punct}} class. */
    public static final Pattern PUNCTUATION_REGEX;
    /** Anchored pattern for a one-character string in the {@code \p{Space}} class. */
    public static final Pattern SPACE_REGEX;

    /*
     * Process-wide state created at most once by initKoreanPostProcessing(); deTokenizeCjkt() only
     * takes its alternate branch when this is non-null.
     * (Decompiler note: renamed from "a" because of a collision with the root package name.)
     */
    private static a f1670a = null;

    /*
     * Maps ASCII punctuation ("!", "?", ",", ".", ":", "%", "#", "&", "(", ")", "~") to a
     * full-width counterpart; consulted by deTokenizeCjkt() for language codes starting with
     * "ja" or "zh".
     * (Decompiler note: renamed from "b" because of a collision with the root package name.)
     */
    private static final Map f1671b;

    /*
     * java.util.logging logger named "translate".
     * (Decompiler note: renamed from "c" because of a collision with the root package name.)
     */
    private static final Logger f1672c;
    // Language codes "zh", "ja", "th", "ko"; passed to b.a(...) by deTokenizeCjkt().
    private static final List d;
    private static final long serialVersionUID = -3438434462125278275L;
    // Applied to the raw input before it is split on whitespace in tokenize()/tokenizeForCJK().
    private final CharNormalizer charNormalizer;
    // Always true; not read anywhere in this class, but part of the serialized form.
    private final boolean handleSpecialPuncts = true;
    // Abbreviation lookup, keyed by getAbbrSignature(language, token).
    private final StringSet preprocAbbrData;
    // Word replacement lookup, keyed by getMapSignature(language, token); values are
    // whitespace-separated token lists (see getWordMap()).
    private final StringMap preprocMapData;

    static {
        // Character-class patterns; the anchored ones are applied to single-code-point strings.
        PUNCTUATION_REGEX = Pattern.compile("^\\p{Punct}$");
        CJ_PUNCTUATION_REGEX = Pattern.compile("^(\\p{Punct}|，|、|。)$");
        SPACE_REGEX = Pattern.compile("^\\p{Space}$");
        NUMBER_REGEX = Pattern.compile("-*[\\d]+\\.[\\d]+");

        f1672c = Logger.getLogger("translate");

        // ASCII punctuation -> full-width replacement, filled in a fixed order from a table.
        String[][] fullWidthPairs = {
            {"!", "！"},
            {"?", "？"},
            {",", "、"},
            {".", "。"},
            {":", "："},
            {"%", "％"},
            {"#", "＃"},
            {"&", "＆"},
            {"(", "（"},
            {")", "）"},
            {"~", "〜"},
        };
        HashMap fullWidth = new HashMap();
        for (String[] pair : fullWidthPairs) {
            fullWidth.put(pair[0], pair[1]);
        }
        f1671b = fullWidth;

        // Language codes handed to b.a(...) during CJKT de-tokenization.
        d = new ArrayList(Arrays.asList("zh", "ja", "th", "ko"));
    }

    /**
     * Creates a tokenizer backed by the given lookup tables.
     *
     * @param stringSet abbreviation set, queried with {@link #getAbbrSignature} keys
     * @param stringMap word replacement map, queried with {@link #getMapSignature} keys
     * @param charNormalizer normalizer applied to input text before it is split
     */
    public Tokenizer(StringSet stringSet, StringMap stringMap, CharNormalizer charNormalizer) {
        this.charNormalizer = charNormalizer;
        this.preprocMapData = stringMap;
        this.preprocAbbrData = stringSet;
    }

    /**
     * Turns tokenized text back into display text.
     *
     * <p>CJKT languages (see {@link #isCJKT}) are delegated to {@link #deTokenizeCjkt}. For every
     * other language the text is walked one code point at a time: a whitespace character is never
     * copied itself, it only marks that a space is pending; the next character is then preceded by
     * a single space unless it is punctuation. Consequently a space in front of punctuation and
     * trailing whitespace are dropped.
     *
     * @param str language name or code of the text
     * @param str2 tokenized text
     * @return the de-tokenized text
     */
    public static String deTokenize(String str, String str2) {
        if (isCJKT(str)) {
            return deTokenizeCjkt(str, str2);
        }
        StringBuilder out = new StringBuilder();
        boolean spacePending = false;
        for (int pos = 0; pos < str2.length(); pos += Character.charCount(str2.codePointAt(pos))) {
            String ch = new String(Character.toChars(str2.codePointAt(pos)));
            if (spacePending && !PUNCTUATION_REGEX.matcher(ch).find()) {
                out.append(" ");
            }
            if (SPACE_REGEX.matcher(ch).find()) {
                spacePending = true;
                continue;
            }
            out.append(ch);
            spacePending = false;
        }
        return out.toString();
    }

    /**
     * Re-joins tokenized text for a CJKT language, deciding character by character whether the
     * whitespace between tokens is kept, dropped or re-inserted.
     *
     * <p>Two code paths exist. The first (generic) loop runs unless {@code g.b(str)} is true and
     * the shared {@code a} state has been created by {@link #initKoreanPostProcessing}; in that
     * case the second loop runs instead (presumably the Korean-specific path, judging by the
     * initializer's name -- TODO confirm what {@code g.b} tests).
     *
     * @param str language name or code of the text
     * @param str2 tokenized text; may be null
     * @return the re-joined text; {@code OfflineTranslationException.CAUSE_NULL} when
     *     {@code str2} is null or splits into no tokens; {@code str2} unchanged when the second
     *     path is selected but {@code a.f1675c} is null
     */
    public static String deTokenizeCjkt(String str, String str2) {
        boolean z;
        boolean z2;
        boolean z3;
        boolean z4;
        boolean z5;
        String str3;
        if (str2 == null) {
            // NOTE(review): the constant's value is not visible in this file -- confirm it is the
            // intended "no output" value and not a decompiler constant substitution.
            return OfflineTranslationException.CAUSE_NULL;
        }
        if (!g.b(str) || f1670a == null) {
            // split() yields zero parts only for non-empty, whitespace-only input.
            if (str2.split("\\s+").length == 0) {
                return OfflineTranslationException.CAUSE_NULL;
            }
            StringBuilder sb = new StringBuilder();
            int i = 0;
            // State carried across iterations (copied back at the bottom of the loop):
            //   z6, z7 - result of the b.a(...) test for the most recent ordinary character
            //            (both start as true)
            //   z8     - previous character was CJ punctuation
            //   z9     - previous character was whitespace
            boolean z6 = true;
            boolean z7 = true;
            boolean z8 = false;
            boolean z9 = false;
            while (i < str2.length()) {
                int codePointAt = str2.codePointAt(i);
                String str4 = new String(Character.toChars(codePointAt));
                if (SPACE_REGEX.matcher(str4).find()) {
                    // Whitespace: emitted (z5) only when it directly follows CJ punctuation.
                    boolean z10 = !z9 ? z6 : z7;
                    boolean z11 = z8;
                    z2 = z10;
                    z4 = true;
                    z3 = false;
                    boolean z12 = z11;
                    z = z6;
                    z5 = z12;
                } else if (CJ_PUNCTUATION_REGEX.matcher(str4).find()) {
                    // Punctuation: always emitted; leaves the ordinary-character flags untouched.
                    z2 = z7;
                    z3 = true;
                    z4 = false;
                    z = z6;
                    z5 = true;
                } else {
                    // Ordinary character. b.a(...) is opaque here; presumably it reports which of
                    // the languages in d the character's script belongs to -- TODO confirm.
                    boolean z13 = !b.a(str4, d).isEmpty();
                    // Re-insert a space when whitespace preceded this character and neither the
                    // carried flag nor this character passed the b.a(...) test.
                    if (z9 && !z7 && !z13) {
                        sb.append(" ");
                    }
                    z = z13;
                    z2 = z13;
                    z3 = false;
                    z4 = false;
                    z5 = true;
                }
                if (z5) {
                    // For language codes starting with "ja" or "zh", swap in the full-width form
                    // from f1671b when the character has one; otherwise emit it unchanged.
                    if ((!str.startsWith("ja") && !str.startsWith("zh")) || (str3 = (String) f1671b.get(str4)) == null) {
                        str3 = str4;
                    }
                    sb.append(str3);
                }
                i = Character.charCount(codePointAt) + i;
                z6 = z;
                z7 = z2;
                z8 = z3;
                z9 = z4;
            }
            return sb.toString();
        }
        if (a.f1675c == null) {
            return str2;
        }
        StringBuilder sb2 = new StringBuilder();
        int i2 = 0;
        // State for the second path:
        //   z14 - last emitted single character matched a.f1673a
        //   z15 - a character matching a.f1674b was seen since the last emitted character
        //   z16 - the last emitted item was a run of a.b(...) characters
        boolean z14 = false;
        boolean z15 = false;
        boolean z16 = false;
        while (i2 < str2.length()) {
            int codePointAt2 = str2.codePointAt(i2);
            String str5 = new String(Character.toChars(codePointAt2));
            if (a.b(str5)) {
                // Collect the maximal run of consecutive characters accepted by a.b(...).
                StringBuilder sb3 = new StringBuilder();
                sb3.append(str5);
                int charCount = Character.charCount(codePointAt2);
                while (true) {
                    i2 += charCount;
                    if (i2 >= str2.length()) {
                        break;
                    }
                    int codePointAt3 = str2.codePointAt(i2);
                    String str6 = new String(Character.toChars(codePointAt3));
                    if (!a.b(str6)) {
                        break;
                    }
                    sb3.append(str6);
                    charCount = Character.charCount(codePointAt3);
                }
                // Separate the run from preceding non-run output.
                if (z14 || (!z16 && sb2.length() > 0)) {
                    sb2.append(" ");
                }
                // Two runs in a row are glued together only when the second one is listed in
                // a.f1675c (presumably a set of attachable suffixes -- TODO confirm).
                if (z16 && !a.f1675c.contains(sb3.toString())) {
                    sb2.append(" ");
                }
                sb2.append((CharSequence) sb3);
                z14 = false;
                z15 = false;
                z16 = true;
                // i2 already points past the run, so it is not advanced again here.
            } else {
                if (a.f1674b.matcher(str5).find()) {
                    // Character is skipped (never appended); only remembered via z15.
                    z15 = true;
                } else {
                    boolean find = a.f1673a.matcher(str5).find();
                    // A space precedes this character when it does not match a.f1673a and either
                    // follows a run, or follows a skipped / a.f1673a-matching character.
                    if (z16 && !find) {
                        sb2.append(" ");
                    }
                    if (!z16 && !find && (z15 || z14)) {
                        sb2.append(" ");
                    }
                    sb2.append(str5);
                    z14 = find;
                    z15 = false;
                    z16 = false;
                }
                i2 += Character.charCount(codePointAt2);
            }
        }
        return sb2.toString();
    }

    /**
     * Builds the lookup key used for the abbreviation set.
     *
     * @param str language name or code
     * @param str2 token
     * @return {@code str} immediately followed by {@code str2}; a null argument contributes the
     *     text "null"
     */
    public static String getAbbrSignature(String str, String str2) {
        return str + str2;
    }

    /**
     * Builds the lookup key used for the word replacement map.
     *
     * @param str language name or code
     * @param str2 token
     * @return {@code str} immediately followed by {@code str2}; a null argument contributes the
     *     text "null"
     */
    public static String getMapSignature(String str, String str2) {
        return str + str2;
    }

    /**
     * Tells whether a language is treated as Chinese, Japanese, Korean or Thai.
     *
     * <p>Accepted (ignoring case): any value starting with "zh", or exactly one of "chinese",
     * "japanese", "ja", "korean", "ko", "thai", "th".
     *
     * @param str language name or code; must not be null
     * @return true when the language is one of the above
     */
    public static boolean isCJKT(String str) {
        if (str.toLowerCase(Locale.ENGLISH).startsWith("zh")) {
            return true;
        }
        String[] exactNames = {"chinese", "japanese", "ja", "korean", "ko", "thai", "th"};
        for (String name : exactNames) {
            if (str.equalsIgnoreCase(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether a language is treated as Chinese, Japanese or Thai. Unlike {@link #isCJKT},
     * Korean ("korean" / "ko") is not accepted.
     *
     * <p>Accepted (ignoring case): any value starting with "zh", or exactly one of "chinese",
     * "japanese", "ja", "thai", "th".
     *
     * @param str language name or code; must not be null
     * @return true when the language is one of the above
     */
    public static boolean isCjkt(String str) {
        if (str.toLowerCase(Locale.ENGLISH).startsWith("zh")) {
            return true;
        }
        String[] exactNames = {"chinese", "japanese", "ja", "thai", "th"};
        for (String name : exactNames) {
            if (str.equalsIgnoreCase(name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads a tokenizer from a buffer positioned at its binary representation.
     *
     * <p>The three components are consumed from the buffer in a fixed order: abbreviation set,
     * word map, character normalizer (the same order {@link #writeToByteBuffer} writes them).
     *
     * @param byteBuffer source buffer; its position advances as the components are read
     * @return the reconstructed tokenizer
     */
    public static Tokenizer readFromByteBuffer(ByteBuffer byteBuffer) {
        StringSet abbreviations = StringSet.readFromByteBuffer(byteBuffer);
        StringMap wordMap = StringMap.readFromByteBuffer(byteBuffer);
        CharNormalizer normalizer = CharNormalizer.readFromByteBuffer(byteBuffer);
        return new Tokenizer(abbreviations, wordMap, normalizer);
    }

    /**
     * Loads a tokenizer from a file written by {@link #writeToFile(String, boolean)}.
     *
     * <p>The file is memory-mapped and its first int is inspected: when it equals the binary
     * format marker (1234504321) the rest of the mapping is parsed with
     * {@link #readFromByteBuffer}; otherwise the file is read as a Java-serialized
     * {@code Tokenizer}. In both cases {@link #initKoreanPostProcessing} is then called with
     * {@code str2}.
     *
     * @param str path of the tokenizer file; must be non-null and non-empty
     * @param str2 argument forwarded to {@link #initKoreanPostProcessing}; may be null
     * @return the loaded tokenizer
     * @throws DecoderRuntimeException with message "TknRffNullFile" when {@code str} is null or
     *     empty, or "TknRffFailed" (wrapping the cause) when the file cannot be opened, mapped
     *     or parsed
     */
    public static Tokenizer readFromFile(String str, String str2) {
        if (str == null || str.isEmpty()) {
            throw new DecoderRuntimeException("TknRffNullFile");
        }

        // Phase 1: open and map the file. A mapping stays valid after its channel is closed, so
        // the file handle is released right away instead of being left open on error paths.
        MappedByteBuffer map;
        try (RandomAccessFile file = new RandomAccessFile(str, "r");
                FileChannel channel = file.getChannel()) {
            map = channel.map(FileChannel.MapMode.READ_ONLY, 0L, channel.size());
        } catch (Exception e) {
            throw new DecoderRuntimeException("TknRffFailed", e, str);
        }

        // Phase 2: decode the contents.
        try {
            Tokenizer tokenizer;
            if (map.getInt() == 1234504321) {
                tokenizer = readFromByteBuffer(map);
            } else {
                // SECURITY NOTE(review): native Java deserialization; only safe for trusted files.
                try (FileInputStream fileIn = new FileInputStream(str);
                        ObjectInputStream objectIn = new ObjectInputStream(fileIn)) {
                    tokenizer = (Tokenizer) objectIn.readObject();
                }
            }
            tokenizer.initKoreanPostProcessing(str2);
            return tokenizer;
        } catch (Exception e) {
            // NOTE(review): the decompiled handler passed str2 (not the file path) as the third
            // argument on this path; kept as-is -- confirm against DecoderRuntimeException.
            throw new DecoderRuntimeException("TknRffFailed", e, str2);
        }
    }

    /**
     * Looks up the replacement tokens registered for a word.
     *
     * @param str language name or code
     * @param str2 word to look up
     * @param list output list; on a hit it is cleared and filled with the whitespace-separated
     *     parts of the mapped value, on a miss it is left untouched
     * @return true when the word map has an entry for the word, false otherwise
     */
    public boolean getWordMap(String str, String str2, List list) {
        String mapped = this.preprocMapData.get(getMapSignature(str, str2));
        if (mapped != null) {
            String[] parts = mapped.split("\\s+");
            list.clear();
            list.addAll(Arrays.asList(parts));
            return true;
        }
        return false;
    }

    /**
     * Creates the shared {@code a} state on first use and initializes it via {@code a.a(str)}.
     *
     * <p>Does nothing when {@code str} is null or the shared state already exists, so only the
     * first call with a non-null argument has an effect.
     *
     * @param str value forwarded to {@code a.a(...)}; may be null
     */
    public void initKoreanPostProcessing(String str) {
        if (str != null && f1670a == null) {
            f1670a = new a();
            a.a(str);
        }
    }

    /**
     * Checks whether a token is a known abbreviation for a language.
     *
     * @param str language name or code
     * @param str2 token to test
     * @return true when the abbreviation set contains the key built by {@link #getAbbrSignature}
     */
    public boolean isAbbrev(String str, String str2) {
        String key = getAbbrSignature(str, str2);
        return this.preprocAbbrData.contains(key);
    }

    /**
     * Checks whether a token is, in its entirety, a decimal number as defined by
     * {@link #NUMBER_REGEX}.
     *
     * @param str token to test; must not be null
     * @return true when the whole token matches the pattern
     */
    public boolean isNumber(String str) {
        boolean wholeTokenMatches = NUMBER_REGEX.matcher(str).matches();
        return wholeTokenMatches;
    }

    /**
     * Checks whether a token is one of the escaped quote entities that must stay intact.
     *
     * <p>Uses plain string equality: the previous {@code String.matches} calls compiled a regex
     * on every invocation even though both patterns contain no regex metacharacters, and this
     * method runs once per token during tokenization.
     *
     * @param str token to test; null is treated as "not special"
     * @return true when the token is exactly {@code &apos;} or {@code &quot;}
     */
    public boolean isSpecialPunct(String str) {
        return "&apos;".equals(str) || "&quot;".equals(str);
    }

    /**
     * Surrounds every {@code &apos;} and {@code &quot;} entity with a space on each side so that
     * whitespace splitting isolates them as separate tokens.
     *
     * @param str text to process; must not be null
     * @return the text with both entities padded by spaces
     */
    public String replaceSpecialPuncts(String str) {
        String aposPadded = str.replace("&apos;", " &apos; ");
        String bothPadded = aposPadded.replace("&quot;", " &quot; ");
        return bothPadded;
    }

    /**
     * Splits text into tokens.
     *
     * <p>CJKT languages (see {@link #isCJKT}) are delegated to {@link #tokenizeForCJK}. Otherwise
     * the text is normalized, split on whitespace, and each word is handled as follows:
     * <ul>
     *   <li>abbreviations, decimal numbers and special entities are kept whole;</li>
     *   <li>words with a word-map entry are replaced by their mapped tokens;</li>
     *   <li>anything else is scanned per code point: non-punctuation accumulates into the current
     *       token, every punctuation character other than an apostrophe becomes a token of its
     *       own, and an apostrophe either closes the current token (after l/L/d/D, e.g.
     *       {@code l'}), starts a new one (before {@code s}, giving {@code 's}), or is simply
     *       appended to the current token.</li>
     * </ul>
     *
     * @param str language name or code
     * @param str2 text to tokenize
     * @return the tokens, in order
     */
    public List tokenize(String str, String str2) {
        if (isCJKT(str)) {
            return tokenizeForCJK(str, str2);
        }
        String[] words = this.charNormalizer.normalizeChars(str2).split("\\s+");
        ArrayList tokens = new ArrayList();
        ArrayList mappedWords = new ArrayList();
        for (String word : words) {
            if (isAbbrev(str, word) || isNumber(word) || isSpecialPunct(word)) {
                tokens.add(word);
                continue;
            }
            if (getWordMap(str, word, mappedWords)) {
                tokens.addAll(mappedWords);
                continue;
            }

            // Token under construction; null means nothing is pending.
            StringBuilder pending = null;
            for (int pos = 0; pos < word.length(); pos += Character.charCount(word.codePointAt(pos))) {
                int cp = word.codePointAt(pos);
                String ch = new String(Character.toChars(cp));

                if (!PUNCTUATION_REGEX.matcher(ch).find()) {
                    if (pending == null) {
                        pending = new StringBuilder();
                    }
                    pending.append(ch);
                    continue;
                }

                if (!ch.equals("'")) {
                    // Ordinary punctuation: flush what is pending, then emit it on its own.
                    if (pending != null) {
                        tokens.add(pending.toString());
                    }
                    tokens.add(ch);
                    pending = null;
                    continue;
                }

                // Apostrophe: look at the character before it and the code point after it.
                char before = pending == null ? (char) 0 : pending.charAt(pending.length() - 1);
                int afterPos = pos + Character.charCount(cp);
                int after = afterPos < word.length() ? word.codePointAt(afterPos) : 0;

                if (before == 'l' || before == 'L' || before == 'd' || before == 'D') {
                    // Elision: the apostrophe ends the current token.
                    pending.append(ch);
                    tokens.add(pending.toString());
                    pending = null;
                    continue;
                }
                if (after == 's' && pending != null) {
                    // Apostrophe followed by "s": the apostrophe begins a new token.
                    tokens.add(pending.toString());
                    pending = null;
                }
                if (pending == null) {
                    pending = new StringBuilder();
                }
                pending.append(ch);
            }
            if (pending != null) {
                tokens.add(pending.toString());
            }
        }
        return tokens;
    }

    /**
     * Tokenizes text for a CJKT language.
     *
     * <p>The text is normalized and split on whitespace. Within each chunk every code point
     * above 127 becomes its own token, while consecutive code points up to 127 are grouped into
     * one token. When the language equals "THAI" (ignoring case) or its lower-cased form starts
     * with "th", a final "." token is appended unless the very last character of the input is
     * one of {@code . ! ? ;}.
     *
     * @param str language name or code
     * @param str2 text to tokenize
     * @return the tokens, in order
     */
    public List tokenizeForCJK(String str, String str2) {
        String[] chunks = this.charNormalizer.normalizeChars(str2).split("\\s+");
        // True while a closing "." still has to be appended.
        boolean needsFinalStop = str.equalsIgnoreCase("THAI") || str.toLowerCase().startsWith("th");
        ArrayList tokens = new ArrayList();
        int lastChunk = chunks.length - 1;
        for (int c = 0; c <= lastChunk; c++) {
            String chunk = chunks[c];
            StringBuilder asciiRun = null;
            int pos = 0;
            while (pos < chunk.length()) {
                int cp = chunk.codePointAt(pos);
                String ch = String.valueOf(Character.toChars(cp));
                if (cp <= 127) {
                    if (asciiRun == null) {
                        asciiRun = new StringBuilder();
                    }
                    asciiRun.append(ch);
                } else {
                    if (asciiRun != null) {
                        tokens.add(asciiRun.toString());
                    }
                    tokens.add(ch);
                    asciiRun = null;
                }
                pos += Character.charCount(cp);

                boolean atEndOfInput = c == lastChunk && pos >= chunk.length();
                if (needsFinalStop && atEndOfInput && ".!?;".indexOf(cp) >= 0) {
                    needsFinalStop = false;
                }
            }
            if (asciiRun != null) {
                tokens.add(asciiRun.toString());
            }
        }
        if (needsFinalStop) {
            tokens.add(".");
        }
        return tokens;
    }

    /**
     * Tokenizes text and returns the tokens joined by single spaces.
     *
     * <p>The text is first passed through {@link #replaceSpecialPuncts} and then through
     * {@link #tokenize}.
     *
     * @param str language name or code
     * @param str2 text to tokenize
     * @return the tokens separated by one space each, with no leading or trailing space
     */
    public String tokenizeWithJoin(String str, String str2) {
        List tokens = tokenize(str, replaceSpecialPuncts(str2));
        StringBuilder joined = new StringBuilder();
        for (int idx = 0; idx < tokens.size(); idx++) {
            if (idx > 0) {
                joined.append(" ");
            }
            joined.append((String) tokens.get(idx));
        }
        return joined.toString();
    }

    /**
     * Writes this tokenizer's binary representation into a buffer.
     *
     * <p>The components are written in a fixed order: abbreviation set, word map, character
     * normalizer (the order {@link #readFromByteBuffer} expects).
     *
     * @param byteBuffer destination buffer; its position advances as the components are written
     */
    public void writeToByteBuffer(ByteBuffer byteBuffer) {
        preprocAbbrData.writeToByteBuffer(byteBuffer);
        preprocMapData.writeToByteBuffer(byteBuffer);
        charNormalizer.writeToByteBuffer(byteBuffer);
    }

    /**
     * Writes this tokenizer to a file using Java serialization (the non-binary format of
     * {@link #writeToFile(String, boolean)}).
     *
     * @param str path of the file to write
     */
    public void writeToFile(String str) {
        final boolean useBinaryFormat = false;
        writeToFile(str, useBinaryFormat);
    }

    /**
     * Writes this tokenizer to a file in one of two formats.
     *
     * <p>Binary format ({@code z == true}): the file is mapped read-write with a fixed
     * 100,000,000-byte window, the marker int 1234504321 is written, followed by
     * {@link #writeToByteBuffer}; the file is then truncated to the number of bytes written.
     * Serialized format ({@code z == false}): this object is written with Java serialization.
     *
     * <p>An {@link IOException} does not propagate (the method stays best-effort, as before), but
     * it is now logged instead of being discarded, and the file handles are closed even when a
     * write fails.
     *
     * @param str path of the file to write
     * @param z true for the binary format, false for Java serialization
     */
    public void writeToFile(String str, boolean z) {
        try {
            if (z) {
                // Closing the RandomAccessFile also closes its channel.
                try (RandomAccessFile file = new RandomAccessFile(str, "rw")) {
                    FileChannel channel = file.getChannel();
                    MappedByteBuffer map = channel.map(FileChannel.MapMode.READ_WRITE, 0L, 100000000L);
                    map.putInt(1234504321);
                    writeToByteBuffer(map);
                    // Mapping grew the file to the window size; cut it back to what was written.
                    channel.truncate(map.position());
                }
            } else {
                try (FileOutputStream fileOut = new FileOutputStream(str);
                        ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
                    objectOut.writeObject(this);
                }
            }
        } catch (IOException e) {
            f1672c.log(java.util.logging.Level.WARNING, "Failed to write tokenizer to " + str, e);
        }
    }
}
