package com.sree.textbytes.jtopia.filter;

import com.sree.textbytes.StringHelpers.StopWords;
import com.sree.textbytes.StringHelpers.string;
import com.sree.textbytes.jtopia.helpers.DateUtils;
import com.sree.textbytes.jtopia.helpers.PunctuationRemover;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.text.WordUtils;
import org.apache.log4j.Logger;

/* loaded from: input_file:com/sree/textbytes/jtopia/filter/TermsFilter.class */
/**
 * Filters a map of candidate terms down to the ones worth keeping.
 *
 * <p>{@link #filterTerms(Map)} first selects candidates by occurrence count and word count, then
 * runs three clean-up passes in order: stop-word / punctuation removal, date-term removal, and
 * removal of single words that already appear inside a retained multi-word term.
 *
 * <p>Each surviving term maps to a two-element list: index 0 is the occurrence count that was
 * supplied by the caller, index 1 is the number of space-separated words in the term.
 */
public class TermsFilter {
    public static Logger logger = Logger.getLogger(TermsFilter.class.getName());

    /** One or more whitespace characters; collapsed to a single space during clean-up. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    /** A string made up of ASCII digits only. */
    private static final Pattern DIGITS_ONLY = Pattern.compile("^[0-9]+$");
    /** A string made up of ASCII digits and/or punctuation only. */
    private static final Pattern DIGITS_OR_PUNCTUATION_ONLY = Pattern.compile("^[0-9\\p{Punct}]+$");
    /** Every punctuation character except {@code _ . & \ / -}, which are handled separately. */
    private static final Pattern DISALLOWED_PUNCTUATION = Pattern.compile("[\\p{Punct}&&[^_.&\\\\/-]]");
    /** Placeholder '#', commas and typographic quotes stripped by {@link #removeUnwantedCharacters}. */
    private static final Pattern UNWANTED_CHARACTERS = Pattern.compile("#|,|”|“|’|‘");
    /** Sentinel returned by {@link #isPunctuation(char)} for characters it does not recognise. */
    private static final char NOT_PUNCTUATION = 'N';

    int singleStrengthMinOccur;
    int noLimitStrength;
    // Legacy scratch fields. They are kept so the package-level shape of the class is unchanged,
    // but the filtering passes now work on local variables and no longer write to them.
    ArrayList<Integer> values;
    String term;
    DateUtils dtOffset = new DateUtils();

    /**
     * @param singleStrengthMinOccur minimum occurrence count a single-word term needs to be kept
     * @param noLimitStrength        word count at or above which a term is kept regardless of how
     *                               often it occurs
     */
    public TermsFilter(int singleStrengthMinOccur, int noLimitStrength) {
        this.singleStrengthMinOccur = singleStrengthMinOccur;
        this.noLimitStrength = noLimitStrength;
    }

    /**
     * Selects and cleans the candidate terms.
     *
     * <p>A candidate is selected when it is a single word occurring at least
     * {@code singleStrengthMinOccur} times, or when it has at least {@code noLimitStrength} words.
     * Null or empty keys are skipped, and a single-word candidate with a {@code null} count is
     * treated as not meeting the occurrence threshold.
     *
     * @param map candidate term to occurrence count
     * @return cleaned term to {@code [occurrences, wordCount]}
     * @throws NullPointerException if {@code map} is {@code null}
     */
    public Map<String, ArrayList<Integer>> filterTerms(Map<String, Integer> map) {
        Map<String, ArrayList<Integer>> candidates = new HashMap<>();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            String candidate = entry.getKey();
            // Check for null/empty before touching the key; HashMap permits a null key.
            if (string.isNullOrEmpty(candidate)) {
                continue;
            }
            Integer occurrences = entry.getValue();
            int wordCount = candidate.split(StringUtils.SPACE).length;
            boolean frequentSingleWord = wordCount == 1
                    && occurrences != null
                    && occurrences.intValue() >= this.singleStrengthMinOccur;
            if (frequentSingleWord || wordCount >= this.noLimitStrength) {
                ArrayList<Integer> stats = new ArrayList<>(2);
                stats.add(occurrences);
                stats.add(Integer.valueOf(wordCount));
                candidates.put(candidate, stats);
            }
        }
        return cleanUp(candidates);
    }

    /** Runs the three clean-up passes in their fixed order. */
    private Map<String, ArrayList<Integer>> cleanUp(Map<String, ArrayList<Integer>> map) {
        return removeDuplicateSingleWords(removeDateTerms(removeStopWordsAndPunctuations(map)));
    }

    /**
     * Drops every single-word term whose word also occurs (case-insensitively) as one of the
     * words of a multi-word term in the same map.
     */
    private Map<String, ArrayList<Integer>> removeDuplicateSingleWords(Map<String, ArrayList<Integer>> map) {
        // Collect the words of all multi-word terms once, instead of rescanning every key for
        // every single-word term.
        Set<String> wordsInPhrases = new HashSet<>();
        for (String key : map.keySet()) {
            String[] words = key.split(StringUtils.SPACE);
            if (words.length > 1) {
                for (String word : words) {
                    wordsInPhrases.add(word.trim().toLowerCase(Locale.ROOT));
                }
            }
        }

        Map<String, ArrayList<Integer>> kept = new HashMap<>();
        for (Map.Entry<String, ArrayList<Integer>> entry : map.entrySet()) {
            String candidate = entry.getKey();
            boolean singleWord = candidate.split(StringUtils.SPACE).length == 1;
            if (singleWord && wordsInPhrases.contains(candidate.trim().toLowerCase(Locale.ROOT))) {
                continue;
            }
            kept.put(candidate, entry.getValue());
        }
        return kept;
    }

    /**
     * Discards terms containing a stop word or a word made only of digits/punctuation, then
     * strips punctuation from the remaining terms.
     *
     * <p>Every word of the term is checked against every stop word. Cleaned terms of three
     * characters or fewer, or consisting of digits only, are dropped as well. A failure while
     * processing one term is logged and that term is skipped; the other terms are still processed.
     */
    private Map<String, ArrayList<Integer>> removeStopWordsAndPunctuations(Map<String, ArrayList<Integer>> map) {
        // Stop words are compared trimmed but otherwise as supplied; term words are lower-cased.
        Set<String> stopWords = new HashSet<>();
        for (String stopWord : new StopWords().getStopWords()) {
            if (stopWord != null) {
                stopWords.add(stopWord.trim());
            }
        }

        Map<String, ArrayList<Integer>> kept = new HashMap<>();
        for (Map.Entry<String, ArrayList<Integer>> entry : map.entrySet()) {
            try {
                String candidate = WHITESPACE_RUN.matcher(entry.getKey()).replaceAll(StringUtils.SPACE);
                ArrayList<Integer> stats = entry.getValue();

                boolean isStopWord = false;
                boolean hasDigitOnlyWord = false;
                for (String token : candidate.split(StringUtils.SPACE)) {
                    String word = token.trim();
                    if (stopWords.contains(word.toLowerCase(Locale.ROOT))) {
                        isStopWord = true;
                    } else if (hasDigitOrPunctuation(word)) {
                        hasDigitOnlyWord = true;
                    }
                }
                if (isStopWord || hasDigitOnlyWord) {
                    logger.debug("Discarding term " + candidate + " isStopword " + isStopWord
                            + " hasDigitonlyWord " + hasDigitOnlyWord);
                    continue;
                }

                candidate = removeApostrophes(candidate, stats);
                candidate = PunctuationRemover.remove(candidate);
                candidate = DISALLOWED_PUNCTUATION.matcher(candidate).replaceAll("").trim();
                candidate = filterPunctuations(candidate);
                if (candidate.length() > 3 && !isDigitOnly(candidate)) {
                    kept.put(candidate, stats);
                }
            } catch (Exception e) {
                // Best effort: log and move on to the next term.
                logger.error(e.toString(), e);
            }
        }
        return kept;
    }

    /**
     * Discards terms in which any word equals (case-sensitively, after trimming) one of the
     * values returned by {@code DateUtils.getDateOffsets()}, and capitalizes the remaining terms
     * with {@code WordUtils.capitalize}.
     */
    private Map<String, ArrayList<Integer>> removeDateTerms(Map<String, ArrayList<Integer>> map) {
        // Collected once up front; assumes getDateOffsets() returns the same values on every call.
        Set<String> dateOffsets = new HashSet<>();
        for (String offset : this.dtOffset.getDateOffsets()) {
            if (offset != null) {
                dateOffsets.add(offset.trim());
            }
        }

        Map<String, ArrayList<Integer>> kept = new HashMap<>();
        for (Map.Entry<String, ArrayList<Integer>> entry : map.entrySet()) {
            String candidate = entry.getKey();
            String dateWord = null;
            for (String token : candidate.split(StringUtils.SPACE)) {
                String word = token.trim();
                if (dateOffsets.contains(word)) {
                    dateWord = word;
                    break;
                }
            }
            if (dateWord != null) {
                logger.debug("Date " + dateWord + " found in " + candidate);
                continue;
            }
            kept.put(WordUtils.capitalize(candidate), entry.getValue());
        }
        return kept;
    }

    /** Returns {@code true} when {@code str} consists of ASCII digits only. */
    private boolean isDigitOnly(String str) {
        return DIGITS_ONLY.matcher(str).find();
    }

    /** Returns {@code true} when {@code str} consists of ASCII digits and/or punctuation only. */
    private boolean hasDigitOrPunctuation(String str) {
        return DIGITS_OR_PUNCTUATION_ONLY.matcher(str).find();
    }

    /**
     * Cleans up the punctuation characters recognised by {@link #isPunctuation(char)} when they
     * are not surrounded by letters or digits.
     *
     * <p>Positions are taken from the term as it was on entry. Every edit inside the loop keeps
     * the string length unchanged (a character is swapped for {@code '#'} or for a space), so
     * those positions stay valid throughout; the placeholders and surplus whitespace are removed
     * by {@link #removeUnwantedCharacters(String)} at the end.
     *
     * @param rawTerm the term to clean
     * @return the cleaned, trimmed term
     */
    private String filterPunctuations(String rawTerm) {
        String current = rawTerm.trim();
        final char[] original = current.toCharArray();
        // The first and last characters are never inspected as punctuation themselves.
        for (int i = 1; i < original.length - 1; i++) {
            final char punctuation = isPunctuation(original[i]);
            if (punctuation == NOT_PUNCTUATION) {
                continue;
            }
            final char previous = current.charAt(i - 1);
            if (punctuation == '\\') {
                // A backslash is only judged by the character before it.
                if (!Character.isLetterOrDigit(previous)) {
                    logger.debug("Previous charactor is not letterorDigit " + previous);
                    if (isBlank(previous)) {
                        current = removeBlankCharacters(current, i - 1);
                    }
                    // Replaces every backslash in the term, not just the one at position i.
                    // No trim here: shortening the string would misalign the positions.
                    current = current.replace(punctuation, ' ');
                }
                continue;
            }
            final char next = current.charAt(i + 1);
            if (!Character.isLetterOrDigit(previous) || !Character.isLetterOrDigit(next)) {
                logger.debug("Next/Previous charactor is not letterorDigit " + previous + StringUtils.SPACE
                        + next + current + StringUtils.SPACE + i + StringUtils.SPACE + punctuation);
                if (isBlank(previous)) {
                    current = removeBlankCharacters(current, i - 1);
                } else if (isBlank(next)) {
                    current = removeBlankCharacters(current, i + 1);
                } else {
                    // Replaces every occurrence of this punctuation character in the term.
                    current = current.replace(punctuation, ' ');
                }
            }
        }
        return removeUnwantedCharacters(current);
    }

    /**
     * Replaces the character at index {@code i} with the placeholder {@code '#'}, keeping the
     * string length unchanged.
     *
     * @param str the source string
     * @param i   index of the character to replace
     * @return {@code str} with the character at {@code i} replaced by {@code '#'}
     */
    public static String removeBlankCharacters(String str, int i) {
        return str.substring(0, i) + '#' + str.substring(i + 1);
    }

    /** Returns {@code true} when {@code c} is whitespace according to {@link Character#isWhitespace(char)}. */
    public static boolean isBlank(char c) {
        return Character.isWhitespace(c);
    }

    /**
     * Removes {@code '#'}, commas and typographic quotes, collapses whitespace runs to a single
     * space and trims the result.
     */
    public static String removeUnwantedCharacters(String str) {
        String stripped = UNWANTED_CHARACTERS.matcher(str).replaceAll("");
        return WHITESPACE_RUN.matcher(stripped).replaceAll(StringUtils.SPACE).trim();
    }

    /**
     * Removes every {@code ’s} from the term. When at least one was present, the word count
     * stored at index 1 of {@code stats} is decremented by one in place.
     *
     * @param str   the term to process
     * @param stats the term's {@code [occurrences, wordCount]} list; expected to hold exactly
     *              those two elements
     * @return the term without {@code ’s}
     */
    private String removeApostrophes(String str, ArrayList<Integer> stats) {
        if (!str.contains("’s")) {
            return str;
        }
        int wordCount = stats.get(1).intValue();
        stats.set(1, Integer.valueOf(wordCount - 1));
        return str.replace("’s", "");
    }

    /**
     * Classifies a character as one of the specially handled punctuation marks.
     *
     * @param c the character to test
     * @return {@code c} itself when it is one of {@code . @ _ & / - \}; otherwise the sentinel
     *         {@code 'N'}
     */
    public static char isPunctuation(char c) {
        switch (c) {
            case '.':
            case '@':
            case '_':
            case '&':
            case '/':
            case '-':
            case '\\':
                return c;
            default:
                return NOT_PUNCTUATION;
        }
    }
}
