/*
 * Decompiled with CFR 0.152.
 */
package org.forester.io.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.forester.surfacing.BasicDomain;
import org.forester.surfacing.BasicProtein;
import org.forester.surfacing.Domain;
import org.forester.surfacing.DomainId;
import org.forester.surfacing.Protein;
import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;

public final class HmmPfamOutputParser {
    private static final String RETRO = "RETRO";
    private static final String PHAGE = "PHAGE";
    private static final String VIR = "VIR";
    private static final String TRANSPOS = "TRANSPOS";
    private static final String RV = "RV";
    private static final String GAG = "GAG_";
    private static final String HCV = "HCV_";
    private static final String HERPES = "Herpes_";
    private static final int E_VALUE_MAXIMUM_DEFAULT = -1;
    private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
    private static final boolean IGNORE_DUFS_DEFAULT = false;
    private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
    private final Set<DomainId> _filter;
    private final FilterType _filter_type;
    private final File _input_file;
    private final String _species;
    private final String _model_type;
    private double _e_value_maximum;
    private Map<String, String> _individual_domain_score_cutoffs;
    private boolean _ignore_dufs;
    private boolean _ignore_virus_like_ids;
    private boolean _allow_non_unique_query;
    private boolean _verbose;
    private int _max_allowed_overlap;
    private boolean _ignore_engulfed_domains;
    private ReturnType _return_type;
    private int _proteins_encountered;
    private int _proteins_ignored_due_to_filter;
    private int _proteins_stored;
    private int _domains_encountered;
    private int _domains_ignored_due_to_duf;
    private int _domains_ignored_due_to_overlap;
    private int _domains_ignored_due_to_e_value;
    private int _domains_ignored_due_to_individual_score_cutoff;
    private int _domains_stored;
    private SortedSet<DomainId> _domains_stored_set;
    private long _time;
    private int _domains_ignored_due_to_negative_domain_filter;
    private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
    private int _domains_ignored_due_to_virus_like_id;
    private Map<String, Integer> _domains_ignored_due_to_virus_like_id_counts_map;

    public HmmPfamOutputParser(File input_file, String species, String model_type) {
        this._input_file = input_file;
        this._species = species;
        this._model_type = model_type;
        this._filter = null;
        this._filter_type = FilterType.NONE;
        this.init();
    }

    public HmmPfamOutputParser(File input_file, String species, String model_type, Set<DomainId> filter, FilterType filter_type) {
        this._input_file = input_file;
        this._species = species;
        this._model_type = model_type;
        this._filter = filter;
        this._filter_type = filter_type;
        this.init();
    }

    private void actuallyAddProtein(List<Protein> proteins, Protein current_protein) {
        List<Domain> l = current_protein.getProteinDomains();
        for (Domain d : l) {
            this.getDomainsStoredSet().add(d.getDomainId());
        }
        proteins.add(current_protein);
        ++this._proteins_stored;
    }

    private void addProtein(List<Protein> proteins, Protein current_protein) {
        if (this.getFilterType() == FilterType.POSITIVE_PROTEIN || this.getFilterType() == FilterType.NEGATIVE_PROTEIN) {
            HashSet<DomainId> domain_ids_in_protein = new HashSet<DomainId>();
            for (Domain d : current_protein.getProteinDomains()) {
                domain_ids_in_protein.add(d.getDomainId());
            }
            domain_ids_in_protein.retainAll(this.getFilter());
            if (this.getFilterType() == FilterType.POSITIVE_PROTEIN) {
                if (domain_ids_in_protein.size() > 0) {
                    this.actuallyAddProtein(proteins, current_protein);
                } else {
                    ++this._proteins_ignored_due_to_filter;
                }
            } else if (domain_ids_in_protein.size() < 1) {
                this.actuallyAddProtein(proteins, current_protein);
            } else {
                ++this._proteins_ignored_due_to_filter;
            }
        } else {
            this.actuallyAddProtein(proteins, current_protein);
        }
    }

    public int getDomainsEncountered() {
        return this._domains_encountered;
    }

    public int getDomainsIgnoredDueToDuf() {
        return this._domains_ignored_due_to_duf;
    }

    public int getDomainsIgnoredDueToEval() {
        return this._domains_ignored_due_to_e_value;
    }

    public int getDomainsIgnoredDueToIndividualScoreCutoff() {
        return this._domains_ignored_due_to_individual_score_cutoff;
    }

    public int getDomainsIgnoredDueToNegativeDomainFilter() {
        return this._domains_ignored_due_to_negative_domain_filter;
    }

    public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
        return this._domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    public int getDomainsIgnoredDueToOverlap() {
        return this._domains_ignored_due_to_overlap;
    }

    public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
        return this._domains_ignored_due_to_virus_like_id_counts_map;
    }

    public int getDomainsIgnoredDueToVirusLikeIds() {
        return this._domains_ignored_due_to_virus_like_id;
    }

    public int getDomainsStored() {
        return this._domains_stored;
    }

    public SortedSet<DomainId> getDomainsStoredSet() {
        return this._domains_stored_set;
    }

    private double getEValueMaximum() {
        return this._e_value_maximum;
    }

    private Set<DomainId> getFilter() {
        return this._filter;
    }

    private FilterType getFilterType() {
        return this._filter_type;
    }

    private Map<String, String> getIndividualDomainScoreCutoffs() {
        return this._individual_domain_score_cutoffs;
    }

    private File getInputFile() {
        return this._input_file;
    }

    private int getMaxAllowedOverlap() {
        return this._max_allowed_overlap;
    }

    private String getModelType() {
        return this._model_type;
    }

    public int getProteinsEncountered() {
        return this._proteins_encountered;
    }

    public int getProteinsIgnoredDueToFilter() {
        return this._proteins_ignored_due_to_filter;
    }

    public int getProteinsStored() {
        return this._proteins_stored;
    }

    private ReturnType getReturnType() {
        return this._return_type;
    }

    private String getSpecies() {
        return this._species;
    }

    public long getTime() {
        return this._time;
    }

    private void init() {
        this._e_value_maximum = -1.0;
        this.setIgnoreDufs(false);
        this.setReturnType(RETURN_TYPE_DEFAULT);
        this._max_allowed_overlap = -1;
        this.setIndividualDomainScoreCutoffs(null);
        this.setIgnoreEngulfedDomains(false);
        this.setIgnoreVirusLikeIds(false);
        this.setAllowNonUniqueQuery(false);
        this.setVerbose(false);
        this.intitCounts();
    }

    private void intitCounts() {
        this.setDomainsStoredSet(new TreeSet<DomainId>());
        this.setDomainsEncountered(0);
        this.setProteinsEncountered(0);
        this.setProteinsIgnoredDueToFilter(0);
        this.setDomainsIgnoredDueToNegativeFilter(0);
        this.setDomainsIgnoredDueToDuf(0);
        this.setDomainsIgnoredDueToEval(0);
        this.setDomainsIgnoredDueToIndividualScoreCutoff(0);
        this.setDomainsIgnoredDueToVirusLikeId(0);
        this.setDomainsIgnoredDueToOverlap(0);
        this.setDomainsStored(0);
        this.setProteinsStored(0);
        this.setTime(0L);
        this.setDomainsIgnoredDueToVirusLikeIdCountsMap(new TreeMap<String, Integer>());
        this.setDomainsIgnoredDueToNegativeDomainFilterCountsMap(new TreeMap<String, Integer>());
    }

    private boolean isAllowNonUniqueQuery() {
        return this._allow_non_unique_query;
    }

    private boolean isIgnoreDufs() {
        return this._ignore_dufs;
    }

    private boolean isIgnoreEngulfedDomains() {
        return this._ignore_engulfed_domains;
    }

    private boolean isIgnoreVirusLikeIds() {
        return this._ignore_virus_like_ids;
    }

    private boolean isVerbose() {
        return this._verbose;
    }

    public List<Protein> parse() throws IOException {
        String line;
        this.intitCounts();
        HashSet<String> queries = new HashSet<String>();
        String error = ForesterUtil.isReadableFile(this.getInputFile());
        if (!ForesterUtil.isEmpty(error)) {
            throw new IOException(error);
        }
        BufferedReader br = new BufferedReader(new FileReader(this.getInputFile()));
        ArrayList<Protein> proteins = new ArrayList<Protein>();
        Protein current_protein = null;
        int line_number = 0;
        boolean saw_double_slash = true;
        boolean can_parse_domains = false;
        boolean saw_parsed_for_domains = false;
        boolean saw_query_sequence = false;
        boolean was_not_unique = false;
        long start_time = new Date().getTime();
        while ((line = br.readLine()) != null) {
            ++line_number;
            if (line.length() < 1) continue;
            if (line.startsWith("Query sequence:")) {
                ++this._proteins_encountered;
                if (!saw_double_slash) {
                    throw new IOException("unexpected format [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                saw_double_slash = false;
                saw_query_sequence = true;
                was_not_unique = false;
                String query = line.substring(16).trim();
                if (ForesterUtil.isEmpty(query)) {
                    throw new IOException("query sequence cannot be empty [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                if (queries.contains(query)) {
                    if (!this.isAllowNonUniqueQuery()) {
                        throw new IOException("query \"" + query + "\" is not unique [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                    }
                    if (this.isVerbose()) {
                        ForesterUtil.printWarningMessage(this.getClass().getName(), "query \"" + query + "\" is not unique [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                    }
                } else {
                    queries.add(query);
                }
                if (current_protein != null) {
                    throw new IOException("unexpected format [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                if (this.getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN) {
                    current_protein = new BasicProtein(query, this.getSpecies());
                    continue;
                }
                throw new IllegalArgumentException("unknown return type");
            }
            if (line.startsWith("Accession:")) {
                if (!saw_query_sequence || current_protein == null) {
                    throw new IOException("unexpected format [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                ((BasicProtein)current_protein).setAccession(line.substring(11).trim());
                continue;
            }
            if (line.startsWith("Description:")) {
                if (!saw_query_sequence || current_protein == null) {
                    throw new IOException("unexpected format [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                if (was_not_unique) {
                    if (this.getReturnType() != ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN) continue;
                    current_protein = new BasicProtein(current_protein.getProteinId() + " " + line.substring(13).trim(), this.getSpecies());
                    continue;
                }
                ((BasicProtein)current_protein).setDescription(line.substring(13).trim());
                continue;
            }
            if (line.startsWith("Parsed for domains:")) {
                if (!saw_query_sequence) {
                    throw new IOException("unexpected format [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                saw_query_sequence = false;
                saw_parsed_for_domains = true;
                continue;
            }
            if (saw_parsed_for_domains && line.startsWith("--------")) {
                can_parse_domains = true;
                saw_parsed_for_domains = false;
                continue;
            }
            if (line.startsWith("Alignments of top-scoring domains:")) {
                if (!can_parse_domains) {
                    throw new IOException("unexpected format [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
                can_parse_domains = false;
                continue;
            }
            if (line.startsWith("//")) {
                can_parse_domains = false;
                saw_double_slash = true;
                if (current_protein.getProteinDomains().size() > 0) {
                    if (this.getMaxAllowedOverlap() != -1 || this.isIgnoreEngulfedDomains()) {
                        int domains_count = current_protein.getNumberOfProteinDomains();
                        current_protein = SurfacingUtil.removeOverlappingDomains(this.getMaxAllowedOverlap(), this.isIgnoreEngulfedDomains(), current_protein);
                        int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
                        this._domains_stored -= domains_removed;
                        this._domains_ignored_due_to_overlap += domains_removed;
                    }
                    this.addProtein(proteins, current_protein);
                }
                current_protein = null;
                continue;
            }
            if (!can_parse_domains || line.indexOf("[no hits above thresholds]") != -1) continue;
            String[] s = line.split("\\s+");
            if (s.length != 10) {
                throw new IOException("unexpected format in hmmpfam output:  \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            String id = s[0];
            String domain_count_str = s[1];
            String from_str = s[2];
            String to_str = s[3];
            String query_match_str = s[4];
            String hmm_match_str = s[7];
            String score_str = s[8];
            String e_value_str = s[9];
            int from = -1;
            int to = -1;
            double e_value = -1.0;
            double score = -1.0;
            boolean is_complete_hmm_match = false;
            boolean is_complete_query_match = false;
            try {
                from = Integer.valueOf(from_str);
            }
            catch (NumberFormatException e) {
                throw new IOException("could not parse seq-f from \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            try {
                to = Integer.valueOf(to_str);
            }
            catch (NumberFormatException e) {
                throw new IOException("could not parse seq-t from \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            try {
                score = Double.valueOf(score_str);
            }
            catch (NumberFormatException e) {
                throw new IOException("could not parse score from \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            try {
                e_value = Double.valueOf(e_value_str);
            }
            catch (NumberFormatException e) {
                throw new IOException("could not parse E-value from \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            if (hmm_match_str.equals("[]")) {
                is_complete_hmm_match = true;
            } else if (!(hmm_match_str.equals(".]") || hmm_match_str.equals("[.") || hmm_match_str.equals(".."))) {
                throw new IOException("unexpected format in hmmpfam output:  \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            if (query_match_str.equals("..")) {
                is_complete_query_match = true;
            } else if (!(query_match_str.equals(".]") || query_match_str.equals("[.") || query_match_str.equals("[]"))) {
                throw new IOException("unexpected format in hmmpfam output:  \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            String[] ct = domain_count_str.split("/");
            if (ct.length != 2) {
                throw new IOException("unexpected format in hmmpfam output:  \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            String number_str = ct[0];
            String total_str = ct[1];
            int number = -1;
            int total = -1;
            try {
                number = Integer.valueOf(number_str);
            }
            catch (NumberFormatException e) {
                throw new IOException("could not parse domain number from \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            try {
                total = Integer.valueOf(total_str);
            }
            catch (NumberFormatException e) {
                throw new IOException("could not parse domain count from \"" + line + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
            }
            ++this._domains_encountered;
            boolean failed_cutoff = false;
            if (this.getIndividualDomainScoreCutoffs() != null) {
                if (this.getIndividualDomainScoreCutoffs().containsKey(id)) {
                    double cutoff = Double.parseDouble(this.getIndividualDomainScoreCutoffs().get(id));
                    if (score < cutoff) {
                        failed_cutoff = true;
                    }
                } else {
                    throw new IOException("could not find a score cutoff value for domain id \"" + id + "\" [line " + line_number + "] in [" + this.getInputFile().getCanonicalPath() + "]");
                }
            }
            String uc_id = id.toUpperCase();
            if (failed_cutoff) {
                ++this._domains_ignored_due_to_individual_score_cutoff;
                continue;
            }
            if (this.getEValueMaximum() != -1.0 && e_value > this.getEValueMaximum()) {
                ++this._domains_ignored_due_to_e_value;
                continue;
            }
            if (this.isIgnoreDufs() && uc_id.startsWith("DUF")) {
                ++this._domains_ignored_due_to_duf;
                continue;
            }
            if (this.isIgnoreVirusLikeIds() && (uc_id.contains(VIR) || uc_id.contains(PHAGE) || uc_id.contains(RETRO) || uc_id.contains(TRANSPOS) || uc_id.startsWith(RV) || uc_id.startsWith(GAG) || uc_id.startsWith(HCV) || uc_id.startsWith(HERPES))) {
                ForesterUtil.increaseCountingMap(this.getDomainsIgnoredDueToVirusLikeIdCountsMap(), id);
                ++this._domains_ignored_due_to_virus_like_id;
                continue;
            }
            if (this.getFilterType() == FilterType.NEGATIVE_DOMAIN && this.getFilter().contains(new DomainId(id))) {
                ++this._domains_ignored_due_to_negative_domain_filter;
                ForesterUtil.increaseCountingMap(this.getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), id);
                continue;
            }
            BasicDomain pd = new BasicDomain(id, from, to, (short)number, (short)total, e_value, score);
            current_protein.addProteinDomain(pd);
            ++this._domains_stored;
        }
        this.setTime(new Date().getTime() - start_time);
        if (!saw_double_slash) {
            throw new IOException("file ends unexpectedly [line " + line_number + "]");
        }
        return proteins;
    }

    public void setAllowNonUniqueQuery(boolean allow_non_unique_query) {
        this._allow_non_unique_query = allow_non_unique_query;
    }

    private void setDomainsEncountered(int domains_encountered) {
        this._domains_encountered = domains_encountered;
    }

    private void setDomainsIgnoredDueToDuf(int domains_ignored_due_to_duf) {
        this._domains_ignored_due_to_duf = domains_ignored_due_to_duf;
    }

    public void setDomainsIgnoredDueToEval(int domains_ignored_due_to_e_value) {
        this._domains_ignored_due_to_e_value = domains_ignored_due_to_e_value;
    }

    public void setDomainsIgnoredDueToIndividualScoreCutoff(int domains_ignored_due_to_individual_score_cutoff) {
        this._domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
    }

    private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap(Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map) {
        this._domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
    }

    private void setDomainsIgnoredDueToNegativeFilter(int domains_ignored_due_to_negative_domain_filter) {
        this._domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
    }

    private void setDomainsIgnoredDueToOverlap(int domains_ignored_due_to_overlap) {
        this._domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
    }

    private void setDomainsIgnoredDueToVirusLikeId(int i) {
        this._domains_ignored_due_to_virus_like_id = i;
    }

    private void setDomainsIgnoredDueToVirusLikeIdCountsMap(Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map) {
        this._domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
    }

    private void setDomainsStored(int domains_stored) {
        this._domains_stored = domains_stored;
    }

    private void setDomainsStoredSet(SortedSet<DomainId> _storeddomains_stored) {
        this._domains_stored_set = _storeddomains_stored;
    }

    public void setEValueMaximum(double e_value_maximum) {
        if (e_value_maximum < 0.0) {
            throw new IllegalArgumentException("attempt to set the maximum E-value to a negative value");
        }
        this._e_value_maximum = e_value_maximum;
    }

    public void setIgnoreDufs(boolean ignore_dufs) {
        this._ignore_dufs = ignore_dufs;
    }

    public void setIgnoreEngulfedDomains(boolean ignore_engulfed_domains) {
        this._ignore_engulfed_domains = ignore_engulfed_domains;
    }

    public void setIgnoreVirusLikeIds(boolean ignore_virus_like_ids) {
        this._ignore_virus_like_ids = ignore_virus_like_ids;
    }

    public void setIndividualDomainScoreCutoffs(Map<String, String> individual_domain_score_cutoffs) {
        this._individual_domain_score_cutoffs = individual_domain_score_cutoffs;
    }

    public void setMaxAllowedOverlap(int max_allowed_overlap) {
        if (max_allowed_overlap < 0) {
            throw new IllegalArgumentException("Attempt to set max allowed overlap to less than zero.");
        }
        this._max_allowed_overlap = max_allowed_overlap;
    }

    private void setProteinsEncountered(int proteins_encountered) {
        this._proteins_encountered = proteins_encountered;
    }

    private void setProteinsIgnoredDueToFilter(int proteins_ignored_due_to_filter) {
        this._proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
    }

    private void setProteinsStored(int proteins_stored) {
        this._proteins_stored = proteins_stored;
    }

    public void setReturnType(ReturnType return_type) {
        this._return_type = return_type;
    }

    private void setTime(long time) {
        this._time = time;
    }

    public void setVerbose(boolean verbose) {
        this._verbose = verbose;
    }

    public static enum FilterType {
        NONE,
        POSITIVE_PROTEIN,
        NEGATIVE_PROTEIN,
        NEGATIVE_DOMAIN;

    }

    public static enum ReturnType {
        UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;

    }
}

