/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.annotation;

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.io.PrintStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.dropseqrna.annotation.EnhanceGTFRecords;
import org.broadinstitute.dropseqrna.annotation.GTFParser;
import org.broadinstitute.dropseqrna.annotation.GTFRecord;
import org.broadinstitute.dropseqrna.annotation.GenomicOrderComparator;
import org.broadinstitute.dropseqrna.cmdline.MetaData;
import org.broadinstitute.dropseqrna.utils.DropSeqSamUtil;
import org.broadinstitute.dropseqrna.utils.FilteredIterator;
import org.broadinstitute.dropseqrna.utils.io.ErrorCheckingPrintStream;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;

@CommandLineProgramProperties(usage="GTF files are annoyingly complex with a poor definition of what data is in them. So hey, why not write a file parser. This program reduces the GTF file in to a simplier, easier to parse format, while simultaneously allowing for data to be filtered.", usageShort="Parse and simplify a GTF file into an easier to use format.", programGroup=MetaData.class)
public class ReduceGTF
extends CommandLineProgram {
    private static final Log log = Log.getInstance(ReduceGTF.class);
    private static final List<String> DEFAULT_FEATURE_TYPES = CollectionUtil.makeList((Object[])new String[]{"gene", "transcript", "exon"});
    private static final List<String> DEFAULT_IGNORED_FUNC_TYPES = CollectionUtil.makeList((Object[])new String[]{"pseudogene", "polymorphic_pseudogene", "TR_J_pseudogene", "TR_V_pseudogene", "IG_C_pseudogene", "IG_J_pseudogene", "IG_V_pseudogene"});
    private static final String NA = "NA";
    @Option(shortName="SD", doc="The reference sequence dictionary.  Only chromosomes found in this file AND the GTF file will be retained.")
    public File SEQUENCE_DICTIONARY;
    @Option(doc="The GTF file to reduce")
    public File GTF;
    @Option(shortName="O", doc="The output reduced GTF file.")
    public File OUTPUT;
    @Option(doc="Feature type(s) to extract. Only lines of the GTF that have these feature types will be extracted.  This is the 3rd field of the GTF file, some examples of standard feature types are CDS, start_codon, stop_codon, and exon. ")
    public List<String> FEATURE_TYPE = DEFAULT_FEATURE_TYPES;
    @Option(doc="Functional type(s) to ignore.  These are values in the FUNCTIONAL_FIELD column in the GTF file.")
    public List<String> IGNORE_FUNC_TYPE = DEFAULT_IGNORED_FUNC_TYPES;
    @Option(doc="Enhance this reduced GTF file with genes,transcripts,introns, and consensus introns.  This is real handy when your GTF file only defines exons, but has the transcript and gene IDs they belong to.")
    public boolean ENHANCE_GTF = true;
    private SAMSequenceDictionary dict;
    private boolean initialized = false;

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected int doWork() {
        IOUtil.assertFileIsReadable((File)this.SEQUENCE_DICTIONARY);
        IOUtil.assertFileIsReadable((File)this.GTF);
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        this.initialize();
        FilteringGTFParser parser = this.parseGTF();
        try {
            ErrorCheckingPrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting((File)this.OUTPUT));
            this.writeHeader(out);
            if (!this.ENHANCE_GTF) {
                this.writeRecords(out, (Iterable<GTFRecord>)((Object)parser));
            } else {
                EnhanceGTFRecords e = new EnhanceGTFRecords();
                List<GTFRecord> records = e.enhanceGTFRecords((Iterator<GTFRecord>)((Object)parser));
                Collections.sort(records, new GenomicOrderComparator(this.dict));
                this.writeRecords(out, records);
            }
            ((PrintStream)out).close();
            int n = 0;
            return n;
        }
        finally {
            CloserUtil.close((Object)((Object)parser));
        }
    }

    private void initialize() {
        if (!this.initialized) {
            this.dict = DropSeqSamUtil.loadSequenceDictionary(this.SEQUENCE_DICTIONARY);
            this.initialized = true;
        }
    }

    FilteringGTFParser parseGTF() {
        this.initialize();
        return new FilteringGTFParser(this.GTF);
    }

    private void writeHeader(PrintStream out) {
        Object[] line = new String[]{"chr", "start", "end", "strand", "gene_name", "gene_id", "transcript_name", "transcript_id", "transcriptType", "annotationType"};
        String h = StringUtils.join((Object[])line, (String)"\t");
        out.println(h);
    }

    private void writeRecords(PrintStream out, Iterable<GTFRecord> records) {
        for (GTFRecord r : records) {
            this.writeLine(r, out);
        }
    }

    private void writeLine(GTFRecord r, PrintStream out) {
        if (r == null) {
            return;
        }
        Object[] line = new String[]{r.getChromosome(), new Integer(r.getStart()).toString(), new Integer(r.getEnd()).toString(), r.getStrandAsString(), r.getGeneName(), r.getGeneID(), r.getTranscriptName(), r.getTranscriptID(), r.getTranscriptType(), r.getFeatureType()};
        for (int i = 0; i < line.length; ++i) {
            if (line[i] != null && !((String)line[i]).isEmpty()) continue;
            line[i] = NA;
        }
        String h = StringUtils.join((Object[])line, (String)"\t");
        out.println(h);
    }

    public static void main(String[] args) {
        System.exit(new ReduceGTF().instanceMain(args));
    }

    private class FilteringGTFParser
    extends FilteredIterator<GTFRecord> {
        final Set<String> featureTypes;
        final Set<String> ignoredFunctionalTypes;

        private FilteringGTFParser(File gtf) {
            super(new GTFParser(gtf, ValidationStringency.STRICT));
            this.featureTypes = new HashSet<String>(ReduceGTF.this.FEATURE_TYPE);
            this.ignoredFunctionalTypes = new HashSet<String>(ReduceGTF.this.IGNORE_FUNC_TYPE);
        }

        @Override
        public boolean filterOut(GTFRecord rec) {
            return this.ignoredFunctionalTypes.contains(rec.getTranscriptType()) || !this.featureTypes.contains(rec.getFeatureType()) || ReduceGTF.this.dict.getSequence(rec.getChromosome()) == null;
        }
    }
}

