/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.barnyard;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMProgramRecord;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.samtools.util.ProgressLogger;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.broadinstitute.dropseqrna.TranscriptomeException;
import org.broadinstitute.dropseqrna.annotation.GeneAnnotationReader;
import org.broadinstitute.dropseqrna.barnyard.BarcodeListRetrieval;
import org.broadinstitute.dropseqrna.barnyard.ParseBarcodeFile;
import org.broadinstitute.dropseqrna.cmdline.DropSeq;
import org.broadinstitute.dropseqrna.utils.FilteredIterator;
import org.broadinstitute.dropseqrna.utils.StringTagComparator;
import org.broadinstitute.dropseqrna.utils.readiterators.SamRecordSortingIteratorFactory;
import picard.analysis.MetricAccumulationLevel;
import picard.analysis.RnaSeqMetrics;
import picard.analysis.directed.RnaSeqMetricsCollector;
import picard.annotation.Gene;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.metrics.PerUnitMetricCollector;

@CommandLineProgramProperties(usage="An adaptation of the Picard RnaSeqMetricsCollector to collect per-cell data.  In particular, the exon/intron/genic/intragenic/rRNA levels This program looks at the mapping from each of the reads in both genomic and library space, and selects the better mapping.", usageShort="Measures the intron/exon/genic/intergenic/rRNA levels of each cell.", programGroup=DropSeq.class)
public class SingleCellRnaSeqMetricsCollector
extends CommandLineProgram {
    /** Class-wide logger used for progress and status messages. */
    private static final Log log = Log.getInstance(SingleCellRnaSeqMetricsCollector.class);
    /** Alignment file to analyze; checked for readability at the start of {@code doWork()}. */
    @Option(shortName="I", doc="The input SAM or BAM file to analyze.")
    public File INPUT;
    /** Destination of the metrics file; checked for writability at the start of {@code doWork()}. */
    @Option(shortName="O", doc="Output file of per-cell exonic/intronic/genic/intergenic/rRNA levels.  This supports zipped formats like gz and bz2.")
    public File OUTPUT;
    /** Name of the SAM tag holding the cell barcode; its value is copied into each read's RG attribute. */
    @Option(doc="The cell barcode tag.  If there are no reads with this tag, the program will assume that all reads belong to the same cell and process in single sample mode.")
    public String CELL_BARCODE_TAG = "XC";
    /** Gene annotation file handed to {@code GeneAnnotationReader.loadAnnotationsFile}. */
    @Option(doc="Gene annotations in refFlat or GTF format.")
    public File ANNOTATIONS_FILE;
    /** Optional rRNA interval list; when null the ribosomal base counter is initialised to null instead of 0. */
    @Option(doc="Location of rRNA sequences in genome, in interval_list format.  If not specified no bases will be identified as being ribosomal.  Format described here: http://picard.sourceforge.net/javadoc/net/sf/picard/util/IntervalList.html", optional=true)
    public File RIBOSOMAL_INTERVALS;
    /** Strand specificity passed through unchanged to the underlying metrics collector. */
    @Option(shortName="STRAND", doc="For strand-specific library prep. For unpaired reads, use FIRST_READ_TRANSCRIPTION_STRAND if the reads are expected to be on the transcription strand.")
    public RnaSeqMetricsCollector.StrandSpecificity STRAND_SPECIFICITY = RnaSeqMetricsCollector.StrandSpecificity.NONE;
    /** rRNA fragment fraction passed through unchanged to the underlying metrics collector. */
    @Option(doc="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair by this must in order to be considered rRNA.")
    public double RRNA_FRAGMENT_PERCENTAGE = 0.8;
    /** Number of barcodes to select by read count; mutually exclusive with {@link #CELL_BC_FILE}. */
    @Option(doc="Number of cells that you think are in the library. The top NUM_CORE_BARCODES will be reported in the output.", mutex={"CELL_BC_FILE"})
    public Integer NUM_CORE_BARCODES = null;
    /** Explicit barcode list; when non-null it is used instead of selecting barcodes by read count. */
    @Option(doc="Override NUM_CORE_BARCODES, and process reads that have the cell barcodes in this file instead.  When supplied, output is ordered to match the input barcode ordering. The file has 1 column with no header.", mutex={"NUM_CORE_BARCODES"})
    public File CELL_BC_FILE = null;
    /**
     * Minimum mapping quality. Used both when selecting barcodes by read count and when
     * filtering the reads that are fed to the metrics collector.
     */
    @Option(doc="The map quality of the read to be included for determining which cells will be measured.")
    public Integer READ_MQ = 10;
    /**
     * Names of reference sequences whose aligned bases are tallied as MT bases.
     * NOTE(review): no default is assigned here, so this is null unless the command-line
     * parser (or a caller) populates it - confirm before using this class programmatically.
     */
    @Option(doc="If specified, count bases that align to this sequence separately from other categories")
    public List<String> MT_SEQUENCE;

    /**
     * Entry point invoked by the command-line framework.
     *
     * <p>Validates the input/output files and the requested MT sequences, determines the
     * cell barcodes to report, accumulates per-cell metrics and writes them to
     * {@link #OUTPUT}.
     *
     * @return 0 on success
     * @throws RuntimeException if an {@code MT_SEQUENCE} entry is absent from the input header
     * @throws TranscriptomeException if closing the output writer fails
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsReadable(this.INPUT);
        IOUtil.assertFileIsWritable(this.OUTPUT);
        this.assertMtSequencesInDictionary();

        final List<String> cellBarcodes = this.getCellBarcodes(this.CELL_BC_FILE, this.INPUT, this.CELL_BARCODE_TAG, this.READ_MQ, this.NUM_CORE_BARCODES);
        final RnaSeqMetricsCollector collector = this.getRNASeqMetricsCollector(this.CELL_BARCODE_TAG, cellBarcodes, this.INPUT, this.STRAND_SPECIFICITY, this.RRNA_FRAGMENT_PERCENTAGE, this.READ_MQ, this.ANNOTATIONS_FILE, this.RIBOSOMAL_INTERVALS);

        final MetricsFile<RnaSeqMetrics, Integer> file = this.getMetricsFile();
        log.info("Adding metrics to file.  This may take a while, with no progress messages.");
        collector.addAllLevelsToFile(file);

        final BufferedWriter writer = IOUtil.openFileForBufferedWriting(this.OUTPUT);
        try {
            file.write(writer);
            // Close explicitly so that a failed flush is reported rather than swallowed.
            writer.close();
        }
        catch (IOException io) {
            throw new TranscriptomeException("Problem writing file", io);
        }
        finally {
            // Releases the file handle if write() threw; closing an already closed writer is harmless.
            CloserUtil.close(writer);
        }
        return 0;
    }

    /**
     * Verifies that every entry of {@link #MT_SEQUENCE} names a sequence in the header of
     * {@link #INPUT}. The input is opened once and always closed again; nothing is opened
     * when no MT sequences were requested.
     *
     * @throws RuntimeException naming the first requested sequence that is missing
     */
    private void assertMtSequencesInDictionary() {
        if (this.MT_SEQUENCE == null || this.MT_SEQUENCE.isEmpty()) {
            return;
        }
        final SamReader reader = SamReaderFactory.makeDefault().open(this.INPUT);
        try {
            final SAMFileHeader header = reader.getFileHeader();
            for (final String mtSequence : this.MT_SEQUENCE) {
                if (header.getSequence(mtSequence) == null) {
                    throw new RuntimeException("MT_SEQUENCE '" + mtSequence + "' is not found in sequence dictionary in " + this.INPUT.getAbsolutePath());
                }
            }
        }
        finally {
            CloserUtil.close(reader);
        }
    }

    /**
     * Resolves the cell barcodes to report on.
     *
     * @param cellBCFile      explicit barcode file, or null to select barcodes by read count
     * @param bamFile         alignment file scanned when no barcode file is given
     * @param cellBarcodeTag  SAM tag that carries the cell barcode
     * @param readMQ          mapping-quality threshold forwarded to the read-count selection
     * @param numCoreBarcodes number of barcodes forwarded to the read-count selection
     * @return the barcodes read from {@code cellBCFile} when it is non-null, otherwise the
     *         list produced by {@code BarcodeListRetrieval.getListCellBarcodesByReadCount}
     */
    private List<String> getCellBarcodes(File cellBCFile, File bamFile, String cellBarcodeTag, int readMQ, Integer numCoreBarcodes) {
        if (cellBCFile == null) {
            final BarcodeListRetrieval retrieval = new BarcodeListRetrieval();
            return retrieval.getListCellBarcodesByReadCount(bamFile, cellBarcodeTag, readMQ, null, numCoreBarcodes);
        }
        final List<String> barcodesFromFile = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
        final int barcodeCount = barcodesFromFile.size();
        log.info("Found " + barcodeCount + " cell barcodes in file");
        return barcodesFromFile;
    }

    /**
     * Builds a metrics collector with one read group per cell barcode and feeds it every
     * qualifying read of {@code inBAM}.
     *
     * <p>Each read's cell barcode is stored in its RG attribute before the read is handed to
     * the collector. The read iterator is always closed, including when accumulation fails.
     *
     * @param cellBarcodeTag     SAM tag that carries the cell barcode
     * @param cellBarcodes       barcodes to collect metrics for
     * @param inBAM              alignment file to read
     * @param strand             strand specificity forwarded to the collector
     * @param rRNAFragmentPCT    rRNA fragment fraction forwarded to the collector
     * @param readMQ             minimum mapping quality of reads that are processed
     * @param annotationsFile    gene annotation file
     * @param rRNAIntervalsFile  rRNA interval list, may be null
     * @return the collector after {@code finish()} has been called on it
     */
    RnaSeqMetricsCollector getRNASeqMetricsCollector(String cellBarcodeTag, List<String> cellBarcodes, File inBAM, RnaSeqMetricsCollector.StrandSpecificity strand, double rRNAFragmentPCT, int readMQ, File annotationsFile, File rRNAIntervalsFile) {
        final CollectorFactory factory = new CollectorFactory(inBAM, strand, rRNAFragmentPCT, annotationsFile, rRNAIntervalsFile);
        final RnaSeqMetricsCollector collector = factory.getCollector(cellBarcodes);
        final List<SAMReadGroupRecord> readGroups = factory.getReadGroups(cellBarcodes);
        final CloseableIterator<SAMRecord> iter = this.getReadsInTagOrder(inBAM, cellBarcodeTag, readGroups, cellBarcodes, readMQ);
        try {
            final ProgressLogger progress = new ProgressLogger(log, 1000000, "Accumulating metrics");
            while (iter.hasNext()) {
                final SAMRecord r = iter.next();
                final String cellBarcode = r.getStringAttribute(cellBarcodeTag);
                // The collector accumulates at read-group level, so the barcode doubles as the RG id.
                r.setAttribute("RG", cellBarcode);
                progress.record(r);
                collector.acceptRecord(r, null);
            }
        }
        finally {
            // Previously never closed, leaking whatever the sorted iterator holds open.
            CloserUtil.close(iter);
        }
        collector.finish();
        return collector;
    }

    /**
     * Returns the reads of {@code bamFile} that belong to one of {@code allCellBarcodes} and
     * have a mapping quality of at least {@code mapQuality}, sorted by the value of
     * {@code primaryTag}.
     *
     * <p>Every read that passes the filter has its RG attribute set to its cell barcode. The
     * supplied read groups are added both to the input reader's header and to the header
     * used for sorting.
     *
     * <p>Closing the returned iterator also closes the underlying {@link SamReader}; the
     * reader is likewise closed if building the sorted iterator fails.
     *
     * @param bamFile         alignment file to read
     * @param primaryTag      SAM tag that carries the cell barcode and defines the sort order
     * @param rg              read groups (one per cell barcode) to register in the headers
     * @param allCellBarcodes barcodes whose reads are retained
     * @param mapQuality      minimum mapping quality (inclusive) of retained reads
     * @return a closeable iterator over the retained reads in tag order
     */
    private CloseableIterator<SAMRecord> getReadsInTagOrder(File bamFile, final String primaryTag, List<SAMReadGroupRecord> rg, List<String> allCellBarcodes, final int mapQuality) {
        final SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
        final SAMFileHeader inputHeader = reader.getFileHeader();
        final SAMSequenceDictionary dict = inputHeader.getSequenceDictionary();
        final List<SAMProgramRecord> programs = inputHeader.getProgramRecords();
        final Set<String> cellBarcodeSet = new HashSet<String>(allCellBarcodes);

        final SAMFileHeader writerHeader = new SAMFileHeader();
        for (final SAMReadGroupRecord readGroup : rg) {
            inputHeader.addReadGroup(readGroup);
            writerHeader.addReadGroup(readGroup);
        }
        writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
        writerHeader.setSequenceDictionary(dict);
        for (final SAMProgramRecord programRecord : programs) {
            writerHeader.addProgramRecord(programRecord);
        }

        final FilteredIterator<SAMRecord> rgAddingFilter = new FilteredIterator<SAMRecord>(reader.iterator()){

            @Override
            public boolean filterOut(SAMRecord r) {
                final String cellBarcode = r.getStringAttribute(primaryTag);
                if (cellBarcodeSet.contains(cellBarcode) && r.getMappingQuality() >= mapQuality) {
                    r.setAttribute("RG", cellBarcode);
                    return false;
                }
                return true;
            }
        };

        final ProgressLogger progress = new ProgressLogger(log, 1000000, "Preparing reads in core barcodes");
        final CloseableIterator<SAMRecord> sortedIterator;
        try {
            sortedIterator = SamRecordSortingIteratorFactory.create(writerHeader, rgAddingFilter, new StringTagComparator(primaryTag), progress);
        }
        catch (RuntimeException e) {
            // Do not leak the input file handle when sorting fails.
            CloserUtil.close(reader);
            throw e;
        }
        log.info("Sorting finished.");

        // Tie the reader's lifetime to the returned iterator so callers have a single thing to close.
        return new CloseableIterator<SAMRecord>(){

            @Override
            public boolean hasNext() {
                return sortedIterator.hasNext();
            }

            @Override
            public SAMRecord next() {
                return sortedIterator.next();
            }

            @Override
            public void remove() {
                sortedIterator.remove();
            }

            @Override
            public void close() {
                try {
                    sortedIterator.close();
                }
                finally {
                    CloserUtil.close(reader);
                }
            }
        };
    }

    /**
     * Command-line entry point: runs the program and exits the JVM with the status code
     * returned by {@code instanceMain}.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        final SingleCellRnaSeqMetricsCollector program = new SingleCellRnaSeqMetricsCollector();
        final int exitCode = program.instanceMain(args);
        System.exit(exitCode);
    }

    private class RnaSeqMtMetricsCollector
    extends RnaSeqMetricsCollector {
        public RnaSeqMtMetricsCollector(Set<MetricAccumulationLevel> accumulationLevels, List<SAMReadGroupRecord> samRgRecords, Long ribosomalBasesInitialValue, OverlapDetector<Gene> geneOverlapDetector, OverlapDetector<Interval> ribosomalSequenceOverlapDetector, HashSet<Integer> ignoredSequenceIndices, int minimumLength, RnaSeqMetricsCollector.StrandSpecificity strandSpecificity, double rrnaFragmentPercentage, boolean collectCoverageStatistics) {
            super(accumulationLevels, samRgRecords, ribosomalBasesInitialValue, geneOverlapDetector, ribosomalSequenceOverlapDetector, ignoredSequenceIndices, minimumLength, strandSpecificity, rrnaFragmentPercentage, collectCoverageStatistics);
        }

        protected PerUnitMetricCollector<RnaSeqMetrics, Integer, SAMRecord> makeChildCollector(String sample, String library, String readGroup) {
            return new PerUnitRnaSeqMtMetricsCollector(sample, library, readGroup, this.ribosomalInitialValue);
        }

        private class PerUnitRnaSeqMtMetricsCollector
        extends RnaSeqMetricsCollector.PerUnitRnaSeqMetricsCollector {
            public PerUnitRnaSeqMtMetricsCollector(String sample, String library, String readGroup, Long ribosomalBasesInitialValue) {
                super((RnaSeqMetricsCollector)RnaSeqMtMetricsCollector.this, (RnaSeqMetrics)new RnaSeqMtMetrics(), sample, library, readGroup, ribosomalBasesInitialValue);
            }

            private RnaSeqMtMetrics castMetrics() {
                return (RnaSeqMtMetrics)this.metrics;
            }

            public void acceptRecord(SAMRecord rec) {
                if (SingleCellRnaSeqMetricsCollector.this.MT_SEQUENCE.contains(rec.getReferenceName()) && !rec.getReadFailsVendorQualityCheckFlag() && !rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
                    this.metrics.PF_BASES += (long)rec.getReadLength();
                    int numAlignedBases = this.getNumAlignedBases(rec);
                    this.castMetrics().MT_BASES += (long)numAlignedBases;
                    this.metrics.PF_ALIGNED_BASES += (long)numAlignedBases;
                } else {
                    super.acceptRecord(rec);
                }
            }

            public void finish() {
                super.finish();
                if (this.metrics.PF_ALIGNED_BASES > 0L) {
                    this.castMetrics().PCT_MT_BASES = (double)this.castMetrics().MT_BASES / (double)this.metrics.PF_ALIGNED_BASES;
                }
            }
        }
    }

    /**
     * {@link RnaSeqMetrics} extended with two columns for bases aligned to the sequences
     * listed in {@code MT_SEQUENCE}. Field declaration order is kept as is.
     */
    public static class RnaSeqMtMetrics
    extends RnaSeqMetrics {
        /** Number of aligned bases from reads on an MT sequence. */
        public long MT_BASES = 0L;
        /** MT_BASES divided by PF_ALIGNED_BASES; stays 0 when no bases aligned. */
        public double PCT_MT_BASES = 0.0;
    }

    /**
     * Loads the annotation-derived state (gene and rRNA overlap detectors, ignored sequence
     * indices) once from the input header and builds collectors and read groups from it.
     */
    private class CollectorFactory {
        final OverlapDetector<Gene> geneOverlapDetector;
        final Long ribosomalBasesInitialValue;
        final OverlapDetector<Interval> ribosomalSequenceOverlapDetector;
        final HashSet<Integer> ignoredSequenceIndices;
        final RnaSeqMetricsCollector.StrandSpecificity specificity;
        final double rnaFragPct;

        /**
         * @param bamFile            alignment file whose header supplies the sequence dictionary
         * @param specificity        strand specificity handed to created collectors
         * @param rnaFragPct         rRNA fragment fraction handed to created collectors
         * @param annotationsFile    gene annotation file to load
         * @param ribosomalIntervals rRNA interval list, or null; when null the ribosomal base
         *                           counter starts as null instead of 0
         */
        public CollectorFactory(File bamFile, RnaSeqMetricsCollector.StrandSpecificity specificity, double rnaFragPct, File annotationsFile, File ribosomalIntervals) {
            this.specificity = specificity;
            this.rnaFragPct = rnaFragPct;
            this.ribosomalBasesInitialValue = ribosomalIntervals != null ? Long.valueOf(0L) : null;
            final SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
            try {
                final SAMFileHeader header = reader.getFileHeader();
                this.geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, header.getSequenceDictionary());
                log.info("Loaded " + this.geneOverlapDetector.getAll().size() + " genes.");
                this.ribosomalSequenceOverlapDetector = RnaSeqMetricsCollector.makeOverlapDetector(bamFile, header, ribosomalIntervals, log);
                this.ignoredSequenceIndices = RnaSeqMetricsCollector.makeIgnoredSequenceIndicesSet(header, new HashSet<String>());
            }
            finally {
                // Close the reader even when annotation or interval loading throws.
                CloserUtil.close(reader);
            }
        }

        /**
         * Creates an MT-aware collector that accumulates at read-group level, with one read
         * group per cell barcode and coverage statistics disabled.
         *
         * @param cellBarcodes barcodes to create read groups for
         * @return a new collector configured from this factory's state
         */
        public RnaSeqMetricsCollector getCollector(List<String> cellBarcodes) {
            // Value passed as the collector's minimumLength constructor argument.
            final int minimumLength = 500;
            final List<SAMReadGroupRecord> readGroups = this.getReadGroups(cellBarcodes);
            final Set<MetricAccumulationLevel> accumulationLevels = CollectionUtil.makeSet(MetricAccumulationLevel.READ_GROUP);
            return new RnaSeqMtMetricsCollector(accumulationLevels, readGroups, this.ribosomalBasesInitialValue, this.geneOverlapDetector, this.ribosomalSequenceOverlapDetector, this.ignoredSequenceIndices, minimumLength, this.specificity, this.rnaFragPct, false);
        }

        /**
         * Builds one read group per barcode, using the barcode as id, library, platform,
         * sample and platform unit.
         *
         * @param cellBarcodes barcodes to convert
         * @return read groups in the same order as {@code cellBarcodes}
         */
        public List<SAMReadGroupRecord> getReadGroups(List<String> cellBarcodes) {
            final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>(cellBarcodes.size());
            for (final String id : cellBarcodes) {
                final SAMReadGroupRecord rg = new SAMReadGroupRecord(id);
                rg.setLibrary(id);
                rg.setPlatform(id);
                rg.setSample(id);
                rg.setPlatformUnit(id);
                readGroups.add(rg);
            }
            return readGroups;
        }
    }
}

