/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.barnyard;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.dropseqrna.barnyard.BarcodeListRetrieval;
import org.broadinstitute.dropseqrna.barnyard.DGECommandLineBase;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.DgeHeader;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.DgeHeaderCodec;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.DgeHeaderLibrary;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.UMICollection;
import org.broadinstitute.dropseqrna.cmdline.DropSeq;
import org.broadinstitute.dropseqrna.utils.ObjectCounter;
import org.broadinstitute.dropseqrna.utils.editdistance.EDUtils;
import org.broadinstitute.dropseqrna.utils.io.ErrorCheckingPrintStream;
import org.broadinstitute.dropseqrna.utils.readiterators.SamFileMergeUtil;
import org.broadinstitute.dropseqrna.utils.readiterators.UMIIterator;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;

@CommandLineProgramProperties(usage="Measures the digital expression of a library.  Method: 1) For each gene, find the molecular barcodes on the exons of that gene.  2) Determine how many HQ mapped reads are assigned to each barcode.  3) Collapse barcodes by edit distance.  4) Throw away barcodes with less than threshold # of reads. 5) Count the number of remaining unique molecular barcodes for the gene.This program requires a tag for what gene a read is on, a molecular barcode tag, and a exon tag.  The exon and gene tags may not be present on every read.When filtering the data for a set of barcodes to use, the data is filtered by ONE of the following methods (and if multiple params are filled in, the top one takes precidence):1) CELL_BC_FILE, to filter by the some fixed list of cell barcodes2) MIN_NUM_GENES_PER_CELL 3) MIN_NUM_TRANSCRIPTS_PER_CELL 4) NUM_CORE_BARCODES 5) MIN_NUM_READS_PER_CELL", usageShort="Calculate Digital Expression", programGroup=DropSeq.class)
public class DigitalExpression
extends DGECommandLineBase {
    private static final Log log = Log.getInstance(DigitalExpression.class);
    @Option(doc="A summary of the digital expression output, containing 3 columns - the cell barcode, the #genes, and the #transcripts.", optional=true)
    public File SUMMARY = null;
    @Option(doc="Output number of reads instead of number of unique molecular barcodes.", optional=true)
    public boolean OUTPUT_READS_INSTEAD = false;
    @Option(shortName="O", doc="Output file of DGE Matrix.  Genes are in rows, cells in columns.  The first column contains the gene name. This supports zipped formats like gz and bz2.")
    public File OUTPUT;
    @Option(doc="Output only genes with at least this total expression level, after summing across all cells", optional=true)
    public Integer MIN_SUM_EXPRESSION = null;
    @Option(shortName="H", doc="If true, write a header in the DGE file.  If not specified, and UEI is specified, it is set to true", optional=true)
    public Boolean OUTPUT_HEADER;
    @Option(shortName="UEI", doc="If OUTPUT_HEADER=true, this is required", optional=true)
    public String UNIQUE_EXPERIMENT_ID;
    @Option(shortName="R", optional=true, doc="Reference to which BAM is aligned.  This is only used to put into DGE header, if OUTPUT_HEADER=true.  If not specified, it is extracted from the INPUT header if possible.")
    public File REFERENCE;
    private boolean OUTPUT_EXPRESSED_GENES_ONLY = false;
    static final Comparator<DESummary> TRANSCRIPT_ORDER_DESCENDING = new Comparator<DESummary>(){

        @Override
        public int compare(DESummary e1, DESummary e2) {
            return e1.NUM_TRANSCRIPTS > e2.NUM_TRANSCRIPTS ? -1 : (e1.NUM_TRANSCRIPTS == e2.NUM_TRANSCRIPTS ? 0 : 1);
        }
    };

    protected int doWork() {
        List<String> cellBarcodes;
        String uri;
        SAMFileHeader header;
        SAMSequenceRecord sequence;
        IOUtil.assertFileIsReadable((File)this.INPUT);
        IOUtil.assertFileIsWritable((File)this.OUTPUT);
        if (this.OUTPUT_HEADER == null) {
            this.OUTPUT_HEADER = this.UNIQUE_EXPERIMENT_ID != null;
        }
        if (this.SUMMARY != null) {
            IOUtil.assertFileIsWritable((File)this.SUMMARY);
        }
        if (this.REFERENCE == null && this.OUTPUT_HEADER.booleanValue() && (sequence = (header = SamReaderFactory.makeDefault().open(this.INPUT).getFileHeader()).getSequence(0)) != null && (uri = sequence.getAttribute("UR")) != null) {
            String filePrefix = "file:";
            if (uri.startsWith("file:")) {
                uri = uri.substring("file:".length());
            }
            this.REFERENCE = new File(uri);
        }
        if ((cellBarcodes = new BarcodeListRetrieval().getCellBarcodes(this.INPUT, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.GENE_EXON_TAG, this.STRAND_TAG, this.CELL_BC_FILE, this.READ_MQ, this.MIN_NUM_TRANSCRIPTS_PER_CELL, this.MIN_NUM_GENES_PER_CELL, this.MIN_NUM_READS_PER_CELL, this.NUM_CORE_BARCODES, this.EDIT_DISTANCE, this.MIN_BC_READ_THRESHOLD, this.USE_STRAND_INFO)).isEmpty()) {
            log.error(new Object[]{"Running digital expression without somehow selecting a set of barcodes to process no longer supported."});
            return 1;
        }
        log.info(new Object[]{"Calculating digital expression for [" + cellBarcodes.size() + "] cells."});
        this.digitalExpression(cellBarcodes);
        return 0;
    }

    private void digitalExpression(List<String> cellBarcodes) {
        UMICollection batch;
        ErrorCheckingPrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting((File)this.OUTPUT));
        if (this.OUTPUT_HEADER.booleanValue()) {
            this.writeDgeHeader(out);
        }
        this.writeHeader(out, cellBarcodes);
        UMIIterator umiIterator = new UMIIterator(SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false), this.GENE_EXON_TAG, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.STRAND_TAG, this.READ_MQ, true, this.USE_STRAND_INFO, cellBarcodes);
        String gene = null;
        HashMap<String, Integer> transcriptCountMap = new HashMap<String, Integer>();
        HashMap<String, Integer> readCountMap = new HashMap<String, Integer>();
        Map<String, DESummary> summaryMap = DigitalExpression.initializeSummary(cellBarcodes);
        while ((batch = umiIterator.next()) != null) {
            int readCount;
            int molBCCount;
            if (batch == null || batch.isEmpty()) continue;
            String currentGene = batch.getGeneName();
            if (gene == null) {
                gene = currentGene;
            }
            if (gene.equals(currentGene)) {
                if (this.RARE_UMI_FILTER_THRESHOLD > 0.0) {
                    batch.filterByUMIFrequency(this.RARE_UMI_FILTER_THRESHOLD);
                }
                molBCCount = batch.getDigitalExpression(this.MIN_BC_READ_THRESHOLD, this.EDIT_DISTANCE, this.OUTPUT_READS_INSTEAD);
                transcriptCountMap.put(batch.getCellBarcode(), molBCCount);
                readCount = batch.getDigitalExpression(this.MIN_BC_READ_THRESHOLD, this.EDIT_DISTANCE, true);
                readCountMap.put(batch.getCellBarcode(), readCount);
            }
            if (gene.equals(currentGene)) continue;
            this.writeStats(gene, transcriptCountMap, cellBarcodes, out);
            DigitalExpression.addToSummary(readCountMap, transcriptCountMap, summaryMap);
            transcriptCountMap.clear();
            if (this.RARE_UMI_FILTER_THRESHOLD > 0.0) {
                batch.filterByUMIFrequency(this.RARE_UMI_FILTER_THRESHOLD);
            }
            molBCCount = batch.getDigitalExpression(this.MIN_BC_READ_THRESHOLD, this.EDIT_DISTANCE, this.OUTPUT_READS_INSTEAD);
            transcriptCountMap.put(batch.getCellBarcode(), molBCCount);
            readCount = batch.getDigitalExpression(this.MIN_BC_READ_THRESHOLD, this.EDIT_DISTANCE, true);
            readCountMap.put(batch.getCellBarcode(), readCount);
            gene = currentGene;
        }
        if (!transcriptCountMap.isEmpty()) {
            this.writeStats(gene, transcriptCountMap, cellBarcodes, out);
            DigitalExpression.addToSummary(readCountMap, transcriptCountMap, summaryMap);
        }
        ((PrintStream)out).close();
        if (this.SUMMARY != null) {
            this.writeSummary(summaryMap.values(), this.SUMMARY);
        }
        CloserUtil.close((Object)umiIterator);
    }

    private void writeDgeHeader(PrintStream out) {
        DgeHeader header = new DgeHeader();
        header.setExpressionFormat(DgeHeader.ExpressionFormat.raw);
        DgeHeaderLibrary lib = new DgeHeaderLibrary(this.UNIQUE_EXPERIMENT_ID);
        if (this.REFERENCE != null) {
            lib.setReference(this.REFERENCE.getAbsoluteFile());
        }
        lib.setInput(this.INPUT.getAbsoluteFile());
        this.setDgeHeaderLibraryField(lib, "OUTPUT_READS_INSTEAD", this.OUTPUT_READS_INSTEAD);
        this.setDgeHeaderLibraryField(lib, "MIN_SUM_EXPRESSION", this.MIN_SUM_EXPRESSION);
        this.setDgeHeaderLibraryField(lib, "EDIT_DISTANCE", this.EDIT_DISTANCE);
        this.setDgeHeaderLibraryField(lib, "READ_MQ", this.READ_MQ);
        this.setDgeHeaderLibraryField(lib, "MIN_BC_READ_THRESHOLD", this.MIN_BC_READ_THRESHOLD);
        this.setDgeHeaderLibraryField(lib, "MIN_NUM_READS_PER_CELL", this.MIN_NUM_READS_PER_CELL);
        this.setDgeHeaderLibraryField(lib, "MIN_NUM_GENES_PER_CELL", this.MIN_NUM_GENES_PER_CELL);
        this.setDgeHeaderLibraryField(lib, "MIN_NUM_TRANSCRIPTS_PER_CELL", this.MIN_NUM_TRANSCRIPTS_PER_CELL);
        this.setDgeHeaderLibraryField(lib, "NUM_CORE_BARCODES", this.NUM_CORE_BARCODES);
        if (this.CELL_BC_FILE != null) {
            this.setDgeHeaderLibraryField(lib, "CELL_BC_FILE", this.CELL_BC_FILE.getAbsoluteFile());
        } else {
            this.setDgeHeaderLibraryField(lib, "CELL_BC_FILE", null);
        }
        this.setDgeHeaderLibraryField(lib, "USE_STRAND_INFO", this.USE_STRAND_INFO);
        this.setDgeHeaderLibraryField(lib, "RARE_UMI_FILTER_THRESHOLD", this.RARE_UMI_FILTER_THRESHOLD);
        header.addLibrary(lib);
        header.addCommand(this.getCommandLine());
        OutputStreamWriter writer = new OutputStreamWriter(out);
        new DgeHeaderCodec().encode(writer, header);
        try {
            writer.flush();
        }
        catch (IOException e) {
            throw new RuntimeException("Exception writing " + this.OUTPUT, e);
        }
    }

    private <T> void setDgeHeaderLibraryField(DgeHeaderLibrary lib, String key, T value) {
        String stringValue = value != null ? value.toString() : "NULL";
        lib.setTag(key, stringValue);
    }

    public ObjectCounter<String> collapseByEditDistance(ObjectCounter<String> barcodes, int editDistance, int threshold) {
        ObjectCounter<String> result = new ObjectCounter<String>();
        List<String> barcodeList = barcodes.getKeysOrderedByCount(true);
        if (this.EDIT_DISTANCE == 0) {
            for (String barcode : barcodeList) {
                int count = barcodes.getCountForKey(barcode);
                result.setCount(barcode, count);
            }
            return result;
        }
        while (!barcodeList.isEmpty()) {
            String b = barcodeList.get(0);
            barcodeList.remove(b);
            Set<String> closeBC = EDUtils.getInstance().getStringsWithinEditDistanceWithIndel(b, barcodeList, editDistance);
            barcodeList.removeAll(closeBC);
            closeBC.add(b);
            int totalCount = 0;
            for (String bc : closeBC) {
                int count = barcodes.getCountForKey(bc);
                totalCount += count;
            }
            result.setCount(b, totalCount);
        }
        return result;
    }

    private void writeStats(String gene, Map<String, Integer> countMap, List<String> cellBarcodes, PrintStream out) {
        int totalCount = 0;
        ArrayList<String> line = new ArrayList<String>(cellBarcodes.size() + 1);
        line.add(gene);
        for (String b : cellBarcodes) {
            Integer count = countMap.get(b);
            String v = "0";
            if (count != null) {
                totalCount += count.intValue();
                v = count.toString();
            }
            line.add(v);
        }
        if (this.OUTPUT_EXPRESSED_GENES_ONLY & totalCount == 0) {
            return;
        }
        if (this.MIN_SUM_EXPRESSION != null && totalCount < this.MIN_SUM_EXPRESSION) {
            return;
        }
        String h = StringUtils.join(line, (String)"\t");
        out.println(h);
    }

    private void writeHeader(PrintStream out, List<String> cellBarcodes) {
        ArrayList<String> header = new ArrayList<String>(cellBarcodes.size() + 1);
        header.add("GENE");
        for (String c : cellBarcodes) {
            header.add(c);
        }
        String h = StringUtils.join(header, (String)"\t");
        out.println(h);
    }

    public static Map<String, DESummary> initializeSummary(Collection<String> cellBarcodes) {
        HashMap<String, DESummary> map = new HashMap<String, DESummary>();
        for (String s : cellBarcodes) {
            DESummary des = new DESummary(s);
            map.put(s, des);
        }
        return map;
    }

    public static Map<String, DESummary> addToSummary(Map<String, Integer> readCountMap, Map<String, Integer> transcriptCountMap, Map<String, DESummary> summaryMap) {
        for (String cellBC : transcriptCountMap.keySet()) {
            DESummary sum = summaryMap.get(cellBC);
            ++sum.NUM_GENES;
            sum.NUM_TRANSCRIPTS += transcriptCountMap.get(cellBC).intValue();
            sum.NUM_GENIC_READS += readCountMap.get(cellBC).intValue();
        }
        return summaryMap;
    }

    void writeSummary(Collection<DESummary> summaryCollection, File outFile) {
        MetricsFile out = this.getMetricsFile();
        ArrayList<DESummary> sc = new ArrayList<DESummary>(summaryCollection);
        Collections.sort(sc, TRANSCRIPT_ORDER_DESCENDING);
        for (DESummary z : sc) {
            out.addMetric((MetricBase)z);
        }
        out.write(outFile);
    }

    public static void main(String[] args) {
        System.exit(new DigitalExpression().instanceMain(args));
    }

    public static class DESummary
    extends MetricBase {
        public String CELL_BARCODE;
        public int NUM_GENIC_READS;
        public int NUM_TRANSCRIPTS;
        public int NUM_GENES;

        public DESummary(String cellBarcode) {
            this.CELL_BARCODE = cellBarcode;
            this.NUM_GENES = 0;
            this.NUM_TRANSCRIPTS = 0;
            this.NUM_GENIC_READS = 0;
        }
    }
}

