/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.barnyard;

import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.dropseqrna.TranscriptomeException;
import org.broadinstitute.dropseqrna.barnyard.BarcodeListRetrieval;
import org.broadinstitute.dropseqrna.barnyard.DGECommandLineBase;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.UMICollection;
import org.broadinstitute.dropseqrna.cmdline.DropSeq;
import org.broadinstitute.dropseqrna.utils.ObjectCounter;
import org.broadinstitute.dropseqrna.utils.OutputWriterUtil;
import org.broadinstitute.dropseqrna.utils.readiterators.SamFileMergeUtil;
import org.broadinstitute.dropseqrna.utils.readiterators.UMIIterator;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;

/**
 * Command-line program that writes a tab-separated table with one row per
 * (cell barcode, gene, molecular barcode) and the number of observations of
 * that molecular barcode, after collapsing molecular barcodes with
 * {@code EDIT_DISTANCE}.
 *
 * <p>The set of cell barcodes to report is obtained from
 * {@link BarcodeListRetrieval#getCellBarcodes}; when that set is empty, no
 * cell-barcode filtering is applied by this class.
 */
@CommandLineProgramProperties(usage="For each gene, count the number of times each molecular barcode is observed [The UMI]Similar to digital expression, reads are filtered on map quality, and must overlap exons as well as genes. This program requires a tag for what gene a read is on, a molecular barcode tag, and a exon tag.  The exon and gene tags may not be present on every read.When filtering the data for a set of barcodes to use, the data is filtered by ONE of the following methods (and if multiple params are filled in, the top one takes precidence):\n1) CELL_BC_FILE, to filter by the some fixed list of cell barcodes2) MIN_NUM_GENES_PER_CELL 3) MIN_NUM_TRANSCRIPTS_PER_CELL 4) NUM_CORE_BARCODES 5) MIN_NUM_READS_PER_CELL", usageShort="Get the number of reads for each UMI", programGroup=DropSeq.class)
public class GatherMolecularBarcodeDistributionByGene
extends DGECommandLineBase {
    private static final Log log = Log.getInstance(GatherMolecularBarcodeDistributionByGene.class);
    @Option(shortName="O", doc="Output file of with 4 columns: CELL, GENE, MOLECULAR BC, #Observations. This supports zipped formats like gz and bz2.")
    public File OUTPUT;

    /**
     * Validates the input/output files, writes the header row followed by one
     * row per collapsed molecular barcode, and closes the output.
     *
     * @return 0 on success
     * @throws TranscriptomeException if closing the output file fails after
     *         all rows were written
     */
    protected int doWork() {
        // INPUT is inherited and its declaration is not visible in this file,
        // so the original cast is retained.
        IOUtil.assertFileIsReadable((File) this.INPUT);
        IOUtil.assertFileIsWritable(this.OUTPUT);

        BufferedWriter out = IOUtil.openFileForBufferedWriting(this.OUTPUT);
        boolean finished = false;
        try {
            this.writePerTranscriptHeader(out);
            this.writeMolecularBarcodeCounts(out);
            finished = true;
        } finally {
            if (!finished) {
                // Something already failed: release the file handle on a
                // best-effort basis so the original exception is the one
                // that propagates to the caller.
                CloserUtil.close((Object) out);
            }
        }

        // Success path: a failure to flush/close means the output is
        // incomplete, so it must be reported.
        try {
            out.close();
        }
        catch (IOException io) {
            throw new TranscriptomeException("Problem writing file", io);
        }
        return 0;
    }

    /**
     * Iterates over the UMI collections of {@code INPUT} and writes the
     * per-molecular-barcode counts of every non-empty collection whose cell
     * barcode passes the barcode filter. The iterator is closed even when
     * writing fails.
     *
     * @param out destination for the data rows
     */
    private void writeMolecularBarcodeCounts(BufferedWriter out) {
        HashSet<String> barcodes = new HashSet<String>(new BarcodeListRetrieval().getCellBarcodes(this.INPUT, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.GENE_EXON_TAG, this.STRAND_TAG, this.CELL_BC_FILE, this.READ_MQ, this.MIN_NUM_TRANSCRIPTS_PER_CELL, this.MIN_NUM_GENES_PER_CELL, this.MIN_NUM_READS_PER_CELL, this.NUM_CORE_BARCODES, this.EDIT_DISTANCE, this.MIN_BC_READ_THRESHOLD, this.USE_STRAND_INFO));
        UMIIterator umiIterator = new UMIIterator(SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false), this.GENE_EXON_TAG, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.STRAND_TAG, this.READ_MQ, true, this.USE_STRAND_INFO, barcodes);
        try {
            UMICollection batch;
            while ((batch = umiIterator.next()) != null) {
                if (batch.isEmpty()) {
                    continue;
                }
                String cellBarcode = batch.getCellBarcode();
                // An empty barcode set means "no filter"; otherwise only
                // listed cell barcodes are reported.
                if (!barcodes.isEmpty() && !barcodes.contains(cellBarcode)) {
                    continue;
                }
                this.writePerTranscriptStats(batch.getGeneName(), cellBarcode, batch.getMolecularBarcodeCountsCollapsed(this.EDIT_DISTANCE), out);
            }
        } finally {
            CloserUtil.close((Object) umiIterator);
        }
    }

    /**
     * Writes one tab-separated row per key of {@code counts}:
     * cell barcode, gene, key, count.
     *
     * @param gene        gene name for the rows
     * @param cellBarcode cell barcode for the rows
     * @param counts      molecular barcode to observation count
     * @param out         destination writer
     */
    private void writePerTranscriptStats(String gene, String cellBarcode, ObjectCounter<String> counts, BufferedWriter out) {
        for (String key : counts.getKeys()) {
            int value = counts.getCountForKey(key);
            String[] line = new String[]{cellBarcode, gene, key, String.valueOf(value)};
            String h = StringUtils.join(line, "\t");
            OutputWriterUtil.writeResult(h, out);
        }
    }

    /**
     * Writes the tab-separated column header row.
     *
     * @param out destination writer
     */
    private void writePerTranscriptHeader(BufferedWriter out) {
        String[] header = new String[]{"Cell Barcode", "Gene", "Molecular_Barcode", "Num_Obs"};
        String h = StringUtils.join(header, "\t");
        OutputWriterUtil.writeResult(h, out);
    }

    /**
     * Sums, per cell barcode, the number of distinct molecular barcodes that
     * remain after collapsing with {@code editDistance}, across all non-empty
     * UMI collections read from {@code bamFile}. No cell-barcode filter is
     * passed to the iterator ({@code null}).
     *
     * @param bamFile               input file to iterate
     * @param cellTag               cell barcode tag
     * @param molecularBarcodeTag   molecular barcode tag
     * @param geneExonTag           gene/exon tag
     * @param strandTag             strand tag
     * @param mapQuality            map quality passed to the iterator
     * @param editDistance          edit distance used to collapse molecular barcodes
     * @param minNumReadsMolBarcode currently unused by this method; kept for
     *                              interface compatibility
     * @param useStrandInfo         strand-info flag passed to the iterator
     * @return counter keyed by cell barcode
     */
    public ObjectCounter<String> getNumTranscriptsPerCell(File bamFile, String cellTag, String molecularBarcodeTag, String geneExonTag, String strandTag, Integer mapQuality, int editDistance, int minNumReadsMolBarcode, boolean useStrandInfo) {
        UMIIterator umiIterator = new UMIIterator(SamFileMergeUtil.mergeInputs(Collections.singletonList(bamFile), false), geneExonTag, cellTag, molecularBarcodeTag, strandTag, mapQuality, true, useStrandInfo, null);
        ObjectCounter<String> transcriptsPerCell = new ObjectCounter<String>();
        try {
            UMICollection batch;
            while ((batch = umiIterator.next()) != null) {
                if (batch.isEmpty()) {
                    continue;
                }
                int numTranscripts = batch.getMolecularBarcodeCountsCollapsed(editDistance).getSize();
                transcriptsPerCell.incrementByCount(batch.getCellBarcode(), numTranscripts);
            }
        } finally {
            // Release the underlying reader even if iteration fails.
            umiIterator.close();
        }
        return transcriptsPerCell;
    }

    public static void main(String[] args) {
        System.exit(new GatherMolecularBarcodeDistributionByGene().instanceMain(args));
    }
}

