/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.barnyard;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.broadinstitute.dropseqrna.barnyard.BarcodeListRetrieval;
import org.broadinstitute.dropseqrna.barnyard.DigitalExpression;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.UMICollection;
import org.broadinstitute.dropseqrna.cmdline.CustomCommandLineValidationHelper;
import org.broadinstitute.dropseqrna.cmdline.DropSeq;
import org.broadinstitute.dropseqrna.utils.readiterators.SamFileMergeUtil;
import org.broadinstitute.dropseqrna.utils.readiterators.SamHeaderAndIterator;
import org.broadinstitute.dropseqrna.utils.readiterators.UMIIterator;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;

@CommandLineProgramProperties(usage="Determine the cell barcodes that have a minimum number of transcripts.  For a multi-species BAM, the minimum must be reached in transcripts for a single species.", usageShort="Determine the cell barcodes that have a minimum number of transcripts.", programGroup=DropSeq.class)
public class SelectCellsByNumTranscripts
extends CommandLineProgram {
    // Command-line options. Field names are the option names seen by the user, so they
    // must not be renamed.

    /** Alignment file; doWork() passes it to the read-count barcode retrieval (only when INPUT_INTERIM_CELLS is unset) and to the merged SAM iterator. */
    @Option(shortName="I", doc="SAM or BAM file to analyze.")
    public File INPUT;
    /** Destination for the selected barcodes; checked for writability at the start of doWork(). */
    @Option(shortName="O", doc="List of cell barcodes, one per line")
    public File OUTPUT;
    @Option(doc="Select cells with at least this many transcripts")
    public Integer MIN_TRANSCRIPTS_PER_CELL;
    /** doWork() overwrites this with MIN_TRANSCRIPTS_PER_CELL when it is null or smaller than that value. */
    @Option(doc="Limit cells to those with at least this many reads.", optional=true)
    public Integer MIN_READS_PER_CELL;
    /**
     * Organism names. A read is attributed to an organism when its reference sequence name
     * starts with "&lt;organism&gt;_"; validation rejects duplicates and names containing "::".
     */
    @Option(doc="If specified, transcripts are counted per species, and the MIN_TRANSCRIPTS_PER_CELL threshold must be reached by transcripts for a single species before a cell is selected.", optional=true)
    public List<String> ORGANISM;
    @Option(doc="If set, cells with minimum number of reads are written to this file.", optional=true)
    public File OUTPUT_INTERIM_CELLS;
    @Option(doc="If set, read cells from this file rather than filtering BAM for cells with minimum number of reads.", optional=true)
    public File INPUT_INTERIM_CELLS;
    @Option(doc="The cell barcode tag.  If there are no reads with this tag, the program will assume that all reads belong to the same cell and process in single sample mode.")
    public String CELL_BARCODE_TAG = "XC";
    @Option(doc="The molecular barcode tag.")
    public String MOLECULAR_BARCODE_TAG = "XM";
    @Option(doc="The Gene/Exon tag")
    public String GENE_EXON_TAG = "GE";
    @Option(doc="The strand of the gene(s) the read overlaps.  When there are multiple genes, they will be comma seperated.")
    public String STRAND_TAG = "GS";
    @Option(doc="The map quality of the read to be included.")
    public int READ_MQ = 10;
    @Option(doc="Is the library stranded?  If so, use the strand info to more precisely place reads on the correct gene, and ignore reads that are on the wrong strand.")
    public boolean USE_STRAND_INFO = true;
    @Option(doc="The edit distance that molecular barcodes should be combined at within a gene.")
    public Integer EDIT_DISTANCE = 1;
    private static final Log log = Log.getInstance(SelectCellsByNumTranscripts.class);
    /** Placed between the organism name and the gene name when gene tags are prefixed per organism. */
    private static final String ORGANISM_SEPARATOR = "::";

    /**
     * Selects the cell barcodes whose transcript count reaches {@code MIN_TRANSCRIPTS_PER_CELL}
     * and writes them to {@code OUTPUT}, one per line, ordered by descending transcript count.
     *
     * <p>Steps:
     * <ol>
     *   <li>Obtain the candidate barcodes, either from {@code INPUT_INTERIM_CELLS} or by
     *       filtering {@code INPUT} on {@code MIN_READS_PER_CELL} (raised to
     *       {@code MIN_TRANSCRIPTS_PER_CELL} when unset or smaller).</li>
     *   <li>Optionally write those candidates to {@code OUTPUT_INTERIM_CELLS}.</li>
     *   <li>Walk the UMI batches, flushing the per-gene counts into the summary every time the
     *       gene name changes.</li>
     *   <li>Sort the cells over the threshold and write them out.</li>
     * </ol>
     *
     * @return 0 on success
     */
    @Override
    protected int doWork() {
        IOUtil.assertFileIsWritable(this.OUTPUT);
        if (this.MIN_READS_PER_CELL == null || this.MIN_READS_PER_CELL < this.MIN_TRANSCRIPTS_PER_CELL) {
            this.MIN_READS_PER_CELL = this.MIN_TRANSCRIPTS_PER_CELL;
        }
        List<String> cellBarcodes = this.INPUT_INTERIM_CELLS == null
                ? new BarcodeListRetrieval().getListCellBarcodesByReadCount(this.INPUT, this.CELL_BARCODE_TAG, this.READ_MQ, this.MIN_READS_PER_CELL, null)
                : SelectCellsByNumTranscripts.readBarcodes(this.INPUT_INTERIM_CELLS);
        if (this.OUTPUT_INTERIM_CELLS != null) {
            SelectCellsByNumTranscripts.writeBarcodes(this.OUTPUT_INTERIM_CELLS, cellBarcodes);
        }

        SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false);
        MapContainer mapContainer;
        // ORGANISM is normally an empty list when not given on the command line; the null
        // check only protects callers that set the fields programmatically.
        if (this.ORGANISM != null && !this.ORGANISM.isEmpty()) {
            // Gene tags get an "<organism>::" prefix so counts can be kept per organism.
            headerAndIterator = new SamHeaderAndIterator(headerAndIterator.header, new PrefixGeneWithOrganismIterator(headerAndIterator.iterator));
            mapContainer = new MultiOrganismMapContainer(cellBarcodes);
        } else {
            mapContainer = new SingleOrganismMapContainer(cellBarcodes);
        }

        UMIIterator umiIterator = new UMIIterator(headerAndIterator, this.GENE_EXON_TAG, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.STRAND_TAG, this.READ_MQ, true, this.USE_STRAND_INFO, cellBarcodes);
        String gene = null;
        try {
            UMICollection batch;
            while ((batch = umiIterator.next()) != null) {
                if (batch.isEmpty()) {
                    continue;
                }
                String currentGene = batch.getGeneName();
                // A change of gene name means the previous gene is complete: flush its counts.
                if (gene != null && !gene.equals(currentGene)) {
                    mapContainer.addToSummary(gene);
                }
                gene = currentGene;
                mapContainer.countExpression(gene, batch.getCellBarcode(), batch.getDigitalExpression(0, this.EDIT_DISTANCE, false));
            }
        } finally {
            // Release the underlying reader(s) even if iteration fails part-way.
            CloserUtil.close(umiIterator);
        }
        // Flush the last gene. When no non-empty batch was seen there is nothing to flush,
        // and passing a null gene on would fail in the multi-organism container.
        if (gene != null) {
            mapContainer.addToSummary(gene);
        }

        Map<String, Integer> transcriptsPerCell = mapContainer.getTranscriptCountForCellBarcodesOverTranscriptThreshold(this.MIN_TRANSCRIPTS_PER_CELL);
        log.info("Found " + transcriptsPerCell.size() + " cells with enough transcripts");

        // Stable sort by descending transcript count; ties keep the map's iteration order.
        List<Map.Entry<String, Integer>> ranked = new ArrayList<Map.Entry<String, Integer>>(transcriptsPerCell.entrySet());
        Collections.sort(ranked, new EntryComparator());
        List<String> finalBarcodes = new ArrayList<String>(ranked.size());
        for (Map.Entry<String, Integer> entry : ranked) {
            finalBarcodes.add(entry.getKey());
        }
        SelectCellsByNumTranscripts.writeBarcodes(this.OUTPUT, finalBarcodes);
        log.info("Wrote cell barcodes to " + this.OUTPUT.getAbsolutePath());
        return 0;
    }

    /**
     * Writes the given barcodes to {@code file}, one per line.
     *
     * @param file     destination file
     * @param barcodes barcodes to write, in output order
     * @throws RuntimeIOException if writing or closing the file fails
     */
    private static void writeBarcodes(File file, List<String> barcodes) {
        BufferedWriter writer = IOUtil.openFileForBufferedWriting(file);
        try {
            try {
                for (String barcode : barcodes) {
                    writer.write(barcode);
                    writer.newLine();
                }
            } finally {
                // Always close, so a failed write does not leak the file handle. A close
                // failure is still reported because it may mean buffered data was lost.
                writer.close();
            }
        }
        catch (IOException e) {
            throw new RuntimeIOException("Exception writing " + file.getAbsolutePath(), e);
        }
    }

    /**
     * Reads barcodes from {@code file}, one per line, skipping empty lines.
     *
     * @param file file to read; must be readable
     * @return the non-empty lines of the file, in file order
     * @throws RuntimeIOException if reading fails; the underlying {@link IOException} is kept as the cause
     */
    private static List<String> readBarcodes(File file) {
        BufferedReader reader = null;
        try {
            IOUtil.assertFileIsReadable(file);
            reader = IOUtil.openFileForBufferedReading(file);
            List<String> ret = new ArrayList<String>();
            String barcode;
            while ((barcode = reader.readLine()) != null) {
                if (barcode.isEmpty()) {
                    continue;
                }
                ret.add(barcode);
            }
            return ret;
        }
        catch (IOException e) {
            // Keep the original exception as the cause so the real I/O failure is diagnosable.
            throw new RuntimeIOException("Exception reading " + file.getAbsolutePath(), e);
        }
        finally {
            // Runs on success and on failure, so the reader never leaks.
            CloserUtil.close(reader);
        }
    }

    /**
     * Validates the ORGANISM option on top of the inherited validation.
     *
     * <p>Duplicate organism names are rejected; if there are none, any name containing the
     * organism separator is rejected. The resulting message (if any) is combined with the
     * superclass result through {@code CustomCommandLineValidationHelper.makeValue}.
     *
     * @return whatever {@code makeValue} produces from the inherited and local error messages
     */
    @Override
    protected String[] customCommandLineValidation() {
        List<String> problems = null;
        final boolean allDistinct = this.ORGANISM.size() == new HashSet<String>(this.ORGANISM).size();
        if (allDistinct) {
            for (final String name : this.ORGANISM) {
                if (name.contains(ORGANISM_SEPARATOR)) {
                    problems = Arrays.asList("'::' not allowed in ORGANISM argument");
                }
            }
        } else {
            problems = Arrays.asList("Duplicates not allow in ORGANISM argument");
        }
        return CustomCommandLineValidationHelper.makeValue(super.customCommandLineValidation(), problems);
    }

    /**
     * Command-line entry point: creates the program and hands the arguments to
     * {@code instanceMainWithExit}.
     *
     * @param args raw command-line arguments
     */
    public static void main(String[] args) {
        final SelectCellsByNumTranscripts program = new SelectCellsByNumTranscripts();
        program.instanceMainWithExit(args);
    }

    private class EntryComparator
    implements Comparator<Map.Entry<String, Integer>> {
        private EntryComparator() {
        }

        @Override
        public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
            return o2.getValue().compareTo(o1.getValue());
        }
    }

    /**
     * {@link MapContainer} that keeps one {@link SingleOrganismMapContainer} per entry of
     * ORGANISM and routes each gene to the container whose "&lt;organism&gt;::" prefix the gene
     * name starts with. Genes that match no prefix (including a null gene) are ignored.
     */
    private class MultiOrganismMapContainer
    implements MapContainer {
        /** One container per organism, in the same order as ORGANISM. */
        final SingleOrganismMapContainer[] innerMapContainer;
        /** {@code genePrefixes[i]} is the gene-name prefix that selects {@code innerMapContainer[i]}. */
        final String[] genePrefixes;

        /**
         * @param cellBarcodes barcodes handed to every per-organism container
         */
        public MultiOrganismMapContainer(List<String> cellBarcodes) {
            this.innerMapContainer = new SingleOrganismMapContainer[SelectCellsByNumTranscripts.this.ORGANISM.size()];
            this.genePrefixes = new String[SelectCellsByNumTranscripts.this.ORGANISM.size()];
            for (int i = 0; i < SelectCellsByNumTranscripts.this.ORGANISM.size(); ++i) {
                this.innerMapContainer[i] = new SingleOrganismMapContainer(cellBarcodes);
                this.genePrefixes[i] = SelectCellsByNumTranscripts.this.ORGANISM.get(i) + SelectCellsByNumTranscripts.ORGANISM_SEPARATOR;
            }
        }

        /**
         * Finds the container responsible for a gene.
         *
         * @param gene possibly-null, organism-prefixed gene name
         * @return the container of the first organism whose prefix matches, or {@code null}
         *         when the gene is null or matches no organism
         */
        private SingleOrganismMapContainer findContainer(String gene) {
            if (gene == null) {
                // The caller may flush with no gene at all when the input produced no batches.
                return null;
            }
            for (int i = 0; i < this.genePrefixes.length; ++i) {
                if (gene.startsWith(this.genePrefixes[i])) {
                    return this.innerMapContainer[i];
                }
            }
            return null;
        }

        @Override
        public void addToSummary(String gene) {
            SingleOrganismMapContainer target = this.findContainer(gene);
            if (target != null) {
                target.addToSummary(gene);
            }
        }

        @Override
        public void countExpression(String gene, String cellBarcode, int molBCCount) {
            SingleOrganismMapContainer target = this.findContainer(gene);
            if (target != null) {
                target.countExpression(gene, cellBarcode, molBCCount);
            }
        }

        /**
         * Merges the per-organism results, keeping for each cell barcode the largest
         * transcript count reported by any single organism.
         *
         * @param minNumTranscripts threshold passed to every per-organism container
         * @return cell barcode to transcript count; never null (empty when nothing qualifies)
         */
        @Override
        public Map<String, Integer> getTranscriptCountForCellBarcodesOverTranscriptThreshold(int minNumTranscripts) {
            Map<String, Integer> ret = null;
            for (SingleOrganismMapContainer somc : this.innerMapContainer) {
                Map<String, Integer> organismSet = somc.getTranscriptCountForCellBarcodesOverTranscriptThreshold(minNumTranscripts);
                if (ret == null) {
                    // The first organism's result is a fresh map, so it can be reused as the accumulator.
                    ret = organismSet;
                    continue;
                }
                for (Map.Entry<String, Integer> entry : organismSet.entrySet()) {
                    String cellBarcode = entry.getKey();
                    Integer numTranscripts = ret.get(cellBarcode);
                    Integer newNumTranscripts = entry.getValue();
                    if (numTranscripts != null && numTranscripts >= newNumTranscripts) {
                        continue;
                    }
                    ret.put(cellBarcode, newNumTranscripts);
                }
            }
            // With no organisms there is nothing to merge; return an empty map rather than null.
            return ret != null ? ret : new HashMap<String, Integer>();
        }
    }

    /**
     * {@link MapContainer} for a single organism: collects per-cell counts for the gene
     * currently being processed and folds them into a per-cell summary on each flush.
     */
    private class SingleOrganismMapContainer
    implements MapContainer {
        /** Cell barcode to count for the gene currently being accumulated; cleared on every flush. */
        final Map<String, Integer> countMap = new HashMap<String, Integer>();
        /** Per-cell summaries, created by {@code DigitalExpression.initializeSummary}. */
        final Map<String, DigitalExpression.DESummary> summaryMap;

        /**
         * @param cellBarcodes barcodes used to initialize the summary map
         */
        public SingleOrganismMapContainer(List<String> cellBarcodes) {
            this.summaryMap = DigitalExpression.initializeSummary(cellBarcodes);
        }

        /**
         * Flushes the accumulated counts into the summary and resets the accumulator.
         * The {@code gene} argument is not used here.
         */
        @Override
        public void addToSummary(String gene) {
            // First argument: a map with the same cell barcodes as countMap, every value 0.
            final Map<String, Integer> zeroed = new HashMap<String, Integer>();
            for (final String cellBarcode : this.countMap.keySet()) {
                zeroed.put(cellBarcode, 0);
            }
            DigitalExpression.addToSummary(zeroed, this.countMap, this.summaryMap);
            this.countMap.clear();
        }

        /**
         * Records the count for one cell, replacing any value already stored for that cell.
         * The {@code gene} argument is not used here.
         */
        @Override
        public void countExpression(String gene, String cellBarcode, int molBCCount) {
            this.countMap.put(cellBarcode, molBCCount);
        }

        /**
         * @param minNumTranscripts inclusive lower bound on NUM_TRANSCRIPTS
         * @return a new map from cell barcode to NUM_TRANSCRIPTS for every summary reaching the bound
         */
        @Override
        public Map<String, Integer> getTranscriptCountForCellBarcodesOverTranscriptThreshold(int minNumTranscripts) {
            final Map<String, Integer> selected = new HashMap<String, Integer>();
            for (final DigitalExpression.DESummary cellSummary : this.summaryMap.values()) {
                if (cellSummary.NUM_TRANSCRIPTS >= minNumTranscripts) {
                    selected.put(cellSummary.CELL_BARCODE, cellSummary.NUM_TRANSCRIPTS);
                }
            }
            return selected;
        }
    }

    private static interface MapContainer {
        public void addToSummary(String var1);

        public void countExpression(String var1, String var2, int var3);

        public Map<String, Integer> getTranscriptCountForCellBarcodesOverTranscriptThreshold(int var1);
    }

    private class PrefixGeneWithOrganismIterator
    implements CloseableIterator<SAMRecord> {
        private final CloseableIterator<SAMRecord> it;
        private final String[] referenceSequencePrefix;
        private final String[] genePrefix;

        public PrefixGeneWithOrganismIterator(CloseableIterator<SAMRecord> it) {
            this.referenceSequencePrefix = new String[SelectCellsByNumTranscripts.this.ORGANISM.size()];
            this.genePrefix = new String[SelectCellsByNumTranscripts.this.ORGANISM.size()];
            this.it = it;
            for (int i = 0; i < SelectCellsByNumTranscripts.this.ORGANISM.size(); ++i) {
                this.referenceSequencePrefix[i] = SelectCellsByNumTranscripts.this.ORGANISM.get(i) + "_";
                this.genePrefix[i] = SelectCellsByNumTranscripts.this.ORGANISM.get(i) + SelectCellsByNumTranscripts.ORGANISM_SEPARATOR;
            }
        }

        public void close() {
            this.it.close();
        }

        public boolean hasNext() {
            return this.it.hasNext();
        }

        public SAMRecord next() {
            SAMRecord rec = (SAMRecord)this.it.next();
            String reference = rec.getReferenceName();
            for (int i = 0; i < this.referenceSequencePrefix.length; ++i) {
                String geneExon;
                if (!reference.startsWith(this.referenceSequencePrefix[i]) || (geneExon = rec.getStringAttribute(SelectCellsByNumTranscripts.this.GENE_EXON_TAG)) == null) continue;
                rec.setAttribute(SelectCellsByNumTranscripts.this.GENE_EXON_TAG, (Object)(this.genePrefix[i] + geneExon));
                break;
            }
            return rec;
        }

        public void remove() {
            this.it.remove();
        }
    }
}

