/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.beadsynthesis;

import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IterableAdapter;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.dropseqrna.beadsynthesis.AbstractDetectBeadSynthesisErrors;
import org.broadinstitute.dropseqrna.beadsynthesis.BeadSynthesisErrorData;
import org.broadinstitute.dropseqrna.beadsynthesis.BeadSynthesisErrorTypes;
import org.broadinstitute.dropseqrna.beadsynthesis.BiasedBarcodeCollection;
import org.broadinstitute.dropseqrna.beadsynthesis.DetectPrimerInUMI;
import org.broadinstitute.dropseqrna.cmdline.DropSeq;
import org.broadinstitute.dropseqrna.utils.BaseDistributionMetric;
import org.broadinstitute.dropseqrna.utils.BaseDistributionMetricCollection;
import org.broadinstitute.dropseqrna.utils.Bases;
import org.broadinstitute.dropseqrna.utils.SamHeaderUtil;
import org.broadinstitute.dropseqrna.utils.io.ErrorCheckingPrintStream;
import org.broadinstitute.dropseqrna.utils.readiterators.SamFileMergeUtil;
import org.broadinstitute.dropseqrna.utils.readiterators.SamHeaderAndIterator;
import org.broadinstitute.dropseqrna.utils.readiterators.UMIIterator;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;

@CommandLineProgramProperties(usage="For each cell, gather up all the UMIs.  An error in synthesis will result in the last base of the synthesis being fixed in >90% of the UMIs for that cell, across all genes.This fixed base is T.  For cell barcodes where this occurs, output the cell barcode in a file, as well as (optionally) pad the cell barcodes with N for the error bases.", usageShort="Detect barcode synthesis errors where the final base of a UMI is fixed across all UMIs of a cell.", programGroup=DropSeq.class)
public class DetectBeadSynthesisErrors
extends AbstractDetectBeadSynthesisErrors {
    private static final Log log = Log.getInstance(DetectBeadSynthesisErrors.class);
    @Option(doc="Output of detailed information on each cell barcode analyzed.  Each row is a single cell barcode.  The data has multiple columns: the cell barcode, the number of UMIs, then one column per UMI base position containing the count of the reads, with a | delimiter between bases.  Bases are ordered A,C,G,T for these columns.  An example output with a single base UMI would be:AAAAAA\t20\t\t5|4|6|5.")
    public File OUTPUT_STATS;
    private Double EXTREME_BASE_RATIO = 0.8;
    private DetectPrimerInUMI detectPrimerTool = null;

    protected int doWork() {
        if (this.PRIMER_SEQUENCE != null) {
            this.detectPrimerTool = new DetectPrimerInUMI(this.PRIMER_SEQUENCE);
        }
        UMIIterator iterator = this.prepareUMIIterator();
        BiasedBarcodeCollection biasedBarcodeCollection = this.findBiasedBarcodes(iterator);
        Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions = biasedBarcodeCollection.getBiasedBarcodes();
        int numCellsFilteredLowUMIs = biasedBarcodeCollection.getNumBarcodesFilteredLowUMIs();
        ErrorCheckingPrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting((File)this.OUTPUT_STATS));
        this.writeFile(errorBarcodesWithPositions.values(), out);
        this.writeSummary(errorBarcodesWithPositions.values(), numCellsFilteredLowUMIs, this.SUMMARY);
        if (this.OUTPUT != null) {
            this.cleanBAM(errorBarcodesWithPositions);
        }
        return 0;
    }

    private void cleanBAM(Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions) {
        log.info(new Object[]{"Cleaning BAM"});
        SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(this.INPUT, true);
        SamHeaderUtil.addPgRecord(headerAndIterator.header, this);
        SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(this.CREATE_INDEX.booleanValue()).makeSAMOrBAMWriter(headerAndIterator.header, true, this.OUTPUT);
        ProgressLogger pl = new ProgressLogger(log);
        for (SAMRecord r : new IterableAdapter(headerAndIterator.iterator)) {
            pl.record(r);
            if ((r = this.padCellBarcodeFix(r, errorBarcodesWithPositions, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.EXTREME_BASE_RATIO)) == null) continue;
            writer.addAlignment(r);
        }
        CloserUtil.close(headerAndIterator.iterator);
        writer.close();
    }

    SAMRecord padCellBarcodeFix(SAMRecord r, Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, String cellBarcodeTag, String molecularBarcodeTag, double extremeBaseRatio) {
        String cellBC = r.getStringAttribute(cellBarcodeTag);
        BeadSynthesisErrorData bsed = errorBarcodesWithPositions.get(cellBC);
        if (bsed == null) {
            return r;
        }
        BeadSynthesisErrorTypes bset = this.getEnhancedErrorType(bsed, extremeBaseRatio, this.detectPrimerTool);
        if (bset == BeadSynthesisErrorTypes.NO_ERROR) {
            return r;
        }
        if (bset != BeadSynthesisErrorTypes.SYNTH_MISSING_BASE) {
            return null;
        }
        int polyTErrorPosition = bsed.getPolyTErrorPosition(this.EXTREME_BASE_RATIO);
        int umiLength = bsed.getBaseLength();
        int numErrors = umiLength - polyTErrorPosition + 1;
        if (numErrors > this.MAX_NUM_ERRORS) {
            return null;
        }
        String umi = r.getStringAttribute(molecularBarcodeTag);
        String cellBCFixed = this.padCellBarcode(cellBC, polyTErrorPosition, umiLength);
        String umiFixed = this.fixUMI(cellBC, umi, polyTErrorPosition);
        r.setAttribute(cellBarcodeTag, (Object)cellBCFixed);
        r.setAttribute(molecularBarcodeTag, (Object)umiFixed);
        return r;
    }

    private void writeSummary(Collection<BeadSynthesisErrorData> data, int numCellsFilteredLowUMIs, File out) {
        if (data.size() == 0) {
            return;
        }
        BeadSynthesisErrorsSummaryMetric m = new BeadSynthesisErrorsSummaryMetric();
        m.LOW_UMI_COUNT = numCellsFilteredLowUMIs;
        block7: for (BeadSynthesisErrorData bsde : data) {
            BeadSynthesisErrorTypes t = this.getEnhancedErrorType(bsde, this.EXTREME_BASE_RATIO, this.detectPrimerTool);
            ++m.NUM_BEADS;
            switch (t) {
                case SYNTH_MISSING_BASE: {
                    ++m.SYNTHESIS_MISSING_BASE;
                    m.incrementSynthesisMissingBase(bsde.getPolyTErrorPosition(this.EXTREME_BASE_RATIO));
                    continue block7;
                }
                case PRIMER: {
                    ++m.PRIMER_MATCH;
                    continue block7;
                }
                case SINGLE_UMI: {
                    ++m.SINGLE_UMI_ERROR;
                    continue block7;
                }
                case FIXED_FIRST_BASE: {
                    ++m.FIXED_FIRST_BASE;
                    continue block7;
                }
                case OTHER_ERROR: {
                    ++m.OTHER_ERROR_COUNT;
                    continue block7;
                }
            }
            ++m.NO_ERROR;
        }
        MetricsFile outFile = new MetricsFile();
        outFile.addMetric((MetricBase)m);
        outFile.addHistogram(m.getHistogram());
        outFile.write(out);
    }

    private void writeFile(Collection<BeadSynthesisErrorData> data, PrintStream out) {
        if (data.size() == 0) {
            out.close();
            return;
        }
        ArrayList<BeadSynthesisErrorData> dataArray = new ArrayList<BeadSynthesisErrorData>(data);
        Collections.sort(dataArray, new Comparator<BeadSynthesisErrorData>(){

            @Override
            public int compare(BeadSynthesisErrorData o1, BeadSynthesisErrorData o2) {
                int cmp = Integer.compare(o2.getUMICount(), o1.getUMICount());
                if (cmp != 0) {
                    return cmp;
                }
                return o1.getCellBarcode().compareTo(o2.getCellBarcode());
            }
        });
        BeadSynthesisErrorData first = (BeadSynthesisErrorData)dataArray.get(0);
        int umiLength = first.getBaseLength();
        this.writeBadBarcodeStatisticsFileHeader(umiLength, out);
        for (BeadSynthesisErrorData bsde : dataArray) {
            this.writeBadBarcodeStatisticsFileEntry(bsde, out);
        }
        out.close();
    }

    private void writeBadBarcodeStatisticsFileHeader(int umiLength, PrintStream out) {
        ArrayList<String> header = new ArrayList<String>();
        header.add("CELL_BARCODE");
        header.add("NUM_UMI");
        header.add("FIRST_BIASED_BASE");
        header.add(BeadSynthesisErrorTypes.SYNTH_MISSING_BASE.toString());
        header.add("ERROR_TYPE");
        for (int i = 0; i < umiLength; ++i) {
            header.add("BASE_" + Integer.toString(i + 1));
        }
        String h = StringUtils.join(header, (String)"\t");
        out.println(h);
    }

    private void writeBadBarcodeStatisticsFileEntry(BeadSynthesisErrorData data, PrintStream out) {
        ArrayList<String> line = new ArrayList<String>();
        line.add(data.getCellBarcode());
        line.add(Integer.toString(data.getUMICount()));
        int base = data.getErrorBase(this.EXTREME_BASE_RATIO);
        line.add(Integer.toString(base));
        int polyTErrorBase = data.getPolyTErrorPosition(this.EXTREME_BASE_RATIO);
        line.add(Integer.toString(polyTErrorBase));
        line.add(this.getEnhancedErrorType(data, this.EXTREME_BASE_RATIO, this.detectPrimerTool).toString());
        BaseDistributionMetricCollection bases = data.getBaseCounts();
        List<Integer> pos = bases.getPositions();
        for (Integer i : pos) {
            BaseDistributionMetric bdm = bases.getDistributionAtPosition(i);
            String formattedResult = this.format(bdm);
            line.add(formattedResult);
        }
        String outLine = StringUtils.join(line, (String)"\t");
        out.println(outLine);
    }

    private String format(BaseDistributionMetric bdm) {
        ArrayList<String> d = new ArrayList<String>();
        for (Bases b : Bases.values()) {
            char bb = b.getBase().charValue();
            int count = bdm.getCount(Character.valueOf(bb));
            d.add(Integer.toString(count));
        }
        return StringUtils.join(d, (String)"|");
    }

    protected String[] customCommandLineValidation() {
        for (File input : this.INPUT) {
            IOUtil.assertFileIsReadable((File)input);
        }
        IOUtil.assertFileIsWritable((File)this.OUTPUT_STATS);
        IOUtil.assertFileIsWritable((File)this.SUMMARY);
        if (this.OUTPUT != null) {
            IOUtil.assertFileIsWritable((File)this.OUTPUT);
        }
        return super.customCommandLineValidation();
    }

    public static void main(String[] args) {
        System.exit(new DetectBeadSynthesisErrors().instanceMain(args));
    }

    public class BeadSynthesisErrorsSummaryMetric
    extends MetricBase {
        public int NUM_BEADS = 0;
        public int NO_ERROR = 0;
        public int LOW_UMI_COUNT = 0;
        public int SYNTHESIS_MISSING_BASE = 0;
        public int SINGLE_UMI_ERROR = 0;
        public int PRIMER_MATCH = 0;
        public int FIXED_FIRST_BASE;
        public int OTHER_ERROR_COUNT = 0;
        private Histogram<Integer> histogram = new Histogram("SYNTHESIS_ERROR_BASE", "num cells");

        public void incrementSynthesisMissingBase(int position) {
            this.histogram.increment((Comparable)Integer.valueOf(position));
        }

        public Histogram<Integer> getHistogram() {
            return this.histogram;
        }
    }
}

