/*
 * Decompiled with CFR 0.152.
 */
package org.broadinstitute.dropseqrna.beadsynthesis;

import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.broadinstitute.dropseqrna.barnyard.BarcodeListRetrieval;
import org.broadinstitute.dropseqrna.barnyard.ParseBarcodeFile;
import org.broadinstitute.dropseqrna.barnyard.digitalexpression.UMICollection;
import org.broadinstitute.dropseqrna.beadsynthesis.BeadSynthesisErrorData;
import org.broadinstitute.dropseqrna.beadsynthesis.BeadSynthesisErrorTypes;
import org.broadinstitute.dropseqrna.beadsynthesis.BiasedBarcodeCollection;
import org.broadinstitute.dropseqrna.beadsynthesis.DetectPrimerInUMI;
import org.broadinstitute.dropseqrna.utils.GroupingIterator;
import org.broadinstitute.dropseqrna.utils.StringInterner;
import org.broadinstitute.dropseqrna.utils.readiterators.SamFileMergeUtil;
import org.broadinstitute.dropseqrna.utils.readiterators.UMIIterator;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.Option;

/**
 * Shared command-line options and helper routines for programs that detect
 * (and optionally repair) bead synthesis errors in cell barcodes / UMIs.
 *
 * <p>Subclasses inherit the {@code @Option} fields declared here and use the
 * helpers to build the UMI iterator, collect per-cell-barcode error data, and
 * pad or repair barcodes once an error position is known.
 */
public abstract class AbstractDetectBeadSynthesisErrors
extends CommandLineProgram {
    private static final Log log = Log.getInstance(AbstractDetectBeadSynthesisErrors.class);

    /** A progress message is logged each time this many UMI collections have been processed. */
    private static final int PROGRESS_INTERVAL = 1000000;

    /** Base written over the cell barcode positions affected by a synthesis error. */
    private static final char PAD_CHARACTER = 'N';

    @Option(shortName="I", doc="The input SAM or BAM files to analyze.  They must all have the same sort order")
    public List<File> INPUT;
    @Option(doc="Output a summary of the error types and frequencies detected")
    public File SUMMARY;
    @Option(shortName="O", doc="The output BAM, with the synthesis error barcodes removed", optional=true)
    public File OUTPUT;
    @Option(doc="The sequence of the primer.", optional=true)
    public String PRIMER_SEQUENCE = null;
    @Option(doc="When looking at fixed UMIs, see if the edit distance from the UMI to the primer is within this threshold.  0 indicates a perfect match between the primer and the UMI.")
    public Integer EDIT_DISTANCE = 0;
    @Option(doc="The cell barcode tag.")
    public String CELL_BARCODE_TAG = "XC";
    @Option(doc="The molecular barcode tag.")
    public String MOLECULAR_BARCODE_TAG = "XM";
    @Option(doc="The Gene/Exon tag")
    public String GENE_EXON_TAG = "GE";
    @Option(doc="The strand of the gene(s) the read overlaps.  When there are multiple genes, they will be comma-separated.")
    public String STRAND_TAG = "GS";
    @Option(doc="The map quality of the read to be included when calculating the barcodes in <NUM_BARCODES>")
    public Integer READ_MQ = 10;
    @Option(doc="The minimum number of UMIs required to report a cell barcode")
    public Integer MIN_UMIS_PER_CELL = 25;
    @Option(doc="Find the top set of <NUM_BARCODES> most common barcodes by HQ reads and only use this set for analysis.", mutex={"CELL_BC_FILE"})
    public Integer NUM_BARCODES;
    @Option(doc="Override NUM_BARCODES, and process reads that have the cell barcodes in this file instead.  The file has 1 column with no header.", mutex={"NUM_BARCODES"})
    public File CELL_BC_FILE;
    @Option(doc="Repair Synthesis errors with at most this many missing bases detected.", optional=true)
    public Integer MAX_NUM_ERRORS = 1;

    /** Reader factory used for every input file; records are eagerly decoded. */
    SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE);

    /**
     * Collects per-cell-barcode UMI statistics from the given iterator.
     *
     * <p>The stream of {@link UMICollection}s is grouped by cell barcode. For each
     * group one {@link BeadSynthesisErrorData} is built from the UMIs, read counts
     * and transcript counts of its members. Cell barcodes whose UMI count is below
     * {@link #MIN_UMIS_PER_CELL} are dropped and only counted.
     *
     * @param iter source of UMI collections; grouping compares adjacent elements by
     *             cell barcode, so it presumably must deliver all collections of one
     *             cell barcode together (TODO confirm against GroupingIterator)
     * @return the retained error data keyed by cell barcode, together with the number
     *         of cell barcodes that were filtered out for having too few UMIs
     */
    public BiasedBarcodeCollection findBiasedBarcodes(UMIIterator iter) {
        // NOTE(review): the cast via Object is retained from the decompiled source because
        // UMIIterator's declaration is not visible here; presumably it already implements
        // Iterator<UMICollection> and the cast can be dropped - confirm.
        @SuppressWarnings("unchecked")
        Iterator<UMICollection> umiCollections = (Iterator<UMICollection>)((Object)iter);
        GroupingIterator<UMICollection> groupingIterator = new GroupingIterator<UMICollection>(umiCollections, new Comparator<UMICollection>(){

            @Override
            public int compare(UMICollection o1, UMICollection o2) {
                return o1.getCellBarcode().compareTo(o2.getCellBarcode());
            }
        });

        HashMap<String, BeadSynthesisErrorData> errorBarcodesWithPositions = new HashMap<String, BeadSynthesisErrorData>();
        // Shares one String instance per distinct UMI sequence across all cells.
        StringInterner umiStringCache = new StringInterner();
        int counter = 0;
        int numCellsFilteredLowUMIs = 0;

        // Typed iteration: a raw List here would make the inner for-each over UMICollection uncompilable.
        for (List<UMICollection> umiCollectionList : groupingIterator) {
            String cellBarcode = umiCollectionList.get(0).getCellBarcode();
            BeadSynthesisErrorData bsed = new BeadSynthesisErrorData(cellBarcode);
            for (UMICollection umis : umiCollectionList) {
                int transcriptCounts = umis.getDigitalExpression(1, 1, false);
                int readCounts = umis.getDigitalExpression(1, 1, true);
                Collection<String> umiCol = this.getUMIsFromCache(umis.getMolecularBarcodes(), umiStringCache);
                bsed.addUMI(umiCol);
                bsed.incrementReads(readCounts);
                bsed.incrementTranscripts(transcriptCounts);
                if (++counter % PROGRESS_INTERVAL == 0) {
                    log.info("Processed [" + counter + "] Cell/Gene UMIs.");
                }
            }
            if (bsed.getUMICount() < this.MIN_UMIS_PER_CELL) {
                // Too few UMIs to report this cell barcode; remember only that it was skipped.
                ++numCellsFilteredLowUMIs;
                continue;
            }
            errorBarcodesWithPositions.put(cellBarcode, bsed);
        }
        return new BiasedBarcodeCollection(errorBarcodesWithPositions, numCellsFilteredLowUMIs);
    }

    /**
     * Replaces every UMI string with the instance returned by the interner.
     *
     * @param umis           UMI sequences to intern
     * @param umiStringCache interner holding the shared instances
     * @return a new list with the interned strings, in the iteration order of {@code umis}
     */
    private Collection<String> getUMIsFromCache(Collection<String> umis, StringInterner umiStringCache) {
        ArrayList<String> result = new ArrayList<String>(umis.size());
        for (String umi : umis) {
            result.add(umiStringCache.intern(umi));
        }
        return result;
    }

    /**
     * Index of the first cell barcode base affected by a synthesis error, given
     * the UMI position at which the error was detected.
     *
     * <p>With {@code badBases = umiLength - errorPosition}, the result is
     * {@code cellBarcodeLength - badBases - 1}; e.g. a 12-base barcode, 8-base UMI
     * and error position 8 yield index 11 (the last barcode base).
     */
    private static int firstAffectedBarcodeIndex(int cellBarcodeLength, int umiLength, int errorPosition) {
        int badBasesUMI = umiLength - errorPosition;
        return cellBarcodeLength - badBasesUMI - 1;
    }

    /**
     * Rebuilds a UMI by prepending the trailing bases of the cell barcode and
     * dropping the UMI bases from the error position onwards.
     *
     * <p>The result is the cell barcode suffix starting at
     * {@code cellBarcode.length() - (umi.length() - errorPosition) - 1} followed by
     * the first {@code errorPosition - 1} bases of the UMI. Example: cell barcode
     * {@code "AAAAAAAAAAAC"}, UMI {@code "GGGGGGGT"}, error position 8 gives
     * {@code "CGGGGGGG"}.
     *
     * @param cellBarcode   the cell barcode sequence
     * @param umi           the UMI sequence to repair
     * @param errorPosition position in the UMI where the error starts, as used by the
     *                      arithmetic above (1 refers to the first UMI base)
     * @return the repaired UMI
     * @throws StringIndexOutOfBoundsException if the computed indexes fall outside the
     *         given strings (for instance when {@code errorPosition} is -1)
     */
    public String fixUMI(String cellBarcode, String umi, int errorPosition) {
        int firstBaseToPad = firstAffectedBarcodeIndex(cellBarcode.length(), umi.length(), errorPosition);
        String cellBCBases = cellBarcode.substring(firstBaseToPad);
        String umiRemaining = umi.substring(0, errorPosition - 1);
        return cellBCBases + umiRemaining;
    }

    /**
     * Builds the {@link UMIIterator} over the merged {@link #INPUT} files,
     * restricted to the cell barcodes returned by {@link #getCellBarcodes()}.
     *
     * @return a new iterator configured with the gene/exon, cell barcode, molecular
     *         barcode and strand tags and the {@link #READ_MQ} threshold
     */
    public UMIIterator prepareUMIIterator() {
        List<String> barcodes = this.getCellBarcodes();
        // NOTE(review): the meaning of the boolean flags is defined by UMIIterator / SamFileMergeUtil
        // and is not visible from this file; values are kept exactly as in the original.
        return new UMIIterator(SamFileMergeUtil.mergeInputs(this.INPUT, false, this.samReaderFactory), this.GENE_EXON_TAG, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.STRAND_TAG, this.READ_MQ, false, false, barcodes, true);
    }

    /**
     * Determines the cell barcodes to analyze.
     *
     * <p>If {@link #CELL_BC_FILE} is set, the barcodes are read from that file (which
     * must be readable). Otherwise the merged inputs are scanned and the list is
     * obtained from {@link BarcodeListRetrieval#getListCellBarcodesByReadCount}
     * using {@link #CELL_BARCODE_TAG}, {@link #READ_MQ} and {@link #NUM_BARCODES}.
     *
     * @return the list of cell barcodes
     */
    public List<String> getCellBarcodes() {
        if (this.CELL_BC_FILE != null) {
            IOUtil.assertFileIsReadable(this.CELL_BC_FILE);
            List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(this.CELL_BC_FILE);
            log.info("Found " + cellBarcodes.size() + " cell barcodes in file");
            return cellBarcodes;
        }
        log.info("Gathering barcodes for the top [" + this.NUM_BARCODES + "] cells");
        // Unboxed up front so the argument keeps the primitive type used by the original call.
        final int minMapQuality = this.READ_MQ;
        return new BarcodeListRetrieval().getListCellBarcodesByReadCount(SamFileMergeUtil.mergeInputs(this.INPUT, false, this.samReaderFactory).iterator, this.CELL_BARCODE_TAG, minMapQuality, null, this.NUM_BARCODES);
    }

    /**
     * Classifies the error type of a cell barcode, additionally distinguishing
     * single-UMI barcodes whose UMI matches the primer.
     *
     * @param data             collected UMI data for one cell barcode
     * @param extremeBaseRatio threshold forwarded to {@link BeadSynthesisErrorData#getErrorType}
     * @param detectPrimerTool primer matcher, or {@code null} to skip primer detection
     * @return {@link BeadSynthesisErrorTypes#PRIMER} when the base classification is
     *         {@code SINGLE_UMI}, a primer tool is supplied and it reports the first key
     *         of {@code getKeysOrderedByCount(true)} as matching the primer within
     *         {@link #EDIT_DISTANCE}; otherwise the base classification unchanged
     */
    BeadSynthesisErrorTypes getEnhancedErrorType(BeadSynthesisErrorData data, double extremeBaseRatio, DetectPrimerInUMI detectPrimerTool) {
        BeadSynthesisErrorTypes errorType = data.getErrorType(extremeBaseRatio);
        if (errorType == BeadSynthesisErrorTypes.SINGLE_UMI && detectPrimerTool != null) {
            String singleUMI = data.getUMICounts().getKeysOrderedByCount(true).get(0);
            if (detectPrimerTool.isStringInPrimer(singleUMI, this.EDIT_DISTANCE)) {
                return BeadSynthesisErrorTypes.PRIMER;
            }
        }
        return errorType;
    }

    /**
     * Overwrites the cell barcode bases affected by a synthesis error with {@code 'N'}.
     *
     * <p>Bases from index {@code cellBarcode.length() - (umiLength - errorPosition) - 1}
     * to the end of the barcode are replaced. Example: a 12-base barcode with
     * {@code umiLength} 8 and {@code errorPosition} 8 has only its last base padded.
     *
     * @param cellBarcode   the cell barcode sequence
     * @param errorPosition position in the UMI where the error starts, or -1 for
     *                      "no error", in which case the barcode is returned unchanged
     * @param umiLength     length of the UMI
     * @return the padded cell barcode
     * @throws ArrayIndexOutOfBoundsException if the first index to pad is negative
     */
    String padCellBarcode(String cellBarcode, int errorPosition, int umiLength) {
        if (errorPosition == -1) {
            return cellBarcode;
        }
        int lastBase = cellBarcode.length();
        int firstBaseToPad = firstAffectedBarcodeIndex(lastBase, umiLength, errorPosition);
        char[] charAr = cellBarcode.toCharArray();
        for (int i = firstBaseToPad; i < lastBase; ++i) {
            charAr[i] = PAD_CHARACTER;
        }
        return new String(charAr);
    }
}

