view scHicQualityControl.xml @ 1:5f2eacae0bb8 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/schicexplorer commit 72e1e90ac05a32dbd6fc675073429c0086048b18"
author iuc
date Tue, 10 Mar 2020 15:15:17 -0400
parents 061a8c076eb1
children 4a841ab67e3b
line wrap: on
line source

<tool id="schicexplorer_schicqualitycontrol" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <description>quality control for single-cell Hi-C interaction matrices</description>
    <macros>
        <token name="@BINARY@">scHicQualityControl</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        @BINARY@

        --matrix '$matrix_scooler'
        #if $chromosomes:
            #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split(' ') ])
            --chromosomes $chromosome
        #end if
        --minimumReadCoverage $minimumReadCoverage
        --minimumDensity $minimumDensity
        --maximumRegionToConsider $maximumRegionToConsider
        #if $dpi:
            --dpi $dpi
        #end if 
        --outFileNameDensity plot_density.$image_file_format
        --outFileNameReadCoverage plot_read_coverage.$image_file_format
        --outFileNameQCReport report.txt
        --outputScool filtered_matrices.scool

        --threads @THREADS@

        && mv plot_density.$image_file_format plot_density
        && mv plot_read_coverage.$image_file_format plot_read_coverage


    ]]></command>
    <inputs>
        <expand macro="matrix_scooler_macro"/>
        <param name="minimumReadCoverage" type="integer" value="1000000"  label="Minimum read coverage" help='Remove all samples with a lower read coverage as this value.' />   
        <param name="minimumDensity" type="float" value="0.001"  label="Minimum density" help='Remove all samples with a lower density as this value.' />   
        <param name="maximumRegionToConsider" type="integer" value="30000000"  label="Maximum region to consider" help='To compute the density, consider only this genomic distance around the diagonal.' />   
        <param name='chromosomes' type='text' label='List of chromosomes to consider' help='Please separate the chromosomes by space'/>
        <param name='dpi' type='integer' label='DPI for image' help='Change the default resolution of the plot.' optional='true'/>
        <param name="image_file_format" type="select" label="Image output format">
            <option value="png" selected="True">png</option>
            <option value="svg">svg</option>
            <option value="pdf">pdf</option>
        </param>
    </inputs>
    <outputs>
        <data name='output_plot_density' from_work_dir='plot_density' format='png' label='Density'>
            <change_format>
                <when input="image_file_format" value="svg" format="svg" />
                <when input="image_file_format" value="pdf" format="pdf" />
            </change_format>
        </data>
        <data name='output_plot_read_coverage' from_work_dir='plot_read_coverage' format='png' label='Read coverage'>
            <change_format>
                <when input="image_file_format" value="svg" format="svg" />
                <when input="image_file_format" value="pdf" format="pdf" />
            </change_format>
        </data>
        <data name="report" from_work_dir="report.txt" format="txt" label="${tool.name} on ${on_string}: QC report"/>
        <data name="outFileName" from_work_dir="filtered_matrices.scool" format="scool" label="${tool.name} on ${on_string}: Filtered matrices"/>
        
    </outputs>
    <tests>
        <test>
            <param name='matrix_scooler' value='test_matrix.scool' />
            <param name='minimumReadCoverage' value='100000' />
            <param name='minimumDensity' value='0.001' />
            <param name='maximumRegionToConsider' value='30000000' />
            <param name="image_file_format" value="png" />
            <param name="dpi" value="300" />
            <output name="output_plot_density" file="scHicQualityControl/density.png" ftype="png" compare="sim_size" delta="35000"/>        
            <output name="output_plot_read_coverage" file="scHicQualityControl/coverage.png" ftype="png" compare="sim_size" delta="35000"/>        
            <output name="report" file="scHicQualityControl/qc_report.txt" ftype="txt" compare="sim_size" delta="35000"/>        
            <output name="outFileName" ftype="scool">
                <assert_contents>
                    <has_h5_keys keys='Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins,
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins/end, 
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/bins/start, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/chroms, 
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/chroms/name,
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/indexes, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/indexes/bin1_offset, 
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels, 
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels/bin2_id,
                                    Diploid_1_CGTACTAG_AAGGAGTA_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins, 
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins/end,
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/bins/start, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/chroms, 
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/chroms/name,
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/indexes, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/indexes/bin1_offset, 
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels,
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels/bin2_id,
                                    Diploid_1_CGTACTAG_ACTGCATA_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins/end, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/bins/start, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/chroms, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/chroms/name, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/indexes, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/indexes/bin1_offset, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels/bin2_id, 
                                    Diploid_1_CGTACTAG_CTAAGCCT_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins, 
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins/end, 
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/bins/start, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/chroms, 
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/chroms/name, 
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/indexes, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/indexes/bin1_offset, 
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels, 
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels/bin2_id,
                                    Diploid_1_CGTACTAG_CTCTCTAT_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins,
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins/chrom, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins/end, 
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/bins/start, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/chroms,
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/chroms/length, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/chroms/name, 
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/indexes, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/indexes/bin1_offset, 
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/indexes/chrom_offset, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels, 
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels/bin1_id, Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels/bin2_id,
                                    Diploid_1_CGTACTAG_GTAAGGAG_R1fastqgz/pixels/count, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz, 
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins/chrom, 
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins/end, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/bins/start, 
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/chroms, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/chroms/length,
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/chroms/name, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/indexes, 
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/indexes/bin1_offset, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/indexes/chrom_offset,
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels/bin1_id,
                                    Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels/bin2_id, Diploid_1_CGTACTAG_TCTCTCCG_R1fastqgz/pixels/count, 
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins, 
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins/chrom, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins/end,
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/bins/start, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/chroms,
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/chroms/length, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/chroms/name, 
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/indexes, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/indexes/bin1_offset,
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/indexes/chrom_offset, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels, 
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels/bin1_id, Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels/bin2_id,
                                    Diploid_1_TAAGGCGA_AAGGAGTA_R1fastqgz/pixels/count, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz, 
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins/chrom,
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins/end, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/bins/start,
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/chroms, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/chroms/length, 
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/chroms/name, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/indexes, 
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/indexes/bin1_offset, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/indexes/chrom_offset, 
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels/bin1_id, 
                                    Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels/bin2_id, Diploid_1_TAAGGCGA_CTAAGCCT_R1fastqgz/pixels/count,
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins, 
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins/chrom, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins/end,
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/bins/start, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/chroms,
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/chroms/length, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/chroms/name,
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/indexes, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/indexes/bin1_offset, 
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/indexes/chrom_offset, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels,
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels/bin1_id, Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels/bin2_id, 
                                    Diploid_2_AAGAGGCA_CGTCTAAT_R1fastqgz/pixels/count, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz,
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins/chrom, 
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins/end, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/bins/start,
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/chroms, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/chroms/length, 
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/chroms/name, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/indexes,
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/indexes/bin1_offset, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/indexes/chrom_offset, 
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels/bin1_id, 
                                    Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels/bin2_id, Diploid_2_AAGAGGCA_TCTCTCCG_R1fastqgz/pixels/count'/></assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

Quality control
===============

scHicQualityControl removes scHi-C interaction matrices with a too low read coverage or density. It creates four output files:

1. Read coverage plot

.. image:: $PATH_TO_IMAGES/read_coverage.png


2. Density plot

.. image:: $PATH_TO_IMAGES/density.png

3. Quality report

.. code-block::

    scHi-C sample contained 3882 cells:
    Number of removed matrices containing bad chromosomes 0
    Number of removed matrices due to low read coverage (< 100000): 1374
    Number of removed matrices due to too many zero bins (< 0.02 density, within 30000000 relative genomic distance): 610
    2508 samples passed the quality control. Please consider matrices with a low read coverage may be the matrices with a low density and overlap therefore.

4. The scHi-C scool matrix with the filtered matrices.

For more information about scHiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://schicexplorer.readthedocs.io/
]]></help>
    <expand macro="citations" />

</tool>