view hicQuickQC.xml @ 2:909125ec301f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 4b602d427e0fc0da5353a4510798349de98e4ae4"
author iuc
date Wed, 11 Mar 2020 17:01:31 -0400
parents b599d3000966
children 9c5078f3926b
line wrap: on
line source

<tool id="hicexplorer_hicquickqc" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <description>get a first quality estimate of Hi-C data</description>
    <macros>
        <token name="@BINARY@">hicQuickQC</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[

        mkdir ./QCfolder &&
        mkdir $qc.files_path &&
        @BINARY@

            --samFiles
            #for $repeat in $samFiles:
                '${repeat.samFile}'
            #end for

            #if $restrictionSequence:
                --restrictionSequence '$restrictionSequence'
            #end if

            #if $danglingSequence:
                --danglingSequence '$danglingSequence'
            #end if
            --QCfolder ./QCfolder

            --lines $lines

        && mv ./QCfolder/* $qc.files_path/
        && mv $qc.files_path/hicQC.html $qc
        && mv $qc.files_path/*.log raw_qc
    ]]></command>
    <inputs>
         <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)"
                help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file.">
            <param name="samFile" type="data" format="sam,qname_input_sorted.bam"/>
        </repeat>

        <param argument="--restrictionSequence" type="text" optional="true" label="Sequence of the restriction site"
            help="This is used to discard reads that end/start with such sequence and that are considered un-ligated fragments or
            &quot;dangling-ends&quot;. If not given, such statistics will not be available." />

        <param argument="--danglingSequence" type="text" optional="true" label="Dangling sequence"
            help="Sequence left by the restriction enzyme after cutting.
                    Each restriction enzyme recognizes a different DNA sequence and,
                    after cutting, they leave behind a specific ‘sticky’ end or dangling end sequence.
                    For example, for HindIII the restriction site is AAGCTT and the dangling end is AGCT.
                    For DpnII, the restriction site and dangling end sequence are the same: GATC.
                    This information is easily found on the description of the restriction enzyme.
                    The dangling sequence is used to classify and report reads whose 5’ end starts with such sequence as dangling-end reads.
                    A significant portion of dangling-end reads in a sample are indicative of a problem with the re-ligation step of the protocol. "/>

        <param argument="--lines" optional='true'  type="integer" label="Lines" help= "Number of lines to consider for the qc test run." value='1000000'/>

    </inputs>
    <outputs>
       <data name="qc" format="html" label="${tool.name} QC on ${on_string}"/>
       <data name="raw_qc" from_work_dir='raw_qc' format='txt' label="${tool.name} raw QC on ${on_string}" />
    </outputs>
    <tests>
        <test>
            <repeat name="samFiles">
                <param name="samFile" value="small_test_R1_unsorted.sam"/>
            </repeat>
            <repeat name="samFiles">
                <param name="samFile" value="small_test_R2_unsorted.sam"/>
            </repeat>
            <conditional name="restrictionCutFileBinSize_conditional">
                <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/>
                <repeat name='binSizes'>
                    <param name="binSize" value="5000"/>
                </repeat>
            </conditional>
            <param name="lines" value='1000'/>
            <output name="raw_qc" file='hicQuickQC/QC.log' compare='diff' lines_diff='2'/>
        </test>
    </tests>
    <help><![CDATA[

Quick QC
====================

Get a quick impression on the quality of your Hi-C data. hicQuickQC considers the first n lines of two bam/sam files to get a first estimate of the quality of the data. It is highly recommended to set the restriction enzyme and dangling end parameter to get a good quality report.

The default is to read the first 1,000,000 reads of the mapped bam files to get a quality estimate of the Hi-C data.

For more information about HiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
]]></help>
    <expand macro="citations" />
</tool>