diff hicQuickQC.xml @ 0:269c4246279d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 3b41d687ff30583540d055f6995de00530cca81d-dirty"
author bgruening
date Mon, 16 Dec 2019 15:38:29 -0500
parents
children b599d3000966
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hicQuickQC.xml	Mon Dec 16 15:38:29 2019 -0500
@@ -0,0 +1,131 @@
+<tool id="hicexplorer_hicquickqc" name="@BINARY@" version="@WRAPPER_VERSION@.0">
+    <description>get a first quality estimate of Hi-C data</description>
+    <macros>
+        <token name="@BINARY@">hicQuickQC</token>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+   
+        mkdir ./QCfolder &&
+        mkdir $qc.files_path &&
+        @BINARY@
+
+            --samFiles
+            #for $repeat in $samFiles:
+                '${repeat.samFile}'
+            #end for
+
+            #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionRestrictionCutFile":
+                --restrictionCutFile '$restrictionCutFileBinSize_conditional.restrictionCutFile'
+                --minDistance $restrictionCutFileBinSize_conditional.minDistance
+                --maxLibraryInsertSize $restrictionCutFileBinSize_conditional.maxLibraryInsertSize
+            #end if
+
+            #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionBinSize":
+                --binSize 
+                #for $repeat in $restrictionCutFileBinSize_conditional.binSizes
+                    '${repeat.binSize}'
+                #end for
+            #end if
+
+
+            #if $restrictionSequence:
+                --restrictionSequence '$restrictionSequence'
+            #end if
+            
+            #if $danglingSequence:
+                --danglingSequence '$danglingSequence'
+            #end if
+            --QCfolder ./QCfolder
+
+            --lines $lines
+
+        && mv ./QCfolder/* $qc.files_path/
+        && mv $qc.files_path/hicQC.html $qc
+        && mv $qc.files_path/*.log raw_qc
+    ]]></command>
+    <inputs>
+         <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" 
+                help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file.">
+            <param name="samFile" type="data" format="sam,qname_input_sorted.bam"/>
+        </repeat>
+        <conditional name="restrictionCutFileBinSize_conditional">
+            <param name="restrictionCutFileBinSize_selector" type="select" label="Choose to use a restriction cut file or a bin size">
+                <option value="optionRestrictionCutFile">Restriction cut file</option>
+                <option value="optionBinSize" selected="True">Bin size</option>
+            </param>
+            <when value="optionRestrictionCutFile">
+                <param argument="--restrictionCutFile" type="data" format="bed" optional="true" label="BED file with all restriction cut places"
+                        help="Should contaion only  mappable restriction sites. If given, the bins are set to match the restriction fragments
+                        (i.e. the region between one restriction site and the next)." />
+                <param argument="--minDistance" type="integer" value="" optional="true" label="Minimum distance between restriction sites"
+                        help="Restriction sites that are closer that this distance are merged into one.
+                        This option only applies if --restrictionCutFile is given."/>
+                <param argument="--maxLibraryInsertSize" type="integer" value="" optional="true"
+                        label="Maximum library insert size defines different cut offs based on the maximum expected library size"
+                        help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer)
+                              which usually is between 800 to 1500 bp. If this value if not known use the default of 1000. The insert value is used to decide if two mates
+                              belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate
+                              is too far away from the nearest restriction site." />
+            </when>
+            <when value="optionBinSize">
+                <repeat  name='binSizes' title='Bin size in bp' min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the resctriction sites.
+                        Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file.">
+                    <param argument="--binSize" type="integer" value="" optional="true" label="Bin size in bp"/>
+                </repeat>
+            </when>
+        </conditional>
+
+        <param argument="--restrictionSequence" type="text" optional="true" label="Sequence of the restriction site"
+            help="This is used to discard reads that end/start with such sequence and that are considered un-ligated fragments or
+            &quot;dangling-ends&quot;. If not given, such statistics will not be available." />
+
+        <param argument="--danglingSequence" type="text" optional="true" label="Dangling sequence"
+            help="Sequence left by the restriction enzyme after cutting.
+                    Each restriction enzyme recognizes a different DNA sequence and, 
+                    after cutting, they leave behind a specific ‘sticky’ end or dangling end sequence.
+                    For example, for HindIII the restriction site is AAGCTT and the dangling end is AGCT. 
+                    For DpnII, the restriction site and dangling end sequence are the same: GATC. 
+                    This information is easily found on the description of the restriction enzyme.
+                    The dangling sequence is used to classify and report reads whose 5’ end starts with such sequence as dangling-end reads.
+                    A significant portion of dangling-end reads in a sample are indicative of a problem with the re-ligation step of the protocol. "/>
+
+        <param argument="--lines" optional='true'  type="integer" label="Lines" help= "Number of lines to consider for the qc test run." value='1000000'/>
+        
+    </inputs>
+    <outputs>
+       <data name="qc" format="html" label="${tool.name} QC on ${on_string}"/>
+       <data name="raw_qc" from_work_dir='raw_qc' format='txt' label="${tool.name} raw QC on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <repeat name="samFiles">
+                <param name="samFile" value="small_test_R1_unsorted.sam"/>
+            </repeat>
+            <repeat name="samFiles">
+                <param name="samFile" value="small_test_R2_unsorted.sam"/>
+            </repeat>
+            <conditional name="restrictionCutFileBinSize_conditional">
+                <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/>
+                <repeat name='binSizes'>
+                    <param name="binSize" value="5000"/>
+                </repeat>
+            </conditional>
+            <param name="lines" value='1000'/>
+            <output name="raw_qc" file='hicQuickQC/QC.log' compare='diff' lines_diff='2'/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Quick QC 
+====================
+
+Get a quick impression on the quality of your Hi-C data. The first 
+
+For more information about HiCExplorer please consider our documentation on readthedocs.io_
+
+.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
+]]></help>
+    <expand macro="citations" />
+</tool>