view hicBuildMatrix.xml @ 4:652fdcbff650 draft

planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit dfa5a68cb20842407941c7ffda9ef956a0e86a04
author iuc
date Sat, 16 Dec 2017 16:32:57 -0500
parents 2a13bfe5bff2
children 3bc1425f9000
line wrap: on
line source

<tool id="hicexplorer_hicbuildmatrix" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <description>creates a contact matrix</description>
    <!-- @BINARY@ is defined here; macros.xml is the only import, so every other
         token and macro referenced in this file is expected to come from it. -->
    <macros>
        <token name="@BINARY@">hicBuildMatrix</token>
        <import>macros.xml</import>
    </macros>
    <!-- samtools is requested on top of whatever the shared `requirements` macro
         provides, because the command sorts the BAM output with `samtools sort`. -->
    <expand macro="requirements">
        <requirement type="package" version="1.6">samtools</requirement>
    </expand>
    <command detect_errors="exit_code"><![CDATA[
        ## Build the Hi-C contact matrix, move the QC report onto the `qc`
        ## HTML dataset and sort the BAM file written by hicBuildMatrix.
        ##
        ## ./QCfolder is a scratch directory for the QC report; the extra-files
        ## directory of the `qc` output receives its contents afterwards.
        ## `-p` keeps the job from failing if either directory already exists.
        mkdir -p ./QCfolder &&
        mkdir -p '$qc.files_path' &&
        hicBuildMatrix

            --samFiles
            #for $repeat in $samFiles:
                '${repeat.samFile}'
            #end for

            #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionRestrictionCutFile":
                ## Every parameter of this branch is declared optional, so a flag
                ## is only emitted when a value was actually supplied. Emitting a
                ## bare flag would make the next flag be parsed as its argument.
                #if $restrictionCutFileBinSize_conditional.restrictionCutFile:
                    --restrictionCutFile '$restrictionCutFileBinSize_conditional.restrictionCutFile'
                #end if
                ## Integers are tested through their string form so that an
                ## explicit 0 is still passed on while an empty field is skipped.
                #if str($restrictionCutFileBinSize_conditional.minDistance) not in ('', 'None'):
                    --minDistance $restrictionCutFileBinSize_conditional.minDistance
                #end if
                #if str($restrictionCutFileBinSize_conditional.maxLibraryInsertSize) not in ('', 'None'):
                    --maxLibraryInsertSize $restrictionCutFileBinSize_conditional.maxLibraryInsertSize
                #end if
            #end if

            #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionBinSize":
                ## binSize is optional as well; skip the flag when it is empty.
                #if str($restrictionCutFileBinSize_conditional.binSize) not in ('', 'None'):
                    --binSize $restrictionCutFileBinSize_conditional.binSize
                #end if
            #end if

            #if $restrictionSequence:
                --restrictionSequence '$restrictionSequence'
            #end if

            #if $region:
                --region '$region'
            #end if

            --outFileName ./matrix.h5
            --outBam ./unsorted.bam

            $keepSelfCircles

            ## String comparison instead of truthiness, so that a mapping
            ## quality of 0 is not silently dropped.
            #if str($minMappingQuality) not in ('', 'None'):
                --minMappingQuality $minMappingQuality
            #end if

            #if $danglingSequence:
                --danglingSequence '$danglingSequence'
            #end if

            --threads @THREADS@

            --QCfolder ./QCfolder
        &&
        ## Paths handed to the shell are quoted so that unusual characters in
        ## the dataset locations cannot split them into several arguments.
        mv ./QCfolder/* '$qc.files_path/'
        &&
        mv '$qc.files_path/hicQC.html' '$qc'
        ## sorted.bam is collected as the `outBam` output via from_work_dir.
        && samtools sort ./unsorted.bam -o sorted.bam
    ]]></command>
    <inputs>
        <!-- Exactly two alignment files are accepted (min = max = 2): one for each mate of the read pairs. -->
        <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process">
            <param name="samFile" type="data" format="sam,bam" label="Sam/Bam file"/>
        </repeat>
        <!-- The matrix bins are defined either by a restriction cut file or by a fixed bin size. -->
        <conditional name="restrictionCutFileBinSize_conditional">
            <param name="restrictionCutFileBinSize_selector" type="select" label="Choose to use a restriction cut file or a bin size">
                <option value="optionRestrictionCutFile">Restriction cut file</option>
                <option value="optionBinSize" selected="true">Bin size</option>
            </param>
            <when value="optionRestrictionCutFile">
                <param argument="--restrictionCutFile" type="data" format="bed" optional="true" label="BED file with all restriction cut places"
                        help="Should contain only mappable restriction sites. If given, the bins are set to match the restriction fragments
                        (i.e. the region between one restriction site and the next)." />
                <param argument="--minDistance" type="integer" value="" optional="true" label="Minimum distance between restriction sites"
                        help="Restriction sites that are closer than this distance are merged into one.
                        This option only applies if --restrictionCutFile is given."/>
                <param argument="--maxLibraryInsertSize" type="integer" value="" optional="true"
                        label="Maximum library insert size defines different cut offs based on the maximum expected library size"
                        help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer)
                              which usually is between 800 to 1500 bp. If this value is not known use the default of 1000. The insert value is used to decide if two mates
                              belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate
                              is too far away from the nearest restriction site." />
            </when>
            <when value="optionBinSize">
                <param argument="--binSize" type="integer" value="" optional="true" label="Bin size in bp"
                    help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the restriction sites.
                    Otherwise all reads in the interval are considered. "/>
            </when>
        </conditional>

        <param argument="--restrictionSequence" type="text" optional="true" label="Sequence of the restriction site"
            help="This is used to discard reads that end/start with such sequence and that are considered un-ligated fragments or
            &quot;dangling-ends&quot;. If not given, such statistics will not be available." />

        <expand macro="region" />

        <!-- Boolean rendered directly as the command-line flag (or as nothing when unchecked). -->
        <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue=""
            label="Keep self circles"
            help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." />

        <expand macro="minMappingQuality" />

        <param argument="--danglingSequence" type="text" optional="true" label="The dangling sequence"
            help="Dangling end sequence left by the restriction enzyme. For DpnII for example, 
                    the dangling end is the same restriction sequence. This is used
                    to discard reads that end/start with such sequence 
                    and that are considered un-ligated fragments or 
                    'dangling-ends'. If not given, such statistics will 
                    not be available."/>
    </inputs>
    <outputs>
        <!-- sorted.bam and matrix.h5 are picked up from the job working directory;
             the qc dataset is filled by the command itself, which moves hicQC.html onto it. -->
        <data name="outBam"
              format="bam"
              from_work_dir="sorted.bam"
              label="${tool.name} BAM file on ${on_string}"/>
        <data name="outFileName"
              format="h5"
              from_work_dir="matrix.h5"
              label="${tool.name} MATRIX on ${on_string}"/>
        <data name="qc"
              format="html"
              label="${tool.name} QC"/>
    </outputs>
    <tests>
        <!-- Fixed bin size run (5000 bp) on the two small mate files.
             NOTE(review): the fixtures are .bam files declared with ftype="sam";
             presumably deliberate to keep Galaxy from treating them as
             coordinate-sorted BAM - confirm before changing. -->
        <test>
            <repeat name="samFiles">
                <param name="samFile" value="small_test_R1_unsorted.bam" ftype="sam"/>
            </repeat>
            <repeat name="samFiles">
                <param name="samFile" value="small_test_R2_unsorted.bam" ftype="sam"/>
            </repeat>
            <conditional name="restrictionCutFileBinSize_conditional">
                <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/>
                <param name="binSize" value="5000"/>
            </conditional>
            <output name="outBam" ftype="bam" file="small_test_matrix_result_sorted.bam"/>
            <!-- The h5 matrix is only compared by size, within a 30000 byte tolerance. -->
            <output name="outFileName" ftype="h5" file="small_test_matrix.h5" compare="sim_size" delta="30000"/>
        </test>
    </tests>
    <help><![CDATA[

Creation of the contact matrix
===============================

``hicBuildMatrix`` creates a contact matrix based on Hi-C read pairs. It requires two sam or bam files
corresponding to the first and second mates of the paired-end Hi-C reads. The sam and bam files should
not be sorted by position. There are two main options to create the Hi-C contact matrix: either by
fixed bin size (e.g. 10,000 bp) or by variable-size bins that match the restriction fragments.
``hicBuildMatrix`` generates a quality control output that can be used to analyze the quality of the Hi-C reads.

Input
-----

``hicBuildMatrix`` has the following parameters:

Parameters
__________


- two input BAM/SAM files
- a bin size
- a restriction cut file as an alternative to the bin size
- restriction sequence, e.g. AAGCTT for HindIII or GATC for DpnII



Output
------

``hicBuildMatrix`` creates as output:

- the contact matrix
- a bam file with the accepted alignments
- a quality report

Example plot
-----------------------------------------------------------------
.. image:: $PATH_TO_IMAGES/SRR027956.svg
   :width: 70%

Contact matrix created with `hicPlotMatrix`.

Quality report
--------------

The quality report gives you information about:

- how many pairs were used to build the contact matrix
- dangling end pairs: These are reads that start with the restriction site and constitute reads that were digested but not ligated.
- same fragment pairs: These are read mates, facing inward, separated by up to 800 bp that do not have a restriction enzyme in between. These read pairs are not valid Hi-C pairs.
- self circles: Self circles are defined as pairs within 25kb with 'outward' read orientation
- self ligations: These are read pairs with a restriction site in between that are within 800 bp.

Contact distance:
_________________
- inter chromosomal
- short range < 20 kb
- long range

Read orientation:
_________________
- inward pairs
- outward pairs
- left pairs
- right pairs 

.. image:: $PATH_TO_IMAGES/hicQC.png
   :width: 70 %

| For more information about HiCExplorer please consult our documentation on readthedocs.io_.

.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html


]]></help>
    <!-- Citation entries are supplied by the `citations` macro, which is not defined in this file. -->
    <expand macro="citations"/>
</tool>