comparison hicDetectLoops.xml @ 6:c3f9037423bd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author iuc
date Tue, 16 Mar 2021 15:32:15 +0000
parents 1119cdd14ddb
children 0d6c2ff54c76
comparison
equal deleted inserted replaced
5:f16ab52334dd 6:c3f9037423bd
1 <tool id="hicexplorer_hicdetectloops" name="@BINARY@" version="@WRAPPER_VERSION@.0"> 1 <tool id="hicexplorer_hicdetectloops" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>searches for enriched regions</description> 2 <description>searches for enriched regions</description>
3 <macros> 3 <macros>
4 <token name="@BINARY@">hicDetectLoops</token> 4 <token name="@BINARY@">hicDetectLoops</token>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' && 9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
10 @BINARY@ 10 @BINARY@
11 11
12 --matrix 'matrix.$matrix_h5_cooler.ext' 12 --matrix 'matrix.$matrix_h5_cooler.ext'
13
14 #if $peakWidth: 13 #if $peakWidth:
15 --peakWidth $peakWidth 14 --peakWidth $peakWidth
16 #end if 15 #end if
17 16
18 #if $windowSize: 17 #if $windowSize:
25 24
26 #if $peakInteractionsThreshold: 25 #if $peakInteractionsThreshold:
27 --peakInteractionsThreshold $peakInteractionsThreshold 26 --peakInteractionsThreshold $peakInteractionsThreshold
28 #end if 27 #end if
29 28
30 #if $maximumInteractionPercentageThreshold: 29 #if $obsExpThreshold:
31 --maximumInteractionPercentageThreshold $maximumInteractionPercentageThreshold 30 --obsExpThreshold $obsExpThreshold
32 #end if 31 #end if
32
33 #if $pValue: 33 #if $pValue:
34 --pValue $pValue 34 --pValue $pValue
35 #end if 35 #end if
36 #if $maxLoopDistance: 36 #if $maxLoopDistance:
37 --maxLoopDistance $maxLoopDistance 37 --maxLoopDistance $maxLoopDistance
44 #if $chromosomes: 44 #if $chromosomes:
45 #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split(' ') ]) 45 #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split(' ') ])
46 --chromosomes $chromosome 46 --chromosomes $chromosome
47 #end if 47 #end if
48 48
49 --statisticalTest $statisticalTest_selector 49 --expected $expected
50
51 --outFileName output_loop.bedgraph 50 --outFileName output_loop.bedgraph
52
53 --threads @THREADS@ -tpc @THREADS@ 51 --threads @THREADS@ -tpc @THREADS@
54 ]]> 52 ]]>
55 </command> 53 </command>
56 <inputs> 54 <inputs>
57 <expand macro="matrix_h5_cooler_macro" /> 55 <expand macro="matrix_h5_cooler_macro" />
58 <param argument="--peakWidth" type="integer" optional='true' label="Peak width" help= "The width of the peak region in bins. The square around the peak will include (2 * peakWidth)^2 bins." /> 56 <param argument="--peakWidth" type="integer" optional='true' label="Peak width" help= "The width of the peak region in bins. The square around the peak will include (2 * peakWidth)^2 bins." />
59 <param argument="--windowSize" type="integer" optional='true' label="Window size" help= "The window size for the neighborhood region the peak is located in. All values from this region (exclude the values from the peak 57 <param argument="--windowSize" type="integer" optional='true' label="Window size" help= "The window size for the neighborhood region the peak is located in. All values from this region (exclude the values from the peak
60 region) are tested against the peak region for significant difference. The square will have the size of (2 * windowSize)^2 bins" /> 58 region) are tested against the peak region for significant difference. The square will have the size of (2 * windowSize)^2 bins" />
61 <param argument="--pValuePreselection" type="float" label="P-value preselection" help= "Only candidates with p-values less the given threshold will be considered as candidates. 59 <param argument="--pValuePreselection" type="float" label="P-value preselection" help= "Only candidates with p-values less the given threshold will be considered as candidates.
62 For each genomic distance a negative binomial distribution is fitted and for each pixel a p-value given by the cumulative density function is given. 60 For each genomic distance a negative binomial distribution is fitted and for each pixel a p-value given by the cumulative density function is given.
63 This does NOT influence the p-value for the neighborhood testing." value='0.05'/> 61 This does NOT influence the p-value for the neighborhood testing." value='0.05' />
64 <param argument="--peakInteractionsThreshold" type="integer" label="Minimum interaction number" help= "The minimum number of interactions a detected peaks needs to have to be considered." value='5' /> 62 <param argument="--peakInteractionsThreshold" type="integer" label="Minimum interaction number" help= "The minimum number of interactions a detected peaks needs to have to be considered." value='5' />
65 <param argument="--maximumInteractionPercentageThreshold" type="float" value='0.1' label="Maximum interaction share" help= "For each genomic distance the maximum value is considered and all candidates need to have at least \'max_value * maximumInteractionPercentageThreshold\' interactions." /> 63
66 <param argument="--pValue" type="float" label="P-value" help= "Rejection level for the statistical test for H0. H0 is peak region and background have the same distribution." value='0.05'/> 64 <!-- new-->
67 <param argument="--maxLoopDistance" optional='true' type="integer" label="Maximal loop distance" help= "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB." value='2000000'/> 65 <param argument="--obsExpThreshold" type="float" label="Obs/exp interaction threshold" help= "The minimum number of obs/exp interactions a detected peaks needs to have to be considered" value='1.5' />
68 <param argument="--minLoopDistance" optional='true' type="integer" label="Minimum loop distance" help= "Minimum genomic distance of a loop to be considered." value='100000'/> 66
69 <param argument="--chromosomes" optional='true' type="text" label="Chromosomes to include" help= "Chromosomes to include in the analysis. If not set, all chromosomes are included." /> 67 <param argument="--pValue" type="float" label="P-value" help= "Rejection level for the statistical test for H0. H0 is peak region and background have the same distribution." value='0.05' />
70 <param argument="--region" optional='true' type="text" label="Chromosomes to include" help= "The format is chr:start-end." /> 68 <param argument="--maxLoopDistance" optional='true' type="integer" label="Maximal loop distance" help= "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB." value='2000000' />
71 <param name="statisticalTest_selector" type="select" label="Stistical test"> 69 <param argument="--chromosomes" optional='true' type="text" label="Chromosomes to include" help= "Chromosomes to include in the analysis. If not set, all chromosomes are included." />
72 <option value="wilcoxon-rank-sum" selected="True">Wilcoxon rank-sum'</option> 70 <param argument="--region" optional='true' type="text" label="Chromosomes to include" help= "The format is chr:start-end." />
73 <option value="anderson-darling">Anderson-Darling</option> 71 <param argument="--expected" type="select" label="Method to compute the expected value">
72 <option value="mean" selected="True">mean</option>
73 <option value="mean_nonzero">mean_nonzero</option>
74 <option value="mean_nonzero">mean_nonzero_ligation</option>
74 </param> 75 </param>
75 </inputs> 76 </inputs>
76 <outputs> 77 <outputs>
77 <data name='output_loops' from_work_dir='output_loop.bedgraph' format='bedgraph' label='Computed loops'/> 78 <data name='output_loops' from_work_dir='output_loop.bedgraph' format='bedgraph' label='Computed loops' />
78 </outputs> 79 </outputs>
79 <tests> 80 <tests>
80 <test> 81 <test>
81 <param name="matrix_h5_cooler" value="small_test_matrix.cool"/> 82 <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
82 <param name="maxLoopDistance" value="30000000"/> 83 <param name="maxLoopDistance" value="30000000" />
83 <param name="windowSize" value="5"/> 84 <param name="windowSize" value="5" />
84 <param name="peakWidth" value="2"/> 85 <param name="peakWidth" value="2" />
85 <param name="pValue" value="0.5"/> 86 <param name="pValue" value="0.5" />
86 <param name="pValuePreselection" value="0.55"/> 87 <param name="pValuePreselection" value="0.55" />
87 <!-- <param name="chromosomes" value="X"/> --> 88 <output name="output_loops" file="hicDetectLoops/loops.bedgraph" ftype="bedgraph" compare="sim_size" />
88 <output name="output_loops" file="hicDetectLoops/loops.bedgraph" ftype="bedgraph" compare="sim_size"/>
89 </test> 89 </test>
90 </tests> 90 </tests>
91 <help><![CDATA[ 91 <help><![CDATA[
92 92
93 Loop detection 93 Loop detection
94 ============== 94 ==============
95 95
96 Computes enriched regions (peaks) or long range contacts on the given contact matrix. 96 Computes enriched regions (peaks) or long range contacts on the given contact matrix.
97 97
98 hicDetectLoops can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Anderson-Darling / Wilcoxon rank-sum tests. 98 hicDetectLoops can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Wilcoxon rank-sum tests.
99 99
100 The algorithm was mainly developed on GM12878 cells from Rao 2014 on 10kb and 5kb fixed bin size resolution. 100 The algorithm was mainly developed on GM12878 cells from Rao 2014 on 10kb and 5kb fixed bin size resolution.
101 101
102 _________________ 102 _________________
103 103
104 Usage 104 Usage
105 ----- 105 -----
106 106
107 A command line example is available below (easily matchable in Galaxy using each field information): 107 A command line example is available below (easily matchable in Galaxy using each field information):
108 108
109 ``$ hicDetectLoops -m matrix.cool -o loops.bedgraph --maxLoopDistance 2000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05 --peakInteractionsThreshold 20 --maximumInteractionPercentageThreshold 0.1 --statisticTest anderson-darling`` 109 ``$ hicDetectLoops -m matrix.cool -o loops.bedgraph --maxLoopDistance 2000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05 --peakInteractionsThreshold 20``
110 110
111 The candidate selection is based on the restriction of the maximum genomic distance, here 2MB. This distance is given by Rao 2014. For each genomic distance a negative binomial distribution is computed and only interaction pairs with a threshold less than ``--pValuePreselection`` are accepted. Detected candidates need to have at least an interaction count of ``--maximumInteractionPercentageThreshold`` times the maximum value for their genomic distance. Please note that ``--maximumInteractionPercentageThreshold`` was introduced with HiCExplorer release 3.2. Earlier versions did not have this parameter yet and therefore their outputs may differ. In a second step, each candidate is considered compared to its neighborhood. This neighborhood is defined by the ``--windowSize`` parameter in the x and y dimension. Per neighborhood only one candidate is considered, therefore only the candidate with the highest peak values is accepted. As a last step, the neighborhood is split into a peak and background region (parameter ``--peakWidth``). The peakWidth can never be larger than the windowSize. However, we recommend for 10kb matrices a windowSize of 10 and a peakWidth of 6. 111 The candidate selection is based on the restriction of the maximum genomic distance, here 2MB. This distance is given by Rao 2014. For each genomic distance a negative binomial distribution is computed and only interaction pairs with a threshold less than ``--pValuePreselection`` are accepted. Detected candidates need to have at least an interaction count of ``--maximumInteractionPercentageThreshold`` times the maximum value for their genomic distance. Please note that ``--maximumInteractionPercentageThreshold`` was introduced with HiCExplorer release 3.2. Earlier versions did not have this parameter yet and therefore their outputs may differ. In a second step, each candidate is considered compared to its neighborhood. 
This neighborhood is defined by the ``--windowSize`` parameter in the x and y dimension. Per neighborhood only one candidate is considered, therefore only the candidate with the highest peak values is accepted. As a last step, the neighborhood is split into a peak and background region (parameter ``--peakWidth``). The peakWidth can never be larger than the windowSize. However, we recommend for 10kb matrices a windowSize of 10 and a peakWidth of 6.
112 112
113 The output file (``-o loops.bedgraph``) contains the x and y position of each loop and its corresponding p-value of the Anderson-Darling test. 113 The output file (``-o loops.bedgraph``) contains the x and y position of each loop and its corresponding p-value of the Anderson-Darling test.
114 114
124 124
125 For more information about HiCExplorer please consider our documentation on readthedocs.io_. 125 For more information about HiCExplorer please consider our documentation on readthedocs.io_.
126 126
127 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html 127 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
128 128
129 ]]></help> 129 ]]> </help>
130 <expand macro="citations" /> 130 <expand macro="citations" />
131 </tool> 131 </tool>