comparison hicPCA.xml @ 16:77919cc3618e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 07802a6bd441d9da888cfb8283f8c2135704f7c9
author iuc
date Wed, 18 Oct 2023 10:39:52 +0000
parents 41dbf4d162a2
children c3e5e2cf12b7
comparison
equal deleted inserted replaced
15:41dbf4d162a2 16:77919cc3618e
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements" /> 7 <expand macro="requirements" />
8 <command detect_errors="exit_code"><![CDATA[ 8 <command detect_errors="exit_code"><![CDATA[
9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' && 9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
10 mkdir output &&
10 @BINARY@ --matrix 'matrix.$matrix_h5_cooler.ext' 11 @BINARY@ --matrix 'matrix.$matrix_h5_cooler.ext'
11 12
12 @CHROMOSOME_LIST@ 13 @CHROMOSOME_LIST@
13 14
14 --outputFileName pca1.$outputFormat pca2.$outputFormat 15 #set $eigenvectors = ' '.join([ '\'%s\'' % $eigenvector for $eigenvector in str($whichEigenvectors).split(' ') ])
16 --whichEigenvectors $eigenvectors
17
18 #set $eigenvectors_output = ' '.join([ '\'output/pca%s.%s\'' % ($eigenvector, $outputFormat) for $eigenvector in str($whichEigenvectors).split(' ') ])
19
20 --outputFileName $eigenvectors_output
15 --format $outputFormat 21 --format $outputFormat
16 22
17 #if $ligation_factor: 23 #if $ligation_factor:
18 $ligation_factor 24 $ligation_factor
19 #end if 25 #end if
35 41
36 #if $obsexpMatrix: 42 #if $obsexpMatrix:
37 --obsexpMatrix obsexp.$matrix_h5_cooler.ext 43 --obsexpMatrix obsexp.$matrix_h5_cooler.ext
38 #end if 44 #end if
39 45
40 && mv pca1.$outputFormat pca1
41 && mv pca2.$outputFormat pca2
42 #if $pearsonMatrix: 46 #if $pearsonMatrix:
43 && mv pearson.$matrix_h5_cooler.ext pearson 47 && mv pearson.$matrix_h5_cooler.ext pearson
44 #end if 48 #end if
45 49
46 #if $obsexpMatrix: 50 #if $obsexpMatrix:
48 #end if 52 #end if
49 53
50 ]]> 54 ]]>
51 </command> 55 </command>
52 <inputs> 56 <inputs>
53 <expand macro='matrix_h5_cooler_macro' /> 57 <expand macro="matrix_h5_cooler_macro" />
54 <param name='outputFormat' type='select' label="Output file format"> 58 <param name="outputFormat" type="select" label="Output file format">
55 <option value='bigwig' selected="true">bigwig</option> 59 <option value="bigwig" selected="true">bigwig</option>
56 <option value="bedgraph">bedgraph</option> 60 <option value="bedgraph">bedgraph</option>
57 </param> 61 </param>
58 62
59 <expand macro="chromosome_list" /> 63 <expand macro="chromosome_list" />
60 64 <param argument="--whichEigenvectors" optional="true" type="text" value="1 2" label="Eigenvectors to compute" help= "Define the eigenvectors that should be computed as a list. For example 1 2 5 to compute the first, second and fifth eigenvetor." />
61 <param name='ligation_factor' type='boolean' truevalue='--ligation_factor' label='Multiplies a scaling factor to each entry of the expected matrix to take care of the proximity ligation' /> 65
62 <param name='ignoreMaskedBins' type='boolean' truevalue='--ignoreMaskedBins' label='This option removes the masked bins before the PCA is computed' /> 66 <param name="ligation_factor" type="boolean" truevalue="--ligation_factor" label="Multiplies a scaling factor to each entry of the expected matrix to take care of the proximity ligation" />
67 <param name="ignoreMaskedBins" type="boolean" truevalue="--ignoreMaskedBins" label="This option removes the masked bins before the PCA is computed" />
63 68
64 <conditional name="extra_track_conditional"> 69 <conditional name="extra_track_conditional">
65 <param name='extra_track_selection' label='Extra track type' type='select'> 70 <param name="extra_track_selection" label="Extra track type" type="select">
66 <option value='' selected='true'>No track</option> 71 <option value="" selected="true">No track</option>
67 <option value='gene_density'>Gene density</option> 72 <option value="gene_density">Gene density</option>
68 <option value='histon_mark'>Histon mark coverage</option> 73 <option value="histon_mark">Histon mark coverage</option>
69 </param> 74 </param>
70 <when value='gene_density' > 75 <when value="gene_density" >
71 <param name='extraTrack' type='data' format='bed' label='Correlate PCA with e.g. gene density or histon marks to flip sign' /> 76 <param name="extraTrack" type="data" format="bed" label="Correlate PCA with e.g. gene density or histon marks to flip sign" />
72 </when> 77 </when>
73 <when value='histon_mark'> 78 <when value="histon_mark">
74 <param name='extraTrack' type='data' format='bed,bigwig' label='Correlate PCA with e.g. gene density or histon marks to flip sign' /> 79 <param name="extraTrack" type="data" format="bed,bigwig" label="Correlate PCA with e.g. gene density or histon marks to flip sign" />
75 <param name='histonMarkType' type='select' label="Histon mark type"> 80 <param name="histonMarkType" type="select" label="Histon mark type">
76 <option value='active' selected="true">active</option> 81 <option value="active" selected="true">active</option>
77 <option value="inactive">inactive</option> 82 <option value="inactive">inactive</option>
78 </param> 83 </param>
79 </when> 84 </when>
80 <when value='' /> 85 <when value="" />
81 </conditional> 86 </conditional>
82 <param argument='--pearsonMatrix' type='boolean' truevalue='--pearsonMatrix' falsevalue='' label='Return internally used Pearson matrix' /> 87 <param argument="--pearsonMatrix" type="boolean" truevalue="--pearsonMatrix" falsevalue="" label="Return internally used Pearson matrix" />
83 <param argument='--obsexpMatrix' type='boolean' truevalue='--obsexpMatrix' falsevalue='' label='Return internally used observed / expected matrix' /> 88 <param argument="--obsexpMatrix" type="boolean" truevalue="--obsexpMatrix" falsevalue="" label="Return internally used observed / expected matrix" />
84 89
85 </inputs> 90 </inputs>
86 <outputs> 91 <outputs>
87 <data name="pca1" from_work_dir="pca1" format="bigwig" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: PC1"> 92 <data format="bedgraph" name="pca">
93 <discover_datasets pattern="__name_and_ext__" directory="output" visible="true" />
88 <change_format> 94 <change_format>
89 <when input="outputFormat" value="bedgraph" format="bedgraph" /> 95 <when input="outputFormat" value="bigwig" format="bigwig" />
90 </change_format>
91 </data>
92 <data name="pca2" from_work_dir="pca2" format="bigwig" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: PC2">
93 <change_format>
94 <when input="outputFormat" value="bedgraph" format="bedgraph" />
95 </change_format> 96 </change_format>
96 </data> 97 </data>
97 98
98 <data name="pearson_outfile" from_work_dir="pearson" format="cool" label="${tool.name} on ${on_string}: Pearson matrix"> 99 <data name="pearson_outfile" from_work_dir="pearson" format="cool" label="${tool.name} on ${on_string}: Pearson matrix">
99 <filter>pearsonMatrix</filter> 100 <filter>pearsonMatrix</filter>
107 <when input_dataset="matrix_h5_cooler" attribute="ext" value="h5" format="h5" /> 108 <when input_dataset="matrix_h5_cooler" attribute="ext" value="h5" format="h5" />
108 </change_format> 109 </change_format>
109 </data> 110 </data>
110 </outputs> 111 </outputs>
111 <tests> 112 <tests>
112 <test> 113 <test expect_num_outputs="1">
113 <param name="matrix_h5_cooler" value="small_test_matrix.cool" /> 114 <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
114 <param name="outputFormat" value="bigwig" /> 115 <param name="outputFormat" value="bigwig" />
115 <conditional name="extra_track_conditional"> 116 <conditional name="extra_track_conditional">
116 <param name="extra_track_selection" value="" /> 117 <param name="extra_track_selection" value="" />
117 </conditional> 118 </conditional>
118 119 <output name="pca">
119 <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 120 <discovered_dataset designation="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
120 <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 121 <discovered_dataset designation="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
121 </test> 122 </output>
122 <test> 123
124 </test>
125 <test expect_num_outputs="1">
123 <param name="matrix_h5_cooler" value="small_test_matrix.cool" /> 126 <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
124 <param name="outputFormat" value="bigwig" /> 127 <param name="outputFormat" value="bigwig" />
125 <param name="ligation_factor" value="True" /> 128 <param name="ligation_factor" value="True" />
126 129
127 <conditional name="extra_track_conditional"> 130 <conditional name="extra_track_conditional">
128 <param name="extra_track_selection" value="" /> 131 <param name="extra_track_selection" value="" />
129 </conditional> 132 </conditional>
130 133 <output name="pca">
131 <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 134 <discovered_dataset designation="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
132 <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 135 <discovered_dataset designation="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
133 </test> 136 </output>
134 <test> 137
138 </test>
139 <test expect_num_outputs="1">
135 <param name="matrix_h5_cooler" value="small_test_matrix.cool" /> 140 <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
136 <param name="outputFormat" value="bigwig" /> 141 <param name="outputFormat" value="bigwig" />
137 <param name="ignoreMaskedBins" value="True" /> 142 <param name="ignoreMaskedBins" value="True" />
138 143
139 <conditional name="extra_track_conditional"> 144 <conditional name="extra_track_conditional">
140 <param name="extra_track_selection" value="" /> 145 <param name="extra_track_selection" value="" />
141 </conditional> 146 </conditional>
142 147
143 <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta='60000' /> 148 <output name="pca">
144 <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta='60000' /> 149 <discovered_dataset designation="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
145 </test> 150 <discovered_dataset designation="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
146 <test> 151 </output>
152 </test>
153 <test expect_num_outputs="1">
147 <param name="matrix_h5_cooler" value="small_test_matrix.h5" /> 154 <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
148 <param name="outputFormat" value="bigwig" /> 155 <param name="outputFormat" value="bigwig" />
149 <conditional name="extra_track_conditional"> 156 <conditional name="extra_track_conditional">
150 <param name="extra_track_selection" value="gene_density" /> 157 <param name="extra_track_selection" value="gene_density" />
151 <param name="extraTrack" value="dm3_genes.bed.gz" /> 158 <param name="extraTrack" value="dm3_genes.bed.gz" />
156 </repeat> 163 </repeat>
157 <repeat name="chromosome_list"> 164 <repeat name="chromosome_list">
158 <param name="chromosomes" value="chrXHet" /> 165 <param name="chromosomes" value="chrXHet" />
159 </repeat> 166 </repeat>
160 167
161 <output name="pca1" file="hicPCA/pca1_test2.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 168 <output name="pca">
162 <output name="pca2" file="hicPCA/pca2_test2.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 169 <discovered_dataset designation="pca1" file="hicPCA/pca1_test2.bw" ftype="bigwig" compare="sim_size" delta="100000" />
163 </test> 170 <discovered_dataset designation="pca2" file="hicPCA/pca2_test2.bw" ftype="bigwig" compare="sim_size" delta="100000" />
164 <test> 171 </output>
172 </test>
173 <test expect_num_outputs="3">
165 <param name="matrix_h5_cooler" value="small_test_matrix.h5" /> 174 <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
166 175
167 <param name="outputFormat" value="bigwig" /> 176 <param name="outputFormat" value="bigwig" />
168 <conditional name="extra_track_conditional"> 177 <conditional name="extra_track_conditional">
169 <param name="extra_track_selection" value="" /> 178 <param name="extra_track_selection" value="" />
170 </conditional> 179 </conditional>
171 <param name='pearsonMatrix' value='True' /> 180 <param name="pearsonMatrix" value="True" />
172 <param name='obsexpMatrix' value='True' /> 181 <param name="obsexpMatrix" value="True" />
173 182
174 <repeat name="chromosome_list"> 183 <repeat name="chromosome_list">
175 <param name="chromosomes" value="chrX" /> 184 <param name="chromosomes" value="chrX" />
176 </repeat> 185 </repeat>
177 <repeat name="chromosome_list"> 186 <repeat name="chromosome_list">
178 <param name="chromosomes" value="chrXHet" /> 187 <param name="chromosomes" value="chrXHet" />
179 </repeat> 188 </repeat>
180 189
181 <output name="pca1" file="hicPCA/pca1_test3.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 190 <output name="pca">
182 <output name="pca2" file="hicPCA/pca2_test3.bw" ftype="bigwig" compare="sim_size" delta='40000' /> 191 <discovered_dataset designation="pca1" file="hicPCA/pca1_test3.bw" ftype="bigwig" compare="sim_size" delta="100000" />
192 <discovered_dataset designation="pca2" file="hicPCA/pca2_test3.bw" ftype="bigwig" compare="sim_size" delta="100000" />
193 </output>
183 194
184 <output name="pearson_outfile" ftype="h5"> 195 <output name="pearson_outfile" ftype="h5">
185 <assert_contents> 196 <assert_contents>
186 <has_h5_keys keys='intervals,matrix' /> 197 <has_h5_keys keys="intervals,matrix" />
187 </assert_contents> 198 </assert_contents>
188 </output> 199 </output>
189 <output name="obsexp_outfile" ftype="h5"> 200 <output name="obsexp_outfile" ftype="h5">
190 <assert_contents> 201 <assert_contents>
191 <has_h5_keys keys='intervals,matrix' /> 202 <has_h5_keys keys="intervals,matrix" />
192 </assert_contents> 203 </assert_contents>
193 </output> 204 </output>
194 </test> 205 </test>
195 </tests> 206 </tests>
196 <help><![CDATA[ 207 <help><![CDATA[
199 ============================ 210 ============================
200 211
201 `Lieberman-Aiden et al. (2009)`_ demonstrated that open and closed chromatin domains throughout the genome occupy different spatial compartments in the nucleus, defined as A (active) and B (inactive) compartments. 212 `Lieberman-Aiden et al. (2009)`_ demonstrated that open and closed chromatin domains throughout the genome occupy different spatial compartments in the nucleus, defined as A (active) and B (inactive) compartments.
202 213
203 **hicPCA** computes two eigenvector files based on the input matrix for an A / B compartment analysis following the computation steps detailed by `Lieberman-Aiden et al. (2009)`_: the transformation of the contact matrix 214 **hicPCA** computes two eigenvector files based on the input matrix for an A / B compartment analysis following the computation steps detailed by `Lieberman-Aiden et al. (2009)`_: the transformation of the contact matrix
204 into an observed vs. expected matrix and consecutively a Pearson correlation matrix shows a plaid pattern. These plaid patterns are called A and B. Applying a PCA on the Pearson correlation matrix gives the eigenvectors 215 into an observed vs. expected matrix and consecutively a Pearson correlation matrix shows a plaid pattern. These plaid patterns are called A and B. Applying a PCA on the obs/exp matrix gives the eigenvectors
205 and Lieberman-Aiden shows that the values of the eigenvectors correspond to the distribution of genes and with features of open and closed chromatin. In some cases the first principal component corresponds to the two 216 and Lieberman-Aiden shows that the values of the eigenvectors correspond to the distribution of genes and with features of open and closed chromatin. In some cases the first principal component corresponds to the two
206 chromosome arms and the second eigenvector to the plaid pattern. Therefore the first two principal components always need to be returned and investigated. 217 chromosome arms and the second eigenvector to the plaid pattern. Therefore the first two principal components always need to be returned and investigated.
207 218
208 _________________ 219 _________________
209 220
226 _________________ 237 _________________
227 238
228 | For more information about HiCExplorer please consider our documentation on readthedocs.io_ 239 | For more information about HiCExplorer please consider our documentation on readthedocs.io_
229 240
230 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html 241 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
231 .. _`Lieberman-Aiden et al. (2009)`: https://doi.org/10.1126/science.1181369 242 .. _`Lieberman-Aiden et al. (2009)`: https://pubmed.ncbi.nlm.nih.gov/19815776/
232 ]]> </help> 243 ]]> </help>
233 <expand macro="citations" /> 244 <expand macro="citations" />
234 </tool> 245 </tool>