diff hicPCA.xml @ 16:77919cc3618e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 07802a6bd441d9da888cfb8283f8c2135704f7c9
author iuc
date Wed, 18 Oct 2023 10:39:52 +0000
parents 41dbf4d162a2
children c3e5e2cf12b7
line wrap: on
line diff
--- a/hicPCA.xml	Tue Jan 10 18:56:55 2023 +0000
+++ b/hicPCA.xml	Wed Oct 18 10:39:52 2023 +0000
@@ -7,11 +7,17 @@
     <expand macro="requirements" />
     <command detect_errors="exit_code"><![CDATA[
         ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' &&
+        mkdir output &&
         @BINARY@ --matrix 'matrix.$matrix_h5_cooler.ext'
 
         @CHROMOSOME_LIST@
 
-        --outputFileName pca1.$outputFormat pca2.$outputFormat
+        ## Parse the eigenvector list once; split() ignores repeated/leading/trailing blanks, and an empty value falls back to the default 1 2.
+        #set $eigenvector_ids = str($whichEigenvectors).split() or ['1', '2']
+        #set $eigenvectors = ' '.join([ '\'%s\'' % $eigenvector for $eigenvector in $eigenvector_ids ])
+        --whichEigenvectors $eigenvectors
+        #set $eigenvectors_output = ' '.join([ '\'output/pca%s.%s\'' % ($eigenvector, $outputFormat) for $eigenvector in $eigenvector_ids ])
+        --outputFileName $eigenvectors_output
         --format $outputFormat
 
         #if $ligation_factor:
@@ -37,8 +43,6 @@
             --obsexpMatrix obsexp.$matrix_h5_cooler.ext
         #end if
 
-        && mv pca1.$outputFormat pca1
-        && mv pca2.$outputFormat pca2
         #if $pearsonMatrix:
             && mv pearson.$matrix_h5_cooler.ext pearson
         #end if
@@ -50,48 +54,45 @@
 ]]>
     </command>
     <inputs>
-        <expand macro='matrix_h5_cooler_macro' />
-        <param name='outputFormat' type='select' label="Output file format">
-            <option value='bigwig' selected="true">bigwig</option>
+        <expand macro="matrix_h5_cooler_macro" />
+        <param name="outputFormat" type="select" label="Output file format">
+            <option value="bigwig" selected="true">bigwig</option>
             <option value="bedgraph">bedgraph</option>
         </param>
 
         <expand macro="chromosome_list" />
+        <param argument="--whichEigenvectors" optional="true" type="text" value="1 2" label="Eigenvectors to compute" help="Define the eigenvectors that should be computed as a list. For example 1 2 5 to compute the first, second and fifth eigenvector." />
 
-        <param name='ligation_factor' type='boolean' truevalue='--ligation_factor' label='Multiplies a scaling factor to each entry of the expected matrix to take care of the proximity ligation' />
-        <param name='ignoreMaskedBins' type='boolean' truevalue='--ignoreMaskedBins' label='This option removes the masked bins before the PCA is computed' />
+        <param name="ligation_factor" type="boolean" truevalue="--ligation_factor" label="Multiplies a scaling factor to each entry of the expected matrix to take care of the proximity ligation" />
+        <param name="ignoreMaskedBins" type="boolean" truevalue="--ignoreMaskedBins" label="This option removes the masked bins before the PCA is computed" />
 
         <conditional name="extra_track_conditional">
-            <param name='extra_track_selection' label='Extra track type' type='select'>
-                <option value='' selected='true'>No track</option>
-                <option value='gene_density'>Gene density</option>
-                <option value='histon_mark'>Histon mark coverage</option>
+            <param name="extra_track_selection" label="Extra track type" type="select">
+                <option value="" selected="true">No track</option>
+                <option value="gene_density">Gene density</option>
+                <option value="histon_mark">Histon mark coverage</option>
             </param>
-            <when value='gene_density' >
-                <param name='extraTrack' type='data' format='bed' label='Correlate PCA with e.g. gene density or histon marks to flip sign' />
+            <when value="gene_density" >
+                <param name="extraTrack" type="data" format="bed" label="Correlate PCA with e.g. gene density or histon marks to flip sign" />
             </when>
-            <when value='histon_mark'>
-                <param name='extraTrack' type='data' format='bed,bigwig' label='Correlate PCA with e.g. gene density or histon marks to flip sign' />
-                <param name='histonMarkType' type='select' label="Histon mark type">
-                    <option value='active' selected="true">active</option>
+            <when value="histon_mark">
+                <param name="extraTrack" type="data" format="bed,bigwig" label="Correlate PCA with e.g. gene density or histon marks to flip sign" />
+                <param name="histonMarkType" type="select" label="Histon mark type">
+                    <option value="active" selected="true">active</option>
                     <option value="inactive">inactive</option>
                 </param>
             </when>
-            <when value='' />
+            <when value="" />
         </conditional>
-        <param argument='--pearsonMatrix' type='boolean' truevalue='--pearsonMatrix' falsevalue='' label='Return internally used Pearson matrix' />
-        <param argument='--obsexpMatrix' type='boolean' truevalue='--obsexpMatrix' falsevalue='' label='Return internally used observed / expected matrix' />
+        <param argument="--pearsonMatrix" type="boolean" truevalue="--pearsonMatrix" falsevalue="" label="Return internally used Pearson matrix" />
+        <param argument="--obsexpMatrix" type="boolean" truevalue="--obsexpMatrix" falsevalue="" label="Return internally used observed / expected matrix" />
 
     </inputs>
     <outputs>
-        <data name="pca1" from_work_dir="pca1" format="bigwig" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: PC1">
+        <data format="bedgraph" name="pca">
+            <discover_datasets pattern="__name_and_ext__" directory="output" visible="true" />
             <change_format>
-                <when input="outputFormat" value="bedgraph" format="bedgraph" />
-            </change_format>
-        </data>
-        <data name="pca2" from_work_dir="pca2" format="bigwig" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: PC2">
-            <change_format>
-                <when input="outputFormat" value="bedgraph" format="bedgraph" />
+                <when input="outputFormat" value="bigwig" format="bigwig" />
             </change_format>
         </data>
 
@@ -109,17 +110,19 @@
         </data>
     </outputs>
     <tests>
-        <test>
+        <test expect_num_outputs="1">
             <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
             <param name="outputFormat" value="bigwig" />
             <conditional name="extra_track_conditional">
                 <param name="extra_track_selection" value="" />
             </conditional>
+             <output name="pca">
+                <discovered_dataset designation="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+                <discovered_dataset designation="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+            </output>
 
-            <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' />
-            <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
             <param name="outputFormat" value="bigwig" />
             <param name="ligation_factor" value="True" />
@@ -127,11 +130,13 @@
             <conditional name="extra_track_conditional">
                 <param name="extra_track_selection" value="" />
             </conditional>
+            <output name="pca">
+                <discovered_dataset designation="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+                <discovered_dataset designation="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+            </output>
 
-            <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' />
-            <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta='40000' />
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
             <param name="outputFormat" value="bigwig" />
             <param name="ignoreMaskedBins" value="True" />
@@ -140,10 +145,12 @@
                 <param name="extra_track_selection" value="" />
             </conditional>
 
-            <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta='60000' />
-            <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta='60000' />
+            <output name="pca">
+                <discovered_dataset designation="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+                <discovered_dataset designation="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+            </output>
         </test>
-        <test>
+        <test expect_num_outputs="1">
             <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
             <param name="outputFormat" value="bigwig" />
             <conditional name="extra_track_conditional">
@@ -158,18 +165,20 @@
                 <param name="chromosomes" value="chrXHet" />
             </repeat>
 
-            <output name="pca1" file="hicPCA/pca1_test2.bw" ftype="bigwig" compare="sim_size" delta='40000' />
-            <output name="pca2" file="hicPCA/pca2_test2.bw" ftype="bigwig" compare="sim_size" delta='40000' />
+            <output name="pca">
+                <discovered_dataset designation="pca1" file="hicPCA/pca1_test2.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+                <discovered_dataset designation="pca2" file="hicPCA/pca2_test2.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+            </output>
         </test>
-        <test>
+        <test expect_num_outputs="3">
             <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
 
             <param name="outputFormat" value="bigwig" />
             <conditional name="extra_track_conditional">
                 <param name="extra_track_selection" value="" />
             </conditional>
-            <param name='pearsonMatrix' value='True' />
-            <param name='obsexpMatrix' value='True' />
+            <param name="pearsonMatrix" value="True" />
+            <param name="obsexpMatrix" value="True" />
 
             <repeat name="chromosome_list">
                 <param name="chromosomes" value="chrX" />
@@ -178,17 +187,19 @@
                 <param name="chromosomes" value="chrXHet" />
             </repeat>
 
-            <output name="pca1" file="hicPCA/pca1_test3.bw" ftype="bigwig" compare="sim_size" delta='40000' />
-            <output name="pca2" file="hicPCA/pca2_test3.bw" ftype="bigwig" compare="sim_size" delta='40000' />
+            <output name="pca">
+                <discovered_dataset designation="pca1" file="hicPCA/pca1_test3.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+                <discovered_dataset designation="pca2" file="hicPCA/pca2_test3.bw" ftype="bigwig" compare="sim_size" delta="100000" />
+            </output>
 
             <output name="pearson_outfile" ftype="h5">
                 <assert_contents>
-                    <has_h5_keys keys='intervals,matrix' />
+                    <has_h5_keys keys="intervals,matrix" />
                 </assert_contents>
             </output>
             <output name="obsexp_outfile" ftype="h5">
                 <assert_contents>
-                    <has_h5_keys keys='intervals,matrix' />
+                    <has_h5_keys keys="intervals,matrix" />
                 </assert_contents>
             </output>
         </test>
@@ -201,7 +212,7 @@
 `Lieberman-Aiden et al. (2009)`_ demonstrated that open and closed chromatin domains throughout the genome occupy different spatial compartments in the nucleus, defined as A (activate) and B (inactive) compartments.
 
 **hicPCA** computes two eigenvector files based on the input matrix for an A / B compartment analysis following the computation steps detailed by `Lieberman-Aiden et al. (2009)`_: the transformation of the contact matrix
-into an observed vs. expected matrix and consecutively a Pearson correlation matrix shows a plaid pattern. These plaid pattern are called A and B. Applying a PCA on the Pearson correlation matrix gives the eigenvectors
+into an observed vs. expected matrix and consecutively a Pearson correlation matrix shows a plaid pattern. These plaid patterns are called A and B. Applying a PCA on the obs/exp matrix gives the eigenvectors
 and Lieberman-Aiden shows that the values of the eigenvectors correspond to the distribution of genes and with features of open and closed chromatin. In some cases the first principal component corresponds to the two
 chromosomes arms and the second eigenvector to the plaid pattern. Therefore always the first two principal components needs to be returned and investigated.
 
@@ -228,7 +239,7 @@
 | For more information about HiCExplorer please consider our documentation on readthedocs.io_
 
 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
-.. _`Lieberman-Aiden et al. (2009)`: https://doi.org/10.1126/science.1181369
+.. _`Lieberman-Aiden et al. (2009)`: https://pubmed.ncbi.nlm.nih.gov/19815776/
 ]]>    </help>
     <expand macro="citations" />
 </tool>