changeset 3:1fb0f2092177 draft

Uploaded
author greg
date Mon, 30 Oct 2017 09:54:20 -0400 (2017-10-30)
parents 2fe7044626ac
children 48271ee78198
files .shed.yml kaks_analysis.xml macros.xml
diffstat 3 files changed, 37 insertions(+), 119 deletions(-) [+]
line wrap: on
line diff
--- a/.shed.yml	Thu Aug 24 13:41:07 2017 -0400
+++ b/.shed.yml	Mon Oct 30 09:54:20 2017 -0400
@@ -8,7 +8,7 @@
   utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
   comparative evolutionary studies.  This tool performs orthologous or paralogous ks analyses of coding sequences
   and amino acid sequences.
-remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/kaks_analysis
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/kaks_analysis
 type: unrestricted
 categories:
 - Phylogenetics
--- a/kaks_analysis.xml	Thu Aug 24 13:41:07 2017 -0400
+++ b/kaks_analysis.xml	Mon Oct 30 09:54:20 2017 -0400
@@ -1,9 +1,11 @@
-<tool id="plant_tribes_kaks_analysis" name="KaKsAnalysis" version="@WRAPPER_VERSION@.2">
+<tool id="plant_tribes_kaks_analysis" name="KaKsAnalysis" version="@WRAPPER_VERSION@.3.0">
     <description>estimates paralogous and orthologous pairwise synonymous (Ks) and non-synonymous (Ka) substitution rates</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements_kaks_analysis" />
+    <requirements>
+        <requirement type="package" version="1.0.3">plant_tribes_kaks_analysis</requirement>
+    </requirements>
     <command detect_errors="exit_code"><![CDATA[
 #set output_dir = 'kaksAnalysis_dir'
 #set comparison = $comparison_cond.comparison
@@ -34,6 +36,9 @@
 #if str($comparison) == 'orthologs':
     --coding_sequences_species_2 '$comparison_cond.coding_sequences_species_2'
     --proteins_species_2 '$comparison_cond.proteins_species_2'
+    #if str($comparison_cond.blast_option) == 'crbblast':
+        --crb_blast
+    #end if
 #end if
 #if str($options_type.options_type_selector) == 'advanced':
     #if str($set_min_coverage) == 'yes':
@@ -58,18 +63,19 @@
         --max_ks $set_upper_ks_limit_cond.max_ks
     #end if
 #end if
->/dev/null
+&>proc.log
 && mv $output_dir/species1.fna '$output_species1_fna'
 && mv $output_dir/species1.faa '$output_species1_faa'
 #if str($comparison) == 'paralogs':
     && mv $output_dir/species1.fna.blastn.paralogs '$output_species1_paralog'
+    && mv $output_dir/*.rbhb '$output_rbhb_paralog'
 #else:
     && mv $output_dir/species2.faa '$output_species2_faa'
     && mv $output_dir/species2.fna '$output_species2_fna'
     && mv $output_dir/species1.fna.blastn.orthologs '$output_species1_ortholog'
     && mv $output_dir/species2.fna.blastn.orthologs '$output_species2_ortholog'
+    && mv $output_dir/*.rbhb '$output_rbhb_ortholog'
 #end if
-&& mv $output_dir/*.rbhb '$output_rbhb'
 && mv $output_dir/*.kaks '$output_kaks'
 #if str($fit_components) == 'yes':
     && mv $output_dir/*.components '$output_components'
@@ -87,6 +93,10 @@
             <when value="orthologs">
                 <param name="coding_sequences_species_2" format="fasta" type="data" label="Coding sequences for the second species" />
                 <param name="proteins_species_2" format="fasta" type="data" label="Protein sequences for the second species" />
+                <param name="blast_option" type="select" display="radio" label="Determine for cross-species orthologs using">
+                    <option value="blast" selected="true">reciprocal best BLAST</option>
+                    <option value="crbblast">conditional reciprocal best BLAST</option>
+                </param>
             </when>
         </conditional>
         <conditional name="options_type">
@@ -164,26 +174,31 @@
         </param>
     </inputs>
     <outputs>
-        <data name="output_species1_fna" format="fasta" label="${tool.name} (coding sequences) on ${on_string}" />
-        <data name="output_species1_faa" format="fasta" label="${tool.name} (amino acids) on ${on_string}" />
-        <data name="output_species2_fna" format="fasta" label="${tool.name} (coding sequences) on ${on_string}">
+        <data name="output_species1_fna" format="fasta" label="${tool.name} (coding sequences species1) on ${on_string}" />
+        <data name="output_species1_faa" format="fasta" label="${tool.name} (amino acids species1) on ${on_string}" />
+        <data name="output_species2_fna" format="fasta" label="${tool.name} (coding sequences species2) on ${on_string}">
             <filter>comparison_cond['comparison'] == 'orthologs'</filter>
         </data>
-        <data name="output_species2_faa" format="fasta" label="${tool.name} (amino acids) on ${on_string}">
+        <data name="output_species2_faa" format="fasta" label="${tool.name} (amino acids species2) on ${on_string}">
             <filter>comparison_cond['comparison'] == 'orthologs'</filter>
         </data>
-        <data name="output_species1_paralog" format="tabular" label="${tool.name} (blastn results) on ${on_string}">
+        <data name="output_species1_paralog" format="tabular" label="${tool.name} (blastn results species1 vs species1) on ${on_string}">
             <filter>comparison_cond['comparison'] == 'paralogs'</filter>
         </data>
-        <data name="output_species1_ortholog" format="tabular" label="${tool.name} (blastn results) on ${on_string}">
+        <data name="output_species1_ortholog" format="tabular" label="${tool.name} (blastn results species1 vs species2) on ${on_string}">
+            <filter>comparison_cond['comparison'] == 'orthologs'</filter>
+        </data>
+        <data name="output_species2_ortholog" format="tabular" label="${tool.name} (blastn results species2 vs species1) on ${on_string}">
             <filter>comparison_cond['comparison'] == 'orthologs'</filter>
         </data>
-        <data name="output_species2_ortholog" format="tabular" label="${tool.name} (blastn results) on ${on_string}">
+        <data name="output_rbhb_paralog" format="tabular" label="${tool.name} (paralogous pairs) on ${on_string}">
+            <filter>comparison_cond['comparison'] == 'paralogs'</filter>
+        </data>
+        <data name="output_rbhb_ortholog" format="tabular" label="${tool.name} (orthologous pairs) on ${on_string}">
             <filter>comparison_cond['comparison'] == 'orthologs'</filter>
         </data>
-        <data name="output_rbhb" format="tabular" label="${tool.name} (paralogous pairs) on ${on_string}" />
-        <data name="output_kaks" format="tabular" label="${tool.name} on ${on_string}" />
-        <data name="output_components" format="ptkscmp" label="${tool.name} (significant components in the ks distribution) on ${on_string}">
+        <data name="output_kaks" format="tabular" label="${tool.name} (KaKs distribution) on ${on_string}" />
+        <data name="output_components" format="ptkscmp" label="${tool.name} (significant components in the KaKs distribution) on ${on_string}">
             <filter>options_type['options_type_selector'] == 'advanced' and options_type['fit_components_cond']['fit_components'] == 'yes'</filter>
         </data>
     </outputs>
@@ -195,7 +210,7 @@
             <output name="output_species1_fna" file="species1_cds.fasta" ftype="fasta" compare="contains"/>
             <output name="output_species1_faa" file="species1_pep.fasta" ftype="fasta" compare="contains"/>
             <output name="output_species1_paralog" file="output_blastn_results1.tabular" ftype="tabular" compare="contains"/>
-            <output name="output_rbhb" file="output_paralogous_pairs.tabular" ftype="tabular"/>
+            <output name="output_rbhb_paralog" file="output_paralogous_pairs.tabular" ftype="tabular"/>
             <output name="output_kaks" file="output1.tabular" ftype="tabular"/>
         </test>
     </tests>
@@ -218,6 +233,12 @@
 
   - **Coding sequences for the second species** - coding sequence fasta file for the second species either produced by the AssemblyPostProcessor tool or from an external source selected from your history.  This option is required only for orthologous comparison.
   - **Protein sequences for the second species** - corresponding protein sequence fasta files for the second species either produced by the AssemblyPostProcessor tool or from an external source selected from your history.  This option is required only for orthologous comparison.
+
+  - **Determine for cross-species orthologs using** - select the BLAST strategy used to assign cross-species orthologs.
+
+    - **reciprocal best BLAST** - use the default stringent reciprocal BLAST package for orthology assignment.
+    - **conditional reciprocal best BLAST** - use the CRB-BLAST package for orthology assignment, which increases sensitivity to orthology comparisons and determines additional cross-species orthologs that are being left out by the default stringent reciprocal BLAST.
+
   - **Alignment coverage configuration** - select 'Yes' to set the minimum allowable alignment coverage length between homologous pairs.  PlantTribes uses global codon alignment match score to determine the pairwise alignment coverage.  By default, the match score is set to 0.5 if 'No' is selected.
 
     - **match score** - number of base matches in a pairwise sequence alignment divided by the length of shorter sequence.  Positions in the alignment corresponding to gaps are not considered.  The score is restricted to the range 0.3 - 1.0.
--- a/macros.xml	Thu Aug 24 13:41:07 2017 -0400
+++ b/macros.xml	Mon Oct 30 09:54:20 2017 -0400
@@ -1,47 +1,6 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <macros>
     <token name="@WRAPPER_VERSION@">1.0</token>
-    <xml name="requirements_assembly_post_processor">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_assembly_post_processor</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_aligner">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_aligner</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_classifier">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_classifier</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_integrator">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_integrator</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_kaks_analysis">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_kaks_analysis</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_ks_distribution">
-        <requirements>
-            <requirement type="package" version="1.3.2">r-optparse</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_phylogeny_builder">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_phylogeny_builder</requirement>
-        </requirements>
-    </xml>
-    <xml name="param_codon_alignments">
-        <param name="codon_alignments" type="select" label="Codon alignments">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_method">
         <param name="method" type="select" label="Protein clustering method">
             <option value="gfam" selected="true">GFam</option>
@@ -49,74 +8,12 @@
             <option value="orthomcl">OrthoMCL</option>
         </param>
     </xml>
-    <xml name="param_options_type">
-        <param name="options_type" type="select" label="Options Configuration">
-            <option value="basic" selected="true">Basic</option>
-            <option value="advanced">Advanced</option>
-        </param>
-    </xml>
-    <xml name="param_orthogroup_fna">
-        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_scaffold">
         <param name="scaffold" type="select" label="Gene family scaffold">
             <options from_data_table="plant_tribes_scaffolds" />
             <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
         </param>
     </xml>
-    <xml name="param_sequence_type">
-        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
-            <option value="protein" selected="true">Amino acid based</option>
-            <option value="dna">Nucleotide based</option>
-        </param>
-    </xml>
-    <xml name="cond_alignment_method">
-        <conditional name="alignment_method_cond">
-            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
-                <option value="mafft" selected="true">MAFFT</option>
-                <option value="pasta">PASTA</option>
-            </param>
-            <when value="mafft" />
-            <when value="pasta">
-                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
-            </when>
-        </conditional>
-    </xml>
-    <xml name="cond_remove_gappy_sequences">
-        <conditional name="remove_gappy_sequences_cond">
-            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
-                <option value="no" selected="true">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="no" />
-            <when value="yes">
-                <conditional name="trim_type_cond">
-                    <param name="trim_type" type="select" label="Trimming method">
-                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
-                        <option value="automated_trimming">Automated heuristic trimming</option>
-                    </param>
-                    <when value="gap_trimming">
-                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
-                    </when>
-                    <when value="automated_trimming" />
-                </conditional>
-                <conditional name="remove_sequences_with_gaps_cond">
-                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="no" />
-                    <when value="yes">
-                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
-                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
-                    </when>
-                </conditional>
-            </when>
-        </conditional>
-    </xml>
     <xml name="citation1">
         <citation type="bibtex">
             @misc{None,