Repository 'plant_tribes_gene_family_scaffold_updater'
hg clone https://toolshed.g2.bx.psu.edu/repos/greg/plant_tribes_gene_family_scaffold_updater

Changeset 0:2b0906489073 (2018-08-21)
Commit message:
Uploaded
added:
.shed.yml
gene_family_scaffold_updater.pl
gene_family_scaffold_updater.xml
macros.xml
plant_tribes_scaffolds.loc
plant_tribes_scaffolds.loc.sample
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 2b0906489073 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Tue Aug 21 13:00:21 2018 -0400
b
@@ -0,0 +1,12 @@
+name: plant_tribes_gene_family_scaffold_updater
+owner: greg
+description: |
+  Contains a tool that adds a new genome to a PlantTribes scaffold.
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  Contains a tool that adds a new genome to a PlantTribes scaffold installed into Galaxy via the PlantTribes Scaffolds
+  Downloader data manager tool.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_scaffold_updater
+type: unrestricted
+categories:
+- Phylogenetics
b
diff -r 000000000000 -r 2b0906489073 gene_family_scaffold_updater.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_scaffold_updater.pl Tue Aug 21 13:00:21 2018 -0400
[
b'@@ -0,0 +1,880 @@\n+#!/usr/bin/env perl\n+# Author: Eric Wafula\n+# Email: ekw10@psu.edu\n+# Institution: Penn State University, Biology Dept, Claude dePamphilis Lab\n+# Date: June 2018\n+\n+use strict;\n+use warnings;\n+use File::Spec;\n+use File::Basename;\n+use Getopt::Long qw(:config no_ignore_case);\n+use FindBin;\n+use DBI;\n+\n+my $home =  "$FindBin::Bin/..";\n+\n+my $usage = <<__EOUSAGE__;\n+\n+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n+#\n+#                                  GENE FAMILY SCAFFOLD UPDATER\n+#\n+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n+#  Required Options:\n+#\n+#\n+#  --database_connection_string <string>    : Postgres database connection string using format\n+#                                             postgresql://<user>:<password>@<host>/<database name>\n+#\n+#  --proteins <string>                      : Amino acids (proteins) sequences fasta file (proteins.fasta)\n+#                                             This can either be an absolute path or just the file name\n+#\n+#  --coding_sequences <string>              : Corresponding coding sequences (CDS) fasta file (cds.fasta)\n+#\n+#  --scaffold <string>                      : Orthogroups or gene families proteins scaffold.  This can either be an absolute\n+#                                             path to the directory containing the scaffolds (e.g., /home/scaffolds/22Gv1.1)\n+#                                             or just the scaffold (e.g., 22Gv1.1).  
If the latter, ~home/data is prepended to\n+#                                             the scaffold to create the absolute path.\n+#                                             the scaffold to create the absolute path.\n+#                                             If Monocots clusters (version 1.0): 12Gv1.0\n+#                                             If Angiosperms clusters (version 1.0): 22Gv1.0\n+#                                             If Angiosperms clusters (version 1.1): 22Gv1.1\n+#                                             If Green plants clusters (version 1.0): 31Gv1.0\n+#                                             If Other non PlantTribes clusters: XGvY.Z, where "X" is the number species in the scaffold,\n+#                                             and "Y.Z" version number such as 12Gv1.0. Please look at one of the PlantTribes scaffold\n+#                                               data on how data files and directories are named, formated, and organized.\n+#\n+#\n+#  --species_name <string>                  : Name of the species\n+#\n+#  --species_code <string>                  : Code of the species\n+#\n+#  --species_family <string>                : Family of the species\n+#\n+#  --species_order <string>                 : Order of the species\n+#\n+#  --species_group <string>                 : Group of the species\n+#\n+#  --species_clade <string>                 : Clade of the species\n+#\n+#  --rooting_order_species_code <string>    : Species code after which the new species will be placed in the rooting order config file\n+#\n+# # # # # # # # # # # # # # # # # #\n+#  Others Options:\n+#\n+#  --num_threads <int>                      : number of threads (CPUs) to used for HMMScan, BLASTP, and MAFFT\n+#                                             Default: 1\n+#\n+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n+#  Example Usage:\n+#\n+#  
GeneFamilyScaffoldUpdater --database_connection_string postgresql://<user>:<password>@<host>/<database name>\n+                             --proteins proteins.fasta --coding_sequences cds.fasta --scaffold 22Gv1.1\n+#                            --species_name Fake genome --species_family Brassicaceae --species_order Brassicales\n+                             --species_group Rosids --species_clade Core Eudicots --rooting_order_species_code P'..b'             else { s/\\s+//g; $dna{$dna_id} .= $_; }\n+                }\n+                close IN;\n+            }\n+            if ($filename =~ /^(\\d+)\\.faa$/) {\n+                open(IN, "$orthogroups_fasta_dir/$filename") or die "Can\'t open $orthogroups_fasta_dir/$filename file\\n";\n+                while(<IN>){\n+                    chomp;\n+                    if (/^>(\\S+)/){ $aa_id = $1; next; }\n+                    else { s/\\s+//g; $aa{$aa_id} .= $_; }\n+                }\n+                close IN;\n+            }\n+        }\n+        close DIR;\n+    }\n+    close ASSOC;\n+    # Updating gene database table\n+    log_msg("Inserting records into the plant_tribes_gene database table.");\n+    $num_recs = 0;\n+    foreach my $gene_id (sort keys %dna) {\n+        my $stmt = qq(INSERT INTO plant_tribes_gene (gene_id, dna_sequence, aa_sequence) VALUES (\'$gene_id\', \'$dna{$gene_id}\', \'$aa{$gene_id}\'));\n+        my $rv = $dbh->do($stmt) or die $DBI::errstr;\n+        $num_recs = $num_recs + 1;\n+    }\n+    log_msg("$num_recs records for $species_name $scaffold were successfully inserted into the plant_tribes_gene table.");\n+    # Updaing gene-scaffold-orthogroup-taxon-association database table\n+    log_msg("Inserting  records into the gene_scaffold_orthogroup_taxon_association database table.");\n+    open(IN, "$gsot_association_prep_file") or die "Can\'t open $gsot_association_prep_file file\\n";\n+    $num_recs = 0;\n+    my ( $stmt, $sth, $rv, $scaffold_id, $clustering_method, $orthogroup_id, 
$taxon_id, $gene_id );\n+    my ( $gene_id_db, $scaffold_id_db, $orthogroup_id_db, $taxon_id_db );\n+    while(<IN>){\n+        chomp;\n+        if (/^gene_id/) {\n+            # gene_id scaffold_id clustering_method orthogroup_id species_name\n+            next;\n+        }\n+        my @fields = split(/\\t/, $_);\n+        # gnl_Fakge_v1.0_AT1G03390.1 22Gv1.1 orthomcl 3 Fake genome\n+        $gene_id = $fields[0];\n+        $scaffold_id = $fields[1];\n+        $clustering_method = $fields[2];\n+        $orthogroup_id = $fields[3];\n+        $species_name = $fields[4];\n+        $stmt = qq(SELECT id FROM plant_tribes_scaffold WHERE scaffold_id = \'$scaffold_id\' AND clustering_method = \'$clustering_method\';);\n+        $sth = $dbh->prepare( $stmt );\n+        $rv = $sth->execute() or die $DBI::errstr;\n+        if ($rv < 0) { print $DBI::errstr; }\n+        while (my @row = $sth->fetchrow_array()) {\n+            $scaffold_id_db = $row[0];\n+        }\n+        $stmt = qq(SELECT id FROM plant_tribes_orthogroup WHERE orthogroup_id = \'$orthogroup_id\' AND scaffold_id = \'$scaffold_id_db\';);\n+        $sth = $dbh->prepare( $stmt );\n+        $rv = $sth->execute() or die $DBI::errstr;\n+        if ($rv < 0) { print $DBI::errstr; }\n+        while (my @row = $sth->fetchrow_array()) {\n+            $orthogroup_id_db = $row[0];\n+        }\n+        $stmt = qq(SELECT id FROM plant_tribes_taxon WHERE species_name = \'$species_name\' AND scaffold_id = \'$scaffold_id_db\';);\n+        $sth = $dbh->prepare( $stmt );\n+        $rv = $sth->execute() or die $DBI::errstr;\n+        if ($rv < 0) { print $DBI::errstr; }\n+        while (my @row = $sth->fetchrow_array()) {\n+            $taxon_id_db = $row[0];\n+        }\n+        $stmt = qq(SELECT id FROM plant_tribes_gene WHERE gene_id = \'$gene_id\' );\n+        $sth = $dbh->prepare( $stmt );\n+        $rv = $sth->execute() or die $DBI::errstr;\n+        if ($rv < 0) { print $DBI::errstr; }\n+        while (my @row = 
$sth->fetchrow_array()) {\n+            $gene_id_db = $row[0];\n+        }\n+        $stmt = qq(INSERT INTO gene_scaffold_orthogroup_taxon_association (gene_id, scaffold_id, orthogroup_id, taxon_id) VALUES ($gene_id_db, $scaffold_id_db, $orthogroup_id_db, $taxon_id_db));\n+        $rv = $dbh->do($stmt) or die $DBI::errstr;\n+        $num_recs = $num_recs + 1;\n+    }\n+    close IN;\n+    log_msg("$num_recs records for $scaffold $clustering_method were successfully inserted into the gene_scaffold_orthogroup_taxon_association table.");\n+    $dbh->disconnect();\n+}\n'
b
diff -r 000000000000 -r 2b0906489073 gene_family_scaffold_updater.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_scaffold_updater.xml Tue Aug 21 13:00:21 2018 -0400
[
@@ -0,0 +1,91 @@
+<tool id="plant_tribes_gene_family_scaffold_updater" name="Update PlantTribes scaffold" version="@WRAPPER_VERSION@.0.0">
+    <description>with a new genome</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.7.1">blast</requirement>
+        <requirement type="package" version="3.1b2">hmmer</requirement>
+        <requirement type="package" version="7.313">mafft</requirement>
+        <requirement type="package" version="5.22.0">perl</requirement>
+        <requirement type="package" version="1.641">perl-dbi</requirement>
+        <requirement type="package" version="3.5.3">perl-dbd-pg</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+perl '$__tool_directory__/gene_family_scaffold_updater.pl'
+--database_connection_string '$__app__.config.plant_tribes_database_connection'
+--proteins '$input_proteins'
+--coding_sequences '$input_coding_sequences'
+--scaffold '$GALAXY_DATA_INDEX_DIR/plant_tribes/scaffolds/$scaffold'
+--species_name '$species_name'
+--species_code '$species_code'
+--species_family '$species_family'
+--species_order '$species_order'
+--species_group '$species_group'
+--species_clade '$species_clade'
+--rooting_order_species_code '$rooting_order_species_code'
+--num_threads \${GALAXY_SLOTS:-4}
+&>'$output']]></command>
+    <inputs>
+        <expand macro="param_scaffold"/>
+        <param name="input_proteins" format="fasta" type="data" label="Proteins fasta file">
+            <validator type="empty_field"/>
+        </param>
+        <param name="input_coding_sequences" format="fasta" type="data" label="Coding sequences fasta file">
+            <validator type="empty_field"/>
+        </param>
+        <param name="species_name" type="text" value="" label="Species name">
+            <validator type="empty_field"/>
+        </param>
+        <param name="species_code" type="text" value="" label="Species code">
+            <validator type="empty_field"/>
+        </param>
+        <param name="species_family" type="text" value="" label="Species family">
+            <validator type="empty_field"/>
+        </param>
+        <param name="species_order" type="text" value="" label="Species order">
+            <validator type="empty_field"/>
+        </param>
+        <param name="species_group" type="text" value="" label="Species group">
+            <validator type="empty_field"/>
+        </param>
+        <param name="species_clade" type="text" value="" label="Species clade">
+            <validator type="empty_field"/>
+        </param>
+        <param name="rooting_order_species_code" type="text" label="Species code for rooting order" help="The new species above will be placed immediately after this species code in the rooting order configuration file">
+            <validator type="empty_field"/>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="txt"/>
+    </outputs>
+    <tests>
+        <test>
+            <!--Testing this tool is a bit difficult at the current time.-->
+        </test>
+    </tests>
+    <help>
+This tool is part of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
+analyses of genome-scale gene families and transcriptomes. It adds a new genome to a scaffold installed into Galaxy
+by the PlantTribes Scaffolds Downloader data manager tool.
+
+-----
+
+**Required options**
+
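+ * **Proteins fasta file** - amino acid (protein) sequences fasta file for the new genome.
+ * **Coding sequences fasta file** - corresponding coding sequences (CDS) fasta file for the new genome.
+ * **Gene family scaffold** - one of the PlantTribes gene family scaffolds, installed into Galaxy by the PlantTribes Scaffolds Downloader data manager tool.
+ * **Species name** - name of the species being added to the scaffold.
+ * **Species code** - code of the species being added to the scaffold.
+ * **Species family** - family of the species being added to the scaffold.
+ * **Species order** - order of the species being added to the scaffold.
+ * **Species group** - group of the species being added to the scaffold.
+ * **Species clade** - clade of the species being added to the scaffold.
+ * **Species code for rooting order** - the new species will be placed immediately after this species code in the rooting order configuration file.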
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {Eric Wafula},
+            title = {None},
+            year = {None},
+            eprint = {None},
+            url = {https://github.com/dePamphilis/PlantTribes}}
+        </citation>
+    </citations>
+</tool>
+
b
diff -r 000000000000 -r 2b0906489073 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Aug 21 13:00:21 2018 -0400
b
@@ -0,0 +1,27 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Gene family scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Downloader data manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+</macros>
b
diff -r 000000000000 -r 2b0906489073 plant_tribes_scaffolds.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc Tue Aug 21 13:00:21 2018 -0400
b
@@ -0,0 +1,3 @@
+## PlantTribes scaffolds
+#Value Name Path Description
+22Gv1.1 22Gv1.1 ${__HERE__}/test-data/tool-data/plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
b
diff -r 000000000000 -r 2b0906489073 plant_tribes_scaffolds.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample Tue Aug 21 13:00:21 2018 -0400
b
@@ -0,0 +1,4 @@
+## PlantTribes scaffolds
+#Value Name Path Description
+#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
b
diff -r 000000000000 -r 2b0906489073 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Aug 21 13:00:21 2018 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 2b0906489073 tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Tue Aug 21 13:00:21 2018 -0400
b
@@ -0,0 +1,6 @@
+<tables>
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="${__HERE__}/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>