|
Commit message:
Uploaded |
|
added:
.shed.yml gene_family_scaffold_updater.pl gene_family_scaffold_updater.xml macros.xml plant_tribes_scaffolds.loc plant_tribes_scaffolds.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
| b |
| diff -r 000000000000 -r 2b0906489073 .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Tue Aug 21 13:00:21 2018 -0400 |
| b |
| @@ -0,0 +1,12 @@ +name: plant_tribes_gene_family_scaffold_updater +owner: greg +description: | + Contains a tool that adds a new genome to a PlantTribes scaffold. +homepage_url: https://github.com/dePamphilis/PlantTribes +long_description: | + Contains a tool that adds a new genome to a PlantTribes scaffold installed into Galaxy via the PlantTribes Scaffolds + Downloader data manager tool. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_scaffold_updater +type: unrestricted +categories: +- Phylogenetics |
| b |
| diff -r 000000000000 -r 2b0906489073 gene_family_scaffold_updater.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_family_scaffold_updater.pl Tue Aug 21 13:00:21 2018 -0400 |
| [ |
| b'@@ -0,0 +1,880 @@\n+#!/usr/bin/env perl\n+# Author: Eric Wafula\n+# Email: ekw10@psu.edu\n+# Institution: Penn State University, Biology Dept, Claude dePamphilis Lab\n+# Date: June 2018\n+\n+use strict;\n+use warnings;\n+use File::Spec;\n+use File::Basename;\n+use Getopt::Long qw(:config no_ignore_case);\n+use FindBin;\n+use DBI;\n+\n+my $home = "$FindBin::Bin/..";\n+\n+my $usage = <<__EOUSAGE__;\n+\n+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n+#\n+# GENE FAMILY SCAFFOLD UPDATER\n+#\n+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n+# Required Options:\n+#\n+#\n+# --database_connection_string <string> : Postgres database connection string using format\n+# postgresql://<user>:<password>@<host>/<database name>\n+#\n+# --proteins <string> : Amino acids (proteins) sequences fasta file (proteins.fasta)\n+# This can either be an absolute path or just the file name\n+#\n+# --coding_sequences <string> : Corresponding coding sequences (CDS) fasta file (cds.fasta)\n+#\n+# --scaffold <string> : Orthogroups or gene families proteins scaffold. This can either be an absolute\n+# path to the directory containing the scaffolds (e.g., /home/scaffolds/22Gv1.1)\n+# or just the scaffold (e.g., 22Gv1.1). If the latter, ~home/data is prepended to\n+# the scaffold to create the absolute path.\n+# the scaffold to create the absolute path.\n+# If Monocots clusters (version 1.0): 12Gv1.0\n+# If Angiosperms clusters (version 1.0): 22Gv1.0\n+# If Angiosperms clusters (version 1.1): 22Gv1.1\n+# If Green plants clusters (version 1.0): 31Gv1.0\n+# If Other non PlantTribes clusters: XGvY.Z, where "X" is the number species in the scaffold,\n+# and "Y.Z" version number such as 12Gv1.0. 
Please look at one of the PlantTribes scaffold\n+# data on how data files and directories are named, formated, and organized.\n+#\n+#\n+# --species_name <string> : Name of the species\n+#\n+# --species_code <string> : Code of the species\n+#\n+# --species_family <string> : Family of the species\n+#\n+# --species_order <string> : Order of the species\n+#\n+# --species_group <string> : Group of the species\n+#\n+# --species_clade <string> : Clade of the species\n+#\n+# --rooting_order_species_code <string> : Species code after which the new species will be placed in the rooting order config file\n+#\n+# # # # # # # # # # # # # # # # # #\n+# Others Options:\n+#\n+# --num_threads <int> : number of threads (CPUs) to used for HMMScan, BLASTP, and MAFFT\n+# Default: 1\n+#\n+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #\n+# Example Usage:\n+#\n+# GeneFamilyScaffoldUpdater --database_connection_string postgresql://<user>:<password>@<host>/<database name>\n+ --proteins proteins.fasta --coding_sequences cds.fasta --scaffold 22Gv1.1\n+# --species_name Fake genome --species_family Brassicaceae --species_order Brassicales\n+ --species_group Rosids --species_clade Core Eudicots --rooting_order_species_code P'..b' else { s/\\s+//g; $dna{$dna_id} .= $_; }\n+ }\n+ close IN;\n+ }\n+ if ($filename =~ /^(\\d+)\\.faa$/) {\n+ open(IN, "$orthogroups_fasta_dir/$filename") or die "Can\'t open $orthogroups_fasta_dir/$filename file\\n";\n+ while(<IN>){\n+ chomp;\n+ if (/^>(\\S+)/){ $aa_id = $1; next; }\n+ else { s/\\s+//g; $aa{$aa_id} .= $_; }\n+ }\n+ close IN;\n+ }\n+ }\n+ close DIR;\n+ }\n+ close ASSOC;\n+ # Updating gene database table\n+ log_msg("Inserting records into the plant_tribes_gene database table.");\n+ $num_recs = 0;\n+ foreach my $gene_id (sort keys %dna) {\n+ my $stmt = qq(INSERT INTO plant_tribes_gene (gene_id, dna_sequence, aa_sequence) VALUES (\'$gene_id\', \'$dna{$gene_id}\', 
\'$aa{$gene_id}\'));\n+ my $rv = $dbh->do($stmt) or die $DBI::errstr;\n+ $num_recs = $num_recs + 1;\n+ }\n+ log_msg("$num_recs records for $species_name $scaffold were successfully inserted into the plant_tribes_gene table.");\n+ # Updating gene-scaffold-orthogroup-taxon-association database table\n+ log_msg("Inserting records into the gene_scaffold_orthogroup_taxon_association database table.");\n+ open(IN, "$gsot_association_prep_file") or die "Can\'t open $gsot_association_prep_file file\\n";\n+ $num_recs = 0;\n+ my ( $stmt, $sth, $rv, $scaffold_id, $clustering_method, $orthogroup_id, $taxon_id, $gene_id );\n+ my ( $gene_id_db, $scaffold_id_db, $orthogroup_id_db, $taxon_id_db );\n+ while(<IN>){\n+ chomp;\n+ if (/^gene_id/) {\n+ # gene_id scaffold_id clustering_method orthogroup_id species_name\n+ next;\n+ }\n+ my @fields = split(/\\t/, $_);\n+ # gnl_Fakge_v1.0_AT1G03390.1 22Gv1.1 orthomcl 3 Fake genome\n+ $gene_id = $fields[0];\n+ $scaffold_id = $fields[1];\n+ $clustering_method = $fields[2];\n+ $orthogroup_id = $fields[3];\n+ $species_name = $fields[4];\n+ $stmt = qq(SELECT id FROM plant_tribes_scaffold WHERE scaffold_id = \'$scaffold_id\' AND clustering_method = \'$clustering_method\';);\n+ $sth = $dbh->prepare( $stmt );\n+ $rv = $sth->execute() or die $DBI::errstr;\n+ if ($rv < 0) { print $DBI::errstr; }\n+ while (my @row = $sth->fetchrow_array()) {\n+ $scaffold_id_db = $row[0];\n+ }\n+ $stmt = qq(SELECT id FROM plant_tribes_orthogroup WHERE orthogroup_id = \'$orthogroup_id\' AND scaffold_id = \'$scaffold_id_db\';);\n+ $sth = $dbh->prepare( $stmt );\n+ $rv = $sth->execute() or die $DBI::errstr;\n+ if ($rv < 0) { print $DBI::errstr; }\n+ while (my @row = $sth->fetchrow_array()) {\n+ $orthogroup_id_db = $row[0];\n+ }\n+ $stmt = qq(SELECT id FROM plant_tribes_taxon WHERE species_name = \'$species_name\' AND scaffold_id = \'$scaffold_id_db\';);\n+ $sth = $dbh->prepare( $stmt );\n+ $rv = $sth->execute() or die $DBI::errstr;\n+ if ($rv < 0) { print $DBI::errstr; 
}\n+ while (my @row = $sth->fetchrow_array()) {\n+ $taxon_id_db = $row[0];\n+ }\n+ $stmt = qq(SELECT id FROM plant_tribes_gene WHERE gene_id = \'$gene_id\' );\n+ $sth = $dbh->prepare( $stmt );\n+ $rv = $sth->execute() or die $DBI::errstr;\n+ if ($rv < 0) { print $DBI::errstr; }\n+ while (my @row = $sth->fetchrow_array()) {\n+ $gene_id_db = $row[0];\n+ }\n+ $stmt = qq(INSERT INTO gene_scaffold_orthogroup_taxon_association (gene_id, scaffold_id, orthogroup_id, taxon_id) VALUES ($gene_id_db, $scaffold_id_db, $orthogroup_id_db, $taxon_id_db));\n+ $rv = $dbh->do($stmt) or die $DBI::errstr;\n+ $num_recs = $num_recs + 1;\n+ }\n+ close IN;\n+ log_msg("$num_recs records for $scaffold $clustering_method were successfully inserted into the gene_scaffold_orthogroup_taxon_association table.");\n+ $dbh->disconnect();\n+}\n' |
| b |
| diff -r 000000000000 -r 2b0906489073 gene_family_scaffold_updater.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_family_scaffold_updater.xml Tue Aug 21 13:00:21 2018 -0400 |
| [ |
| @@ -0,0 +1,91 @@ +<tool id="plant_tribes_gene_family_scaffold_updater" name="Update PlantTribes scaffold" version="@WRAPPER_VERSION@.0.0"> + <description>with a new genome</description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="2.7.1">blast</requirement> + <requirement type="package" version="3.1b2">hmmer</requirement> + <requirement type="package" version="7.313">mafft</requirement> + <requirement type="package" version="5.22.0">perl</requirement> + <requirement type="package" version="1.641">perl-dbi</requirement> + <requirement type="package" version="3.5.3">perl-dbd-pg</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +perl '$__tool_directory__/gene_family_scaffold_updater.pl' +--database_connection_string '$__app__.config.plant_tribes_database_connection' +--proteins '$input_proteins' +--coding_sequences '$input_coding_sequences' +--scaffold '$GALAXY_DATA_INDEX_DIR/plant_tribes/scaffolds/$scaffold' +--species_name '$species_name' +--species_code '$species_code' +--species_family '$species_family' +--species_order '$species_order' +--species_group '$species_group' +--species_clade '$species_clade' +--rooting_order_species_code '$rooting_order_species_code' +--num_threads \${GALAXY_SLOTS:-4} +&>'$output']]></command> + <inputs> + <expand macro="param_scaffold"/> + <param name="input_proteins" format="fasta" type="data" label="Proteins fasta file"> + <validator type="empty_field"/> + </param> + <param name="input_coding_sequences" format="fasta" type="data" label="Coding sequences fasta file"> + <validator type="empty_field"/> + </param> + <param name="species_name" type="text" value="" label="Species name"> + <validator type="empty_field"/> + </param> + <param name="species_code" type="text" value="" label="Species code"> + <validator type="empty_field"/> + </param> + <param name="species_family" type="text" value="" label="Species family"> + <validator 
type="empty_field"/> + </param> + <param name="species_order" type="text" value="" label="Species order"> + <validator type="empty_field"/> + </param> + <param name="species_group" type="text" value="" label="Species group"> + <validator type="empty_field"/> + </param> + <param name="species_clade" type="text" value="" label="Species clade"> + <validator type="empty_field"/> + </param> + <param name="rooting_order_species_code" type="text" label="Species code for rooting order" help="The new species above will be placed immediately after this species code in the rooting order configuration file"> + <validator type="empty_field"/> + </param> + </inputs> + <outputs> + <data name="output" format="txt"/> + </outputs> + <tests> + <test> + <!--Testing this tool is a bit difficult at the current time.--> + </test> + </tests> + <help> +This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary +analyses of genome-scale gene families and transcriptomes. This tool adds a new genome to a scaffold installed into Galaxy +by the PlantTribes Scaffolds Downloader data manager tool. + +----- + +**Required options** + + * **Proteins fasta file** - amino acid (protein) sequences fasta file for the new genome. + * **Coding sequences fasta file** - corresponding coding sequences (CDS) fasta file for the new genome. + * **Gene family scaffold** - one of the PlantTribes gene family scaffolds, installed into Galaxy by the PlantTribes Scaffolds Downloader data manager tool. + </help> + <citations> + <citation type="bibtex"> + @unpublished{None, + author = {Eric Wafula}, + title = {None}, + year = {None}, + eprint = {None}, + url = {https://github.com/dePamphilis/PlantTribes}} + </citation> + </citations> +</tool> + |
| b |
| diff -r 000000000000 -r 2b0906489073 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Aug 21 13:00:21 2018 -0400 |
| b |
| @@ -0,0 +1,27 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">1.0</token> + <xml name="param_method"> + <param name="method" type="select" label="Protein clustering method"> + <option value="gfam" selected="true">GFam</option> + <option value="orthofinder">OrthoFinder</option> + <option value="orthomcl">OrthoMCL</option> + </param> + </xml> + <xml name="param_scaffold"> + <param name="scaffold" type="select" label="Gene family scaffold"> + <options from_data_table="plant_tribes_scaffolds" /> + <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." /> + </param> + </xml> + <xml name="citation1"> + <citation type="bibtex"> + @misc{None, + journal = {None}, + author = {1. Wafula EK}, + title = {Manuscript in preparation}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes},} + </citation> + </xml> +</macros> |
| b |
| diff -r 000000000000 -r 2b0906489073 plant_tribes_scaffolds.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc Tue Aug 21 13:00:21 2018 -0400 |
| b |
| @@ -0,0 +1,3 @@ +## Plant Tribes scaffolds +#Value Name Path Description +22Gv1.1 22Gv1.1 ${__HERE__}/test-data/tool-data/plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) |
| b |
| diff -r 000000000000 -r 2b0906489073 plant_tribes_scaffolds.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc.sample Tue Aug 21 13:00:21 2018 -0400 |
| b |
| @@ -0,0 +1,4 @@ +## Plant Tribes scaffolds +#Value Name Path Description +#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) +#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) |
| b |
| diff -r 000000000000 -r 2b0906489073 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Aug 21 13:00:21 2018 -0400 |
| b |
| @@ -0,0 +1,6 @@ +<tables> + <table name="plant_tribes_scaffolds" comment_char="#"> + <columns>value, name, path, description</columns> + <file path="tool-data/plant_tribes_scaffolds.loc" /> + </table> +</tables> |
| b |
| diff -r 000000000000 -r 2b0906489073 tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Aug 21 13:00:21 2018 -0400 |
| b |
| @@ -0,0 +1,6 @@ +<tables> + <table name="plant_tribes_scaffolds" comment_char="#"> + <columns>value, name, path, description</columns> + <file path="${__HERE__}/plant_tribes_scaffolds.loc" /> + </table> +</tables> |