Galaxy |

Changeset 0:b59ae99e47d4 (2020-01-06)

Next changeset 1:7e2debc267eb (2020-01-06)

Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/drep commit b155a1d533b7317ceb0ec642ffe3e986117df539"

added:
drep_compare.xml
drep_dereplicate.xml
macros.xml

diff -r 000000000000 -r b59ae99e47d4 drep_compare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drep_compare.xml Mon Jan 06 11:11:06 2020 -0500

[

@@ -0,0 +1,59 @@
+<tool id="drep_compare" name="dRep compare" version="@VERSION@.0" python_template_version="3.5">
+    <description>compare a list of genomes</description>
+    <!-- All @TOKEN@ placeholders and <expand macro="..."/> references in this
+         file are resolved from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- @PREPARE_GENOMES@ emits one "ln -s ... &&" per input genome, so the
+         dRep call below is chained onto the staging step. dRep writes into the
+         work directory "outdir". -p hands dRep the number of slots Galaxy
+         allocated to the job; the backslash keeps Cheetah from interpreting
+         the shell variable. -->
+    <command detect_errors="exit_code"><![CDATA[
+         @PREPARE_GENOMES@
+         dRep compare outdir
+         -p \${GALAXY_SLOTS:-1}
+         @GENOME_COMPARISON_OPTIONS@
+         @CLUSTERING_OPTIONS@
+         @TAXONOMY_OPTIONS@
+         @WARNING_OPTIONS@
+         @GENOMES@
+    ]]></command>
+    <!-- Parameter groups are listed in the same order as their option tokens
+         appear in the command above. -->
+    <inputs>
+        <expand macro="genomes"/>
+        <expand macro="genome_comparison_options"/>
+        <expand macro="clustering_options"/>
+        <expand macro="taxonomy_options"/>
+        <expand macro="warning_options"/>
+    </inputs>
+    <outputs>
+        <expand macro="common_outputs" />
+    </outputs>
+    <help><![CDATA[
+dRep compare: compare a list of genomes.
+
+usage: drep compare [-p PROCESSORS] [-d] [-h] [-ms MASH_SKETCH]
+                    [--S_algorithm {ANIn,goANI,ANImf,gANI}]
+                    [-n_PRESET {normal,tight}] [-pa P_ANI] [-sa S_ANI]
+                    [--SkipMash] [--SkipSecondary] [-nc COV_THRESH]
+                    [-cm {total,larger}] [--clusterAlg CLUSTERALG] [--run_tax]
+                    [--tax_method {percent,max}] [-per PERCENT]
+                    [--cent_index CENT_INDEX] [--warn_dist WARN_DIST]
+                    [--warn_sim WARN_SIM] [--warn_aln WARN_ALN]
+                    [-g [GENOMES [GENOMES ...]]]
+                    work_directory
+
+
+    @GENOMES_HELP@
+    @GENOME_COMPARISON_HELP@
+    @CLUSTERING_HELP@
+    @TAXONOMY_HELP@
+    @WARNINGS_HELP@
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r b59ae99e47d4 drep_dereplicate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drep_dereplicate.xml Mon Jan 06 11:11:06 2020 -0500

[

@@ -0,0 +1,66 @@
+<tool id="drep_dereplicate" name="dRep dereplicate" version="@VERSION@.0" python_template_version="3.5">
+    <description>De-replicate a list of genomes</description>
+    <!-- All @TOKEN@ placeholders and <expand macro="..."/> references in this
+         file are resolved from macros.xml. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- @PREPARE_GENOMES@ emits one "ln -s ... &&" per input genome, so the
+         dRep call below is chained onto the staging step. dRep writes into the
+         work directory "outdir"; the <discover_datasets> directory in
+         <outputs> must stay in sync with that name. -p hands dRep the number
+         of slots Galaxy allocated to the job; the backslash keeps Cheetah from
+         interpreting the shell variable. -->
+    <command detect_errors="exit_code"><![CDATA[
+         @PREPARE_GENOMES@
+         dRep dereplicate outdir
+         -p \${GALAXY_SLOTS:-1}
+         @FILTER_OPTIONS@
+         @GENOME_COMPARISON_OPTIONS@
+         @CLUSTERING_OPTIONS@
+         @SCORING_OPTIONS@
+         @TAXONOMY_OPTIONS@
+         @WARNING_OPTIONS@
+         @GENOMES@
+    ]]></command>
+    <!-- Parameter groups are listed in the same order as their option tokens
+         appear in the command above. -->
+    <inputs>
+        <expand macro="genomes"/>
+        <expand macro="filtering_options"/>
+        <expand macro="genome_comparison_options"/>
+        <expand macro="clustering_options"/>
+        <expand macro="scoring_options"/>
+        <expand macro="taxonomy_options"/>
+        <expand macro="warning_options"/>
+    </inputs>
+    <outputs>
+        <expand macro="common_outputs" />
+        <!-- Collect every file found in the dereplicated_genomes folder of the
+             work directory passed to dRep ("outdir") as one FASTA element each.
+             The directory previously pointed at "out_drep", which the command
+             never creates. -->
+        <collection name="dereplicated_genomes" type="list" label="dereplicated_genomes">
+             <discover_datasets pattern="__designation__" directory="outdir/dereplicated_genomes" ext="fasta"/>
+        </collection>
+    </outputs>
+    <help><![CDATA[
+dRep dereplicate: de-replicate a list of genomes.
+
+The dereplicated genomes are returned as a list collection of FASTA datasets
+named dereplicated_genomes.
+
+usage: drep dereplicate [-p PROCESSORS] [-d] [-h] [-l LENGTH]
+                        [-comp COMPLETENESS] [-con CONTAMINATION]
+                        [--ignoreGenomeQuality] [-ms MASH_SKETCH]
+                        [--S_algorithm {goANI,ANIn,ANImf,gANI}]
+                        [-n_PRESET {normal,tight}] [-pa P_ANI] [-sa S_ANI]
+                        [--SkipMash] [--SkipSecondary] [-nc COV_THRESH]
+                        [-cm {total,larger}] [--clusterAlg CLUSTERALG]
+                        [-comW COMPLETENESS_WEIGHT]
+                        [-conW CONTAMINATION_WEIGHT]
+                        [-strW STRAIN_HETEROGENEITY_WEIGHT] [-N50W N50_WEIGHT]
+                        [-sizeW SIZE_WEIGHT] [--run_tax]
+                        [--tax_method {percent,max}] [-per PERCENT]
+                        [--cent_index CENT_INDEX] [--warn_dist WARN_DIST]
+                        [--warn_sim WARN_SIM] [--warn_aln WARN_ALN]
+                        [-g [GENOMES [GENOMES ...]]]
+                        [--checkM_method {taxonomy_wf,lineage_wf}]
+                        [--genomeInfo GENOMEINFO]
+                        work_directory
+
+    @GENOMES_HELP@
+    @FILTERING_HELP@
+    @GENOME_COMPARISON_HELP@
+    @CLUSTERING_HELP@
+    @SCORING_HELP@
+    @TAXONOMY_HELP@
+    @WARNINGS_HELP@
+
+
+    ]]></help>
+    <expand macro="citations" />
+</tool>

diff -r 000000000000 -r b59ae99e47d4 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jan 06 11:11:06 2020 -0500

[

b'@@ -0,0 +1,388 @@\n+<macros>\n+ <token name="@VERSION@">2.3.2</token>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="package" version="@VERSION@">drep</requirement>\n+ <yield/>\n+ </requirements>\n+ </xml>\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1038/ismej.2017.126</citation>\n+ <yield />\n+ </citations>\n+ </xml>\n+\n+\n+ <xml name="genomes">\n+ <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/>\n+ </xml>\n+ <token name="@PREPARE_GENOMES@"><![CDATA[\n+ #import re \n+ #set $genomefiles = [] \n+ #for $genome in $genomes\n+ #set $input_name = $re.sub(\'[^\\w\\-_.]\', \'_\',str($genome.element_identifier.split(\'/\')[-1]))\n+ ln -s \'${genome}\' \'${input_name}\' &&\n+ $genomefiles.append($input_name)\n+ #end for\n+]]></token>\n+ <token name="@GENOMES@"><![CDATA[\n+ -g \n+ #for $genomefile in $genomefiles\n+ \'${genomefile}\' \n+ #end for\n+]]></token>\n+\n+\n+ <xml name="checkm_method">\n+ <param argument="--checkM_method" type="select" label="checkm method" optional="true">\n+ <option value="lineage_wf">lineage_wf (more accurate)</option>\n+ <option value="taxonomy_wf">taxonomy_wf (faster)</option>\n+ </param>\n+ </xml>\n+ <token name="@CHECKM_METHOD@"><![CDATA[\n+ #if $checkM_method:\n+ --checkM_method $checkM_method \n+ #end if\n+]]></token>\n+\n+ <xml name="filtering_options">\n+ <conditional name="filter">\n+ <param name="set_options" type="select" label="set filtering options">\n+ <option value="yes">Yes</option>\n+ <option value="no" selected="true">No</option>\n+ </param>\n+ <when value="yes">\n+ <param argument="--length" type="integer" value="50000" label="Minimum genome length"/>\n+ <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/>\n+ <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/>\n+ \n+ <conditional 
name="quality">\n+ <param argument="source" type="select" label="genome quality">\n+ <help>\n+ --ignoreGenomeQuality is useful with\n+ bacteriophages or eukaryotes or things where checkM\n+ scoring does not work. Will only choose genomes based\n+ on length and N50. \n+ </help>\n+ <option value="checkm" selected="true">Run checkM</option>\n+ <option value="genomeInfo">User supplied genomeInfo csv file</option>\n+ <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option>\n+ </param>\n+ <when value="checkm">\n+ <param argument="--checkM_method" type="select" label="checkm method" optional="true">\n+ <option value="lineage_wf">lineage_wf (more accurate)</option>\n+ <option value="taxonomy_wf">taxonomy_wf (faster)</option>\n+ </param>\n+ </when>\n+ <when value="genomeInfo">\n+ <param argument="--genomeInfo" type="data" format="csv" label="genomes fasta files">\n+ <help><![CDATA[\n+ A CSV dataset that must contain: [\n+ "genome"(history dataset name of .fasta dataset of that genome), \n+ "completeness"(0-100 value for completeness of the genome), \n+ "contamination"(0-100 value of the contamination of the genome)] \n+ '..b' gANI = Identify and align ORFs; compare aligned ORFS\n+ (default: ANImf)\n+ -n_PRESET {normal,tight}\n+ Presets to pass to nucmer\n+ tight = only align highly conserved regions\n+ normal = default ANIn parameters (default: normal)\n+\n+]]></token>\n+\n+ <token name="@CLUSTERING_HELP@"><![CDATA[\n+CLUSTERING PARAMETERS:\n+ -pa P_ANI, --P_ani P_ANI\n+ ANI threshold to form primary (MASH) clusters\n+ (default: 0.9)\n+ -sa S_ANI, --S_ani S_ANI\n+ ANI threshold to form secondary clusters (default:\n+ 0.99)\n+ --SkipMash Skip MASH clustering, just do secondary clustering on\n+ all genomes (default: False)\n+ --SkipSecondary Skip secondary clustering, just perform MASH\n+ clustering (default: False)\n+ -nc COV_THRESH, --cov_thresh COV_THRESH\n+ Minmum level of overlap between genomes when doing\n+ secondary comparisons (default: 
0.1)\n+ -cm {total,larger}, --coverage_method {total,larger}\n+ Method to calculate coverage of an alignment\n+ (for ANIn/ANImf only; gANI can only do larger method)\n+ total = 2*(aligned length) / (sum of total genome lengths)\n+ larger = max((aligned length / genome 1), (aligned_length / genome2))\n+ (default: larger)\n+ --clusterAlg CLUSTERALG\n+ Algorithm used to cluster genomes (passed to\n+ scipy.cluster.hierarchy.linkage (default: average)\n+\n+]]></token>\n+\n+ <token name="@SCORING_HELP@"><![CDATA[\n+SCORING CRITERIA\n+Based off of the formula: \n+A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size)\n+\n+A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight:\n+ -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT\n+ completeness weight (default: 1)\n+ -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT\n+ contamination weight (default: 5)\n+ -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT\n+ strain heterogeneity weight (default: 1)\n+ -N50W N50_WEIGHT, --N50_weight N50_WEIGHT\n+ weight of log(genome N50) (default: 0.5)\n+ -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT\n+ weight of log(genome size) (default: 0)\n+\n+]]></token>\n+\n+ <token name="@TAXONOMY_HELP@"><![CDATA[\n+TAXONOMY:\n+ --run_tax generate taxonomy information (Tdb) (default: False)\n+ --tax_method {percent,max}\n+ Method of determining taxonomy\n+ percent = The most descriptive taxonimic level with at least (per) hits\n+ max = The centrifuge taxonomic level with the most overall hits (default: percent)\n+ -per PERCENT, --percent PERCENT\n+ minimum percent for percent method (default: 50)\n+ --cent_index CENT_INDEX\n+ path to centrifuge index (for example,\n+ /home/mattolm/download/centrifuge/indices/b+h+v\n+ (default: None)\n+\n+]]></token>\n+\n+ <token 
name="@WARNINGS_HELP@"><![CDATA[\n+WARNINGS:\n+ --warn_dist WARN_DIST\n+ How far from the threshold to throw cluster warnings\n+ (default: 0.25)\n+ --warn_sim WARN_SIM Similarity threshold for warnings between dereplicated\n+ genomes (default: 0.98)\n+ --warn_aln WARN_ALN Minimum aligned fraction for warnings between\n+ dereplicated genomes (ANIn) (default: 0.25)\n+\n+]]></token>\n+\n+\n+</macros>\n'