Repository 'drep'
hg clone https://toolshed.g2.bx.psu.edu/repos/jjohnson/drep

Changeset 0:b59ae99e47d4 (2020-01-06)
Next changeset 1:7e2debc267eb (2020-01-06)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/drep commit b155a1d533b7317ceb0ec642ffe3e986117df539"
added:
drep_compare.xml
drep_dereplicate.xml
macros.xml
b
diff -r 000000000000 -r b59ae99e47d4 drep_compare.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drep_compare.xml Mon Jan 06 11:11:06 2020 -0500
[
@@ -0,0 +1,59 @@
+<tool id="drep_compare" name="dRep compare" version="@VERSION@.0" python_template_version="3.5">
+    <description>compare a list of genomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- @PREPARE_GENOMES@ symlinks every input genome under a sanitized file
+         name; dRep then runs on those links (passed via @GENOMES@) and uses
+         "outdir" as its work directory. -->
+    <command detect_errors="exit_code"><![CDATA[
+         @PREPARE_GENOMES@
+         dRep compare outdir
+         @GENOME_COMPARISON_OPTIONS@
+         @CLUSTERING_OPTIONS@
+         @TAXONOMY_OPTIONS@
+         @WARNING_OPTIONS@
+         @GENOMES@
+    ]]></command>
+    <inputs>
+        <expand macro="genomes"/>
+        <expand macro="genome_comparison_options"/>
+        <expand macro="clustering_options"/>
+        <expand macro="taxonomy_options"/>
+        <expand macro="warning_options"/>
+    </inputs>
+    <outputs>
+        <expand macro="common_outputs" />
+        <!--
+        NOTE(review): data tables listed by the original author as living
+        under the work directory ("outdir" in the command above). Confirm
+        that the common_outputs macro in macros.xml exposes the ones needed:
+            outdir/data_tables/Cdb.csv
+            outdir/data_tables/Mdb.csv
+            outdir/data_tables/Ndb.csv
+            outdir/data_tables/Bdb.csv
+        -->
+    </outputs>
+    <help><![CDATA[
+**dRep compare** compares a list of genomes. Command-line usage of the wrapped tool::
+
+    usage: drep compare [-p PROCESSORS] [-d] [-h] [-ms MASH_SKETCH]
+                        [--S_algorithm {ANIn,goANI,ANImf,gANI}]
+                        [-n_PRESET {normal,tight}] [-pa P_ANI] [-sa S_ANI]
+                        [--SkipMash] [--SkipSecondary] [-nc COV_THRESH]
+                        [-cm {total,larger}] [--clusterAlg CLUSTERALG] [--run_tax]
+                        [--tax_method {percent,max}] [-per PERCENT]
+                        [--cent_index CENT_INDEX] [--warn_dist WARN_DIST]
+                        [--warn_sim WARN_SIM] [--warn_aln WARN_ALN]
+                        [-g [GENOMES [GENOMES ...]]]
+                        work_directory
+
+@GENOMES_HELP@
+@GENOME_COMPARISON_HELP@
+@CLUSTERING_HELP@
+@TAXONOMY_HELP@
+@WARNINGS_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r b59ae99e47d4 drep_dereplicate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/drep_dereplicate.xml Mon Jan 06 11:11:06 2020 -0500
[
@@ -0,0 +1,66 @@
+<tool id="drep_dereplicate" name="dRep dereplicate" version="@VERSION@.0" python_template_version="3.5">
+    <description>De-replicate a list of genomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+         @PREPARE_GENOMES@
+         dRep dereplicate outdir
+         @FILTER_OPTIONS@
+         @GENOME_COMPARISON_OPTIONS@
+         @CLUSTERING_OPTIONS@
+         @SCORING_OPTIONS@
+         @TAXONOMY_OPTIONS@
+         @WARNING_OPTIONS@
+         @GENOMES@
+    ]]></command>
+    <inputs>
+        <expand macro="genomes"/>
+        <expand macro="filtering_options"/>
+        <expand macro="genome_comparison_options"/>
+        <expand macro="clustering_options"/>
+        <expand macro="scoring_options"/>
+        <expand macro="taxonomy_options"/>
+        <expand macro="warning_options"/>
+    </inputs>
+    <outputs>
+        <expand macro="common_outputs" />
+        <!-- The directory is relative to the job working directory and has to sit
+             below "outdir", the work directory handed to dRep in <command>. -->
+        <collection name="dereplicated_genomes" type="list" label="dereplicated_genomes">
+             <discover_datasets pattern="__designation__" directory="outdir/dereplicated_genomes" ext="fasta"/>
+        </collection>
+    </outputs>
+    <help><![CDATA[
+**dRep dereplicate** de-replicates a list of genomes. Command-line usage of the wrapped tool::
+
+    usage: drep dereplicate [-p PROCESSORS] [-d] [-h] [-l LENGTH]
+                            [-comp COMPLETENESS] [-con CONTAMINATION]
+                            [--ignoreGenomeQuality] [-ms MASH_SKETCH]
+                            [--S_algorithm {goANI,ANIn,ANImf,gANI}]
+                            [-n_PRESET {normal,tight}] [-pa P_ANI] [-sa S_ANI]
+                            [--SkipMash] [--SkipSecondary] [-nc COV_THRESH]
+                            [-cm {total,larger}] [--clusterAlg CLUSTERALG]
+                            [-comW COMPLETENESS_WEIGHT]
+                            [-conW CONTAMINATION_WEIGHT]
+                            [-strW STRAIN_HETEROGENEITY_WEIGHT] [-N50W N50_WEIGHT]
+                            [-sizeW SIZE_WEIGHT] [--run_tax]
+                            [--tax_method {percent,max}] [-per PERCENT]
+                            [--cent_index CENT_INDEX] [--warn_dist WARN_DIST]
+                            [--warn_sim WARN_SIM] [--warn_aln WARN_ALN]
+                            [-g [GENOMES [GENOMES ...]]]
+                            [--checkM_method {taxonomy_wf,lineage_wf}]
+                            [--genomeInfo GENOMEINFO]
+                            work_directory
+
+@GENOMES_HELP@
+@FILTERING_HELP@
+@GENOME_COMPARISON_HELP@
+@CLUSTERING_HELP@
+@SCORING_HELP@
+@TAXONOMY_HELP@
+@WARNINGS_HELP@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
b
diff -r 000000000000 -r b59ae99e47d4 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jan 06 11:11:06 2020 -0500
[
b'@@ -0,0 +1,388 @@\n+<macros>\n+    <token name="@VERSION@">2.3.2</token>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@VERSION@">drep</requirement>\n+            <yield/>\n+        </requirements>\n+    </xml>\n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.1038/ismej.2017.126</citation>\n+            <yield />\n+        </citations>\n+    </xml>\n+\n+\n+    <xml name="genomes">\n+        <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/>\n+    </xml>\n+    <token name="@PREPARE_GENOMES@"><![CDATA[\n+    #import re \n+    #set $genomefiles = [] \n+    #for $genome in $genomes\n+        #set $input_name = $re.sub(\'[^\\w\\-_.]\', \'_\',str($genome.element_identifier.split(\'/\')[-1]))\n+        ln -s \'${genome}\' \'${input_name}\' &&\n+        $genomefiles.append($input_name)\n+    #end for\n+]]></token>\n+    <token name="@GENOMES@"><![CDATA[\n+    -g \n+    #for $genomefile in $genomefiles\n+    \'${genomefile}\' \n+    #end for\n+]]></token>\n+\n+\n+    <xml name="checkm_method">\n+        <param argument="--checkM_method" type="select" label="checkm method" optional="true">\n+           <option value="lineage_wf">lineage_wf (more accurate)</option>\n+           <option value="taxonomy_wf">taxonomy_wf (faster)</option>\n+        </param>\n+    </xml>\n+    <token name="@CHECKM_METHOD@"><![CDATA[\n+    #if $checkM_method:\n+    --checkM_method $checkM_method \n+    #end if\n+]]></token>\n+\n+    <xml name="filtering_options">\n+        <conditional name="filter">\n+            <param name="set_options" type="select" label="set filtering options">\n+                <option value="yes">Yes</option>\n+                <option value="no" selected="true">No</option>\n+            </param>\n+            <when value="yes">\n+                <param argument="--length" type="integer" value="50000" label="Minimum genome 
length"/>\n+                <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/>\n+                <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/>\n+                 \n+                <conditional name="quality">\n+                    <param argument="source" type="select" label="genome quality">\n+                        <help>\n+                            --ignoreGenomeQuality is useful with\n+                            bacteriophages or eukaryotes or things where checkM\n+                            scoring does not work. Will only choose genomes based\n+                            on length and N50. \n+                        </help>\n+                        <option value="checkm" selected="true">Run checkM</option>\n+                        <option value="genomeInfo">User supplied genomeInfo csv file</option>\n+                        <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option>\n+                    </param>\n+                    <when value="checkm">\n+                        <param argument="--checkM_method" type="select" label="checkm method" optional="true">\n+                            <option value="lineage_wf">lineage_wf (more accurate)</option>\n+                            <option value="taxonomy_wf">taxonomy_wf (faster)</option>\n+                        </param>\n+                    </when>\n+                    <when value="genomeInfo">\n+                        <param argument="--genomeInfo" type="data" format="csv" label="genomes fasta files">\n+                            <help><![CDATA[\n+                            A CSV dataset that must contain: [\n+                            "genome"(history dataset name of .fasta dataset of that genome), \n+                            "completeness"(0-100 value for completeness of the genome), \n+                  
          "contamination"(0-100 value of the contamination of the genome)] \n+                       '..b'        gANI  = Identify and align ORFs; compare aligned ORFS\n+                         (default: ANImf)\n+  -n_PRESET {normal,tight}\n+                        Presets to pass to nucmer\n+                        tight   = only align highly conserved regions\n+                        normal  = default ANIn parameters (default: normal)\n+\n+]]></token>\n+\n+    <token name="@CLUSTERING_HELP@"><![CDATA[\n+CLUSTERING PARAMETERS:\n+  -pa P_ANI, --P_ani P_ANI\n+                        ANI threshold to form primary (MASH) clusters\n+                        (default: 0.9)\n+  -sa S_ANI, --S_ani S_ANI\n+                        ANI threshold to form secondary clusters (default:\n+                        0.99)\n+  --SkipMash            Skip MASH clustering, just do secondary clustering on\n+                        all genomes (default: False)\n+  --SkipSecondary       Skip secondary clustering, just perform MASH\n+                        clustering (default: False)\n+  -nc COV_THRESH, --cov_thresh COV_THRESH\n+                        Minmum level of overlap between genomes when doing\n+                        secondary comparisons (default: 0.1)\n+  -cm {total,larger}, --coverage_method {total,larger}\n+                        Method to calculate coverage of an alignment\n+                        (for ANIn/ANImf only; gANI can only do larger method)\n+                        total   = 2*(aligned length) / (sum of total genome lengths)\n+                        larger  = max((aligned length / genome 1), (aligned_length / genome2))\n+                         (default: larger)\n+  --clusterAlg CLUSTERALG\n+                        Algorithm used to cluster genomes (passed to\n+                        scipy.cluster.hierarchy.linkage (default: average)\n+\n+]]></token>\n+\n+    <token name="@SCORING_HELP@"><![CDATA[\n+SCORING CRITERIA\n+Based off of the formula: 
\n+A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size)\n+\n+A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight:\n+  -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT\n+                        completeness weight (default: 1)\n+  -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT\n+                        contamination weight (default: 5)\n+  -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT\n+                        strain heterogeneity weight (default: 1)\n+  -N50W N50_WEIGHT, --N50_weight N50_WEIGHT\n+                        weight of log(genome N50) (default: 0.5)\n+  -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT\n+                        weight of log(genome size) (default: 0)\n+\n+]]></token>\n+\n+    <token name="@TAXONOMY_HELP@"><![CDATA[\n+TAXONOMY:\n+  --run_tax             generate taxonomy information (Tdb) (default: False)\n+  --tax_method {percent,max}\n+                        Method of determining taxonomy\n+                        percent = The most descriptive taxonimic level with at least (per) hits\n+                        max     = The centrifuge taxonomic level with the most overall hits (default: percent)\n+  -per PERCENT, --percent PERCENT\n+                        minimum percent for percent method (default: 50)\n+  --cent_index CENT_INDEX\n+                        path to centrifuge index (for example,\n+                        /home/mattolm/download/centrifuge/indices/b+h+v\n+                        (default: None)\n+\n+]]></token>\n+\n+    <token name="@WARNINGS_HELP@"><![CDATA[\n+WARNINGS:\n+  --warn_dist WARN_DIST\n+                        How far from the threshold to throw cluster warnings\n+                        (default: 0.25)\n+  --warn_sim WARN_SIM   Similarity threshold for warnings between dereplicated\n+                     
   genomes (default: 0.98)\n+  --warn_aln WARN_ALN   Minimum aligned fraction for warnings between\n+                        dereplicated genomes (ANIn) (default: 0.25)\n+\n+]]></token>\n+\n+\n+</macros>\n'