| Next changeset 1:7e2debc267eb (2020-01-06) |
|
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/drep commit b155a1d533b7317ceb0ec642ffe3e986117df539" |
|
added:
drep_compare.xml drep_dereplicate.xml macros.xml |
| b |
| diff -r 000000000000 -r b59ae99e47d4 drep_compare.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drep_compare.xml Mon Jan 06 11:11:06 2020 -0500 |
| [ |
| @@ -0,0 +1,67 @@
+<tool id="drep_compare" name="dRep compare" version="@VERSION@.0" python_template_version="3.5">
+    <description>compare a list of genomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- The PREPARE_GENOMES token symlinks the inputs under sanitized names; dRep writes its results to ./outdir. -->
+    <command detect_errors="exit_code"><![CDATA[
+        @PREPARE_GENOMES@
+        dRep compare outdir
+        @GENOME_COMPARISON_OPTIONS@
+        @CLUSTERING_OPTIONS@
+        @TAXONOMY_OPTIONS@
+        @WARNING_OPTIONS@
+        @GENOMES@
+    ]]></command>
+    <inputs>
+        <expand macro="genomes"/>
+        <expand macro="genome_comparison_options"/>
+        <expand macro="clustering_options"/>
+        <expand macro="taxonomy_options"/>
+        <expand macro="warning_options"/>
+    </inputs>
+    <outputs>
+        <expand macro="common_outputs" />
+        <!--
+        Paths noted by the original author as candidate additional outputs
+        (NOTE(review): confirm which of these the common_outputs macro
+        already declares):
+            outdir/data_tables/Cdb.csv
+            outdir/data_tables/Mdb.csv
+            outdir/data_tables/Ndb.csv
+            outdir/data_tables/Bdb.csv
+        -->
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+dRep compare groups a set of genomes by similarity without choosing
+representatives: genomes are first placed into primary clusters with MASH and
+each primary cluster is then split into secondary clusters using the selected
+ANI algorithm.
+
+**Command line reference**
+
+::
+
+    usage: drep compare [-p PROCESSORS] [-d] [-h] [-ms MASH_SKETCH]
+                        [--S_algorithm {ANIn,goANI,ANImf,gANI}]
+                        [-n_PRESET {normal,tight}] [-pa P_ANI] [-sa S_ANI]
+                        [--SkipMash] [--SkipSecondary] [-nc COV_THRESH]
+                        [-cm {total,larger}] [--clusterAlg CLUSTERALG] [--run_tax]
+                        [--tax_method {percent,max}] [-per PERCENT]
+                        [--cent_index CENT_INDEX] [--warn_dist WARN_DIST]
+                        [--warn_sim WARN_SIM] [--warn_aln WARN_ALN]
+                        [-g [GENOMES [GENOMES ...]]]
+                        work_directory
+
+@GENOMES_HELP@
+@GENOME_COMPARISON_HELP@
+@CLUSTERING_HELP@
+@TAXONOMY_HELP@
+@WARNINGS_HELP@
+
+    ]]></help>
+    <expand macro="citations" />
+</tool> |
| b |
| diff -r 000000000000 -r b59ae99e47d4 drep_dereplicate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drep_dereplicate.xml Mon Jan 06 11:11:06 2020 -0500 |
| [ |
| @@ -0,0 +1,78 @@
+<tool id="drep_dereplicate" name="dRep dereplicate" version="@VERSION@.0" python_template_version="3.5">
+    <description>De-replicate a list of genomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <!-- The PREPARE_GENOMES token symlinks the inputs under sanitized names; dRep writes its results to ./outdir. -->
+    <!-- NOTE(review): the filtering macro is named "filtering_options" and its help token FILTERING_HELP; confirm that macros.xml really defines a FILTER_OPTIONS token. -->
+    <command detect_errors="exit_code"><![CDATA[
+        @PREPARE_GENOMES@
+        dRep dereplicate outdir
+        @FILTER_OPTIONS@
+        @GENOME_COMPARISON_OPTIONS@
+        @CLUSTERING_OPTIONS@
+        @SCORING_OPTIONS@
+        @TAXONOMY_OPTIONS@
+        @WARNING_OPTIONS@
+        @GENOMES@
+    ]]></command>
+    <inputs>
+        <expand macro="genomes"/>
+        <expand macro="filtering_options"/>
+        <expand macro="genome_comparison_options"/>
+        <expand macro="clustering_options"/>
+        <expand macro="scoring_options"/>
+        <expand macro="taxonomy_options"/>
+        <expand macro="warning_options"/>
+    </inputs>
+    <outputs>
+        <expand macro="common_outputs" />
+        <!-- Discover the genomes inside the work directory that the command passes to dRep ("outdir"). -->
+        <collection name="dereplicated_genomes" type="list" label="dereplicated_genomes">
+            <discover_datasets pattern="__designation__" directory="outdir/dereplicated_genomes" ext="fasta"/>
+        </collection>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+dRep dereplicate reduces a set of genomes to one representative per group of
+near-identical genomes. Genomes are filtered by length and quality, placed into
+primary clusters with MASH, split into secondary clusters with the selected ANI
+algorithm, and the highest-scoring genome of every secondary cluster is kept.
+The representatives are returned in the *dereplicated_genomes* collection.
+
+**Command line reference**
+
+::
+
+    usage: drep dereplicate [-p PROCESSORS] [-d] [-h] [-l LENGTH]
+                            [-comp COMPLETENESS] [-con CONTAMINATION]
+                            [--ignoreGenomeQuality] [-ms MASH_SKETCH]
+                            [--S_algorithm {goANI,ANIn,ANImf,gANI}]
+                            [-n_PRESET {normal,tight}] [-pa P_ANI] [-sa S_ANI]
+                            [--SkipMash] [--SkipSecondary] [-nc COV_THRESH]
+                            [-cm {total,larger}] [--clusterAlg CLUSTERALG]
+                            [-comW COMPLETENESS_WEIGHT]
+                            [-conW CONTAMINATION_WEIGHT]
+                            [-strW STRAIN_HETEROGENEITY_WEIGHT] [-N50W N50_WEIGHT]
+                            [-sizeW SIZE_WEIGHT] [--run_tax]
+                            [--tax_method {percent,max}] [-per PERCENT]
+                            [--cent_index CENT_INDEX] [--warn_dist WARN_DIST]
+                            [--warn_sim WARN_SIM] [--warn_aln WARN_ALN]
+                            [-g [GENOMES [GENOMES ...]]]
+                            [--checkM_method {taxonomy_wf,lineage_wf}]
+                            [--genomeInfo GENOMEINFO]
+                            work_directory
+
+@GENOMES_HELP@
+@FILTERING_HELP@
+@GENOME_COMPARISON_HELP@
+@CLUSTERING_HELP@
+@SCORING_HELP@
+@TAXONOMY_HELP@
+@WARNINGS_HELP@
+
+    ]]></help>
+    <expand macro="citations" />
+</tool> |
| b |
| diff -r 000000000000 -r b59ae99e47d4 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Mon Jan 06 11:11:06 2020 -0500 |
| [ |
| b'@@ -0,0 +1,390 @@\n+<macros>\n+ <token name="@VERSION@">2.3.2</token>\n+ <xml name="requirements">\n+ <requirements>\n+ <requirement type="package" version="@VERSION@">drep</requirement>\n+ <yield/>\n+ </requirements>\n+ </xml>\n+ <xml name="citations">\n+ <citations>\n+ <citation type="doi">10.1038/ismej.2017.126</citation>\n+ <yield />\n+ </citations>\n+ </xml>\n+\n+\n+ <xml name="genomes">\n+ <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/>\n+ </xml>\n+ <!-- Symlinks every input genome into the working directory under a sanitized file name and collects those names in $genomefiles. -->\n+ <token name="@PREPARE_GENOMES@"><![CDATA[\n+ #import re \n+ #set $genomefiles = [] \n+ #for $genome in $genomes\n+ #set $input_name = $re.sub(\'[^\\w\\-_.]\', \'_\',str($genome.element_identifier.split(\'/\')[-1]))\n+ ln -s \'${genome}\' \'${input_name}\' &&\n+ #silent $genomefiles.append($input_name)\n+ #end for\n+]]></token>\n+ <!-- Emits the -g argument from $genomefiles, so the PREPARE_GENOMES token must appear earlier in the same command. -->\n+ <token name="@GENOMES@"><![CDATA[\n+ -g \n+ #for $genomefile in $genomefiles\n+ \'${genomefile}\' \n+ #end for\n+]]></token>\n+\n+\n+ <xml name="checkm_method">\n+ <param argument="--checkM_method" type="select" label="checkm method" optional="true">\n+ <option value="lineage_wf">lineage_wf (more accurate)</option>\n+ <option value="taxonomy_wf">taxonomy_wf (faster)</option>\n+ </param>\n+ </xml>\n+ <token name="@CHECKM_METHOD@"><![CDATA[\n+ #if $checkM_method:\n+ --checkM_method $checkM_method \n+ #end if\n+]]></token>\n+\n+ <xml name="filtering_options">\n+ <conditional name="filter">\n+ <param name="set_options" type="select" label="set filtering options">\n+ <option value="yes">Yes</option>\n+ <option value="no" selected="true">No</option>\n+ </param>\n+ <when value="yes">\n+ <param argument="--length" type="integer" value="50000" label="Minimum genome length"/>\n+ <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/>\n+ <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/>\n+ \n+ <conditional 
name="quality">\n+ <param name="source" type="select" label="genome quality">\n+ <help>\n+ --ignoreGenomeQuality is useful with\n+ bacteriophages or eukaryotes or things where checkM\n+ scoring does not work. Will only choose genomes based\n+ on length and N50. \n+ </help>\n+ <option value="checkm" selected="true">Run checkM</option>\n+ <option value="genomeInfo">User supplied genomeInfo csv file</option>\n+ <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option>\n+ </param>\n+ <when value="checkm">\n+ <param argument="--checkM_method" type="select" label="checkm method" optional="true">\n+ <option value="lineage_wf">lineage_wf (more accurate)</option>\n+ <option value="taxonomy_wf">taxonomy_wf (faster)</option>\n+ </param>\n+ </when>\n+ <when value="genomeInfo">\n+ <param argument="--genomeInfo" type="data" format="csv" label="genome information csv file">\n+ <help><![CDATA[\n+ A CSV dataset that must contain: [\n+ "genome"(history dataset name of .fasta dataset of that genome), \n+ "completeness"(0-100 value for completeness of the genome), \n+ "contamination"(0-100 value of the contamination of the genome)] \n+ '..b' gANI = Identify and align ORFs; compare aligned ORFS\n+ (default: ANImf)\n+ -n_PRESET {normal,tight}\n+ Presets to pass to nucmer\n+ tight = only align highly conserved regions\n+ normal = default ANIn parameters (default: normal)\n+\n+]]></token>\n+\n+ <token name="@CLUSTERING_HELP@"><![CDATA[\n+CLUSTERING PARAMETERS:\n+ -pa P_ANI, --P_ani P_ANI\n+ ANI threshold to form primary (MASH) clusters\n+ (default: 0.9)\n+ -sa S_ANI, --S_ani S_ANI\n+ ANI threshold to form secondary clusters (default:\n+ 0.99)\n+ --SkipMash Skip MASH clustering, just do secondary clustering on\n+ all genomes (default: False)\n+ --SkipSecondary Skip secondary clustering, just perform MASH\n+ clustering (default: False)\n+ -nc COV_THRESH, --cov_thresh COV_THRESH\n+ Minimum level of overlap between genomes when doing\n+ secondary comparisons (default: 
0.1)\n+ -cm {total,larger}, --coverage_method {total,larger}\n+ Method to calculate coverage of an alignment\n+ (for ANIn/ANImf only; gANI can only do larger method)\n+ total = 2*(aligned length) / (sum of total genome lengths)\n+ larger = max((aligned length / genome 1), (aligned_length / genome2))\n+ (default: larger)\n+ --clusterAlg CLUSTERALG\n+ Algorithm used to cluster genomes (passed to\n+ scipy.cluster.hierarchy.linkage) (default: average)\n+\n+]]></token>\n+\n+ <token name="@SCORING_HELP@"><![CDATA[\n+SCORING CRITERIA\n+Based off of the formula: \n+A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size)\n+\n+A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight:\n+ -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT\n+ completeness weight (default: 1)\n+ -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT\n+ contamination weight (default: 5)\n+ -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT\n+ strain heterogeneity weight (default: 1)\n+ -N50W N50_WEIGHT, --N50_weight N50_WEIGHT\n+ weight of log(genome N50) (default: 0.5)\n+ -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT\n+ weight of log(genome size) (default: 0)\n+\n+]]></token>\n+\n+ <token name="@TAXONOMY_HELP@"><![CDATA[\n+TAXONOMY:\n+ --run_tax generate taxonomy information (Tdb) (default: False)\n+ --tax_method {percent,max}\n+ Method of determining taxonomy\n+ percent = The most descriptive taxonomic level with at least (per) hits\n+ max = The centrifuge taxonomic level with the most overall hits (default: percent)\n+ -per PERCENT, --percent PERCENT\n+ minimum percent for percent method (default: 50)\n+ --cent_index CENT_INDEX\n+ path to centrifuge index (for example,\n+ /home/mattolm/download/centrifuge/indices/b+h+v)\n+ (default: None)\n+\n+]]></token>\n+\n+ <token 
name="@WARNINGS_HELP@"><![CDATA[\n+WARNINGS:\n+ --warn_dist WARN_DIST\n+ How far from the threshold to throw cluster warnings\n+ (default: 0.25)\n+ --warn_sim WARN_SIM Similarity threshold for warnings between dereplicated\n+ genomes (default: 0.98)\n+ --warn_aln WARN_ALN Minimum aligned fraction for warnings between\n+ dereplicated genomes (ANIn) (default: 0.25)\n+\n+]]></token>\n+\n+\n+</macros>\n' |