diff sistr_cmd.xml @ 0:ebee10be4297 draft

planemo upload commit 1ea98fb88a93a571beda5bbd56449c946860a258
author nml
date Wed, 01 Mar 2017 12:35:39 -0500
parents
children 9d7e381dfa5a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sistr_cmd.xml	Wed Mar 01 12:35:39 2017 -0500
@@ -0,0 +1,331 @@
+<tool id="sistr_cmd" name="sistr_cmd" version="0.3.4">
+  <description>
+    Salmonella In Silico Typing Resource commandline tool for serovar prediction
+  </description>
+  <!-- Dependency expected to provide the sistr executable; its version equals the tool version. -->
+  <requirements><requirement type="package" version="0.3.4">sistr_cmd</requirement></requirements>
+  <!-- Any exit code of 1 or above marks the job as failed. -->
+  <stdio>
+    <exit_code range="1:" level="fatal"/>
+  </stdio>
+  <!--
+    Cheetah command template. Every input dataset is passed as a "-i PATH NAME"
+    pair; NAME is the dataset name with ".EXT" removed and single quotes turned
+    into underscores so it cannot break the shell quoting. The report goes to
+    sistr-report.FORMAT, matching the from_work_dir values under outputs.
+  -->
+  <command><![CDATA[
+  sistr
+    #for $fasta in $input_fastas
+      -i '$fasta' '${$fasta.name.replace("." + $fasta.ext, "").replace("'", "_")}'
+    #end for
+    -f $output_format
+    -o 'sistr-report.${output_format}'
+    -p '$cgmlst_profiles'
+    -n '$novel_alleles'
+    -a '$alleles_output'
+    $use_full_cgmlst_db
+    $no_cgmlst
+    $run_mash
+    $qc
+    --threads "\${GALAXY_SLOTS:-1}"
+    -T "\${TMPDIR:-/tmp}"
+    $keep_tmp
+    $verbosity
+  ]]></command>
+  <inputs>
+    <!-- Genome assemblies to type; each selected dataset is handed to the
+         command template as its own "-i" path/name pair. -->
+    <param name="input_fastas"
+           type="data"
+           format="fasta"
+           multiple="true"
+           optional="false"
+           label="Input Genome(s)"/>
+    <!-- Report format: the value is passed to "-f" and is what the report output filters test. -->
+    <param name="output_format"
+           type="select"
+           multiple="false"
+           label="Results output format">
+      <option value="tab" selected="true">
+        Tabular (tab-delimited values)
+      </option>
+      <option value="csv">
+        CSV (Comma Separated Values)
+      </option>
+      <option value="json">
+        JSON (JavaScript Object Notation)
+      </option>
+    </param>
+    <!-- The speed/equivalence note lives in help so that the label stays short. -->
+    <param name="use_full_cgmlst_db"
+           type="boolean"
+           checked="false"
+           truevalue="--use-full-cgmlst-db"
+           falsevalue=""
+           label="Use full cgMLST database for serovar prediction"
+           help="About 10X slower with equivalent results to reduced centroid allele database."/>
+    <!-- Checked by default. The truevalue flag is inserted into the command
+         line when checked; nothing is added otherwise. -->
+    <param name="run_mash"
+           type="boolean"
+           checked="true"
+           truevalue="--run-mash"
+           falsevalue=""
+           label="Run Mash MinHash-based serovar prediction"/>
+    <!-- Unchecked by default, so the skip flag in truevalue is only passed
+         when the user explicitly opts out of cgMLST-based prediction. -->
+    <param name="no_cgmlst"
+           type="boolean"
+           checked="false"
+           truevalue="--no-cgmlst"
+           falsevalue=""
+           label="Skip running cgMLST-based serovar prediction"/>
+    <!-- Checked by default; the truevalue flag is passed to the command line
+         when checked and omitted otherwise. -->
+    <param name="qc"
+           type="boolean"
+           checked="true"
+           truevalue="--qc"
+           falsevalue=""
+           label="Basic QC of results"/>
+    <!-- Unchecked by default; when checked the truevalue flag is passed to
+         the command line. -->
+    <param name="keep_tmp"
+           type="boolean"
+           checked="false"
+           truevalue="--keep-tmp"
+           falsevalue=""
+           label="Keep temporary analysis directory"/>
+    <!-- Each option value is the literal flag text placed on the command line; the empty value adds nothing. -->
+    <param name="verbosity"
+           type="select"
+           label="Logging verbosity">
+      <option value="">
+        Error messages only
+      </option>
+      <option value="-v">
+        Show warning messages
+      </option>
+      <option value="-vv" selected="true">
+        Show info messages
+      </option>
+      <option value="-vvv">
+        Show debug messages
+      </option>
+    </param>
+  </inputs>
+  <outputs>
+    <!-- CSV report, kept only when output_format is "csv"; the three report outputs have mutually exclusive filters. -->
+    <data name="output_prediction_csv"
+          format="csv"
+          label="SISTR Results"
+          from_work_dir="sistr-report.csv">
+      <filter>output_format == "csv"</filter>
+    </data>
+    <!-- JSON report, kept only when output_format is "json". -->
+    <data name="output_prediction_json"
+          format="json"
+          label="SISTR Results"
+          from_work_dir="sistr-report.json">
+      <filter>output_format == "json"</filter>
+    </data>
+    <!-- Tabular report, kept only when output_format is "tab" (the default selection). -->
+    <data name="output_prediction_tab"
+          format="tabular"
+          label="SISTR Results"
+          from_work_dir="sistr-report.tab">
+      <filter>output_format == "tab"</filter>
+    </data>
+    <!-- Unfiltered output; its path is the "-p" argument in the command template. -->
+    <data name="cgmlst_profiles"
+          format="csv"
+          label="cgMLST results"/>
+    <!-- Unfiltered output; its path is the "-n" argument in the command template. -->
+    <data name="novel_alleles"
+          format="fasta"
+          label="Novel cgMLST alleles"/>
+    <!-- Unfiltered output; its path is the "-a" argument in the command template. -->
+    <data name="alleles_output"
+          format="json"
+          label="cgMLST allele match results"/>
+  </outputs>
+  <tests>
+    <test>
+      <!-- Single genome, tabular report, every other parameter left at its default; expects serovar text "Typhi". -->
+      <param name="input_fastas" value="AE014613-699860.fasta"/>
+      <param name="output_format" value="tab"/>
+      <!-- The FASTA and JSON outputs are compared by size only (sim_size). -->
+      <output name="novel_alleles"
+              value="novel-alleles.fasta"
+              ftype="fasta"
+              compare="sim_size"/>
+      <!-- The long allele-profile fragment below must occur in the cgMLST CSV. -->
+      <output name="cgmlst_profiles"
+              value="cgmlst-profiles.csv"
+              ftype="csv"
+              lines_diff="2">
+        <assert_contents>
+          <has_text text=",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3969539340,2545200385,225275747,2955003506,2353669245,2666669453,1672513023,3779563470,1301843222,2161147266,607954140,3680021500,2914087704,1062106200,3673111880,1314942441,1367997025,3293595301,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1528212814,2110459436,4160823845,1648892875,2084418558,1638162324,469721942,1317894045,1973458150,926214622,2197498164,398274060,,,,,,,,,,1123870984,278162969,490843778,3950769715,,,,,,4203409135,3569491948,,,,,,,1052128508,,,1510445340,,,4065472468,,,,,,1495737522,,,,,,,,3076491138,712233770,3105746335,625241463,3016847250,1928860657,2229984332,1341416065,2978539204,1175502179,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1328452594,2372254687,2640609716,3051501604,3258707132,,,,,,,,,,,,1007978530,,2019769394,1109678443,,,,,,,,,"/>
+        </assert_contents>
+      </output>
+      <!-- Report must name the genome, contain the listed result strings and have 19 columns. -->
+      <output name="output_prediction_tab"
+              value="sistr-results.tab"
+              ftype="tabular"
+              lines_diff="2">
+        <assert_contents>
+          <has_text text="AE014613-699860" />
+          <has_text text="Typhi" />
+          <has_text text="enterica" />
+          <has_text text="-:-:-" />
+          <has_n_columns n="19" />
+        </assert_contents>
+      </output>
+      <output name="alleles_output"
+              value="alleles-output.json"
+              ftype="json"
+              compare="sim_size"/>
+    </test>
+    <test>
+      <!-- Tabular report for an input whose file name contains no spaces. -->
+      <param name="input_fastas" value="13-1101-Paratyphi_B.fasta"/>
+      <param name="output_format" value="tab"/>
+      <!-- Compared by size only (sim_size). -->
+      <output name="novel_alleles"
+              value="novel-alleles-13-1101.fasta"
+              ftype="fasta"
+              compare="sim_size"/>
+      <!-- Compared against the expected CSV, tolerating up to 2 differing lines. -->
+      <output name="cgmlst_profiles"
+              value="cgmlst-profiles-13-1101.csv"
+              ftype="csv"
+              lines_diff="2"/>
+      <!-- Report must name the genome, contain the listed result strings and have 21 columns. -->
+      <output name="output_prediction_tab"
+              value="sistr-results-13-1101.tab"
+              ftype="tabular"
+              lines_diff="2">
+        <assert_contents>
+          <has_text text="13-1101-Paratyphi_B" />
+          <has_text text="Paratyphi B var. Java" />
+          <has_text text="enterica" />
+          <has_text text="1,4,[5],12" />
+          <has_text text="PASS" />
+          <has_text text="2375035975"/>
+          <has_n_columns n="21" />
+        </assert_contents>
+      </output>
+      <!-- Compared by size only (sim_size). -->
+      <output name="alleles_output"
+              value="alleles-output-13-1101.json"
+              ftype="json"
+              compare="sim_size"/>
+    </test>
+    <test>
+      <!-- Input file name contains a space; the genome name in the report must keep that space. -->
+      <param name="input_fastas" value="13-1101 Paratyphi_B.fasta"/>
+      <param name="output_format" value="tab"/>
+      <!-- Compared by size only (sim_size). -->
+      <output name="novel_alleles"
+              value="novel-alleles-13-1101.fasta"
+              ftype="fasta"
+              compare="sim_size"/>
+      <!-- Compared against the expected CSV, tolerating up to 2 differing lines. -->
+      <output name="cgmlst_profiles"
+              value="cgmlst-profiles-13-1101.csv"
+              ftype="csv"
+              lines_diff="2"/>
+      <!-- Report must name the genome, contain the listed result strings and have 21 columns. -->
+      <output name="output_prediction_tab"
+              value="sistr-results-13-1101.tab"
+              ftype="tabular"
+              lines_diff="2">
+        <assert_contents>
+          <has_text text="13-1101 Paratyphi_B" />
+          <has_text text="Paratyphi B var. Java" />
+          <has_text text="enterica" />
+          <has_text text="1,4,[5],12" />
+          <has_text text="PASS" />
+          <has_text text="2375035975"/>
+          <has_n_columns n="21" />
+        </assert_contents>
+      </output>
+      <!-- Compared by size only (sim_size). -->
+      <output name="alleles_output"
+              value="alleles-output-13-1101.json"
+              ftype="json"
+              compare="sim_size"/>
+    </test>
+  </tests>
+  <help>
+  <![CDATA[
+
+Usage::
+
+    usage: sistr_cmd [-h] [-i fasta_path genome_name] [-f OUTPUT_FORMAT]
+                     [-o OUTPUT_PREDICTION] [-p CGMLST_PROFILES]
+                     [-n NOVEL_ALLELES] [-a ALLELES_OUTPUT] [-T TMP_DIR] [-K]
+                     [--use-full-cgmlst-db] [--no-cgmlst] [-m] [--qc] [-t THREADS]
+                     [-v] [-V]
+                     [F [F ...]]
+
+    SISTR (Salmonella In Silico Typing Resource) Command-line Tool
+    ==============================================================
+    Serovar predictions from whole-genome sequence assemblies by determination of antigen gene and cgMLST gene alleles using BLAST.
+
+    Note about using the "--use-full-cgmlst-db" flag:
+        The "centroid" allele database is ~10% of the size of the full set, so analysis is much quicker with the "centroid" than with the "full" set of alleles. Results between the 2 cgMLST allele sets should not differ.
+
+    If you find this program useful in your research, please cite as:
+
+    The Salmonella In Silico Typing Resource (SISTR): an open web-accessible tool for rapidly typing and subtyping draft Salmonella genome assemblies.
+    Catherine Yoshida, Peter Kruczkiewicz, Chad R. Laing, Erika J. Lingohr, Victor P.J. Gannon, John H.E. Nash, Eduardo N. Taboada.
+    PLoS ONE 11(1): e0147101. doi: 10.1371/journal.pone.0147101
+
+    positional arguments:
+      F                     Input genome FASTA file
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      -i fasta_path genome_name, --input-fasta-genome-name fasta_path genome_name
+                            fasta file path to genome name pair
+      -f OUTPUT_FORMAT, --output-format OUTPUT_FORMAT
+                            Output format (tab, json, csv, pickle)
+      -o OUTPUT_PREDICTION, --output-prediction OUTPUT_PREDICTION
+                            SISTR serovar prediction output path
+      -p CGMLST_PROFILES, --cgmlst-profiles CGMLST_PROFILES
+                            Output CSV file destination for cgMLST allelic
+                            profiles
+      -n NOVEL_ALLELES, --novel-alleles NOVEL_ALLELES
+                            Output FASTA file destination of novel cgMLST alleles
+                            from input genomes
+      -a ALLELES_OUTPUT, --alleles-output ALLELES_OUTPUT
+                            Output path of allele sequences and info to JSON
+      -T TMP_DIR, --tmp-dir TMP_DIR
+                            Base temporary working directory for intermediate
+                            analysis files.
+      -K, --keep-tmp        Keep temporary analysis files.
+      --use-full-cgmlst-db  Use the full set of cgMLST alleles which can include
+                            highly similar alleles. By default the smaller
+                            "centroid" alleles or representative alleles are used
+                            for each marker.
+      --no-cgmlst           Do not run cgMLST serovar prediction
+      -m, --run-mash        Determine Mash MinHash genomic distances to Salmonella
+                            genomes with trusted serovar designations. Mash binary
+                            must be accessible via $PATH (e.g. /usr/bin).
+      --qc                  Perform basic QC to provide level of confidence in
+                            serovar prediction results.
+      -t THREADS, --threads THREADS
+                            Number of parallel threads to run sistr_cmd analysis.
+      -v, --verbose         Logging verbosity level (-v == show warnings; -vvv ==
+                            show debug info)
+      -V, --version         show program's version number and exit
+]]>
+  
+  </help>
+  <citations>
+    <!-- SISTR PLoS ONE paper (Yoshida et al.); the same reference is quoted in the help text -->
+    <citation type="doi">10.1371/journal.pone.0147101</citation>
+  </citations>
+</tool>