Mercurial > repos > jowong > khmer_load_into_counting

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/khmer_load_into_counting.xml	Thu Nov 22 09:49:16 2018 -0500
@@ -0,0 +1,147 @@
+<tool id="khmer_load_into_counting" name="Load Into Counting" version="0.1.5">
+    <description>Creates k-mer countgraph using reads</description>
+    <requirements> <!-- single dependency: the khmer package, pinned to version 2.1.2 -->
+        <requirement version="2.1.2" type="package">khmer</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## khmer treats -b and --no-bigcount as one switch (bigcount off), so 'b' expands to nothing or to --no-bigcount.
+        load-into-counting.py
+        $advanced.b $advanced.small
+        -k $advanced.kmer -U $advanced.ukmer
+        -T $advanced.thread
+        ## For the four options below a value of 0 means "not set": the option is left off the command line.
+        #if str( $advanced.max_mem ) != "0"
+            -M $advanced.max_mem
+        #end if
+        #if float( str( $advanced.fp_rate ) ) > 0
+            --fp-rate $advanced.fp_rate
+        #end if
+        #if str( $advanced.num_table ) != "0"
+            -N $advanced.num_table
+        #end if
+        #if str( $advanced.table_size ) != "0"
+            -x $advanced.table_size
+        #end if
+        hash.ct.gz
+        #if str( $data_input.data_selector ) == "paired"
+            '$data_input.input1.forward' '$data_input.input1.reverse'
+        #else
+            '$data_input.input2'
+        #end if
+    ]]></command>
+    <inputs>
+        <conditional name="data_input"> <!-- selects which read input is shown and passed to the script -->
+            <param name="data_selector" type="select" label="Single or Paired-end Data" help="Choose whether the reads come from a paired collection or from a single dataset">
+                <option value="paired">Paired</option>
+                <option value="single">Single</option>
+            </param>
+            <when value="paired">
+                <param name="input1" format="data" type="data_collection" collection_type="paired" label="Select a paired collection" help="Paired collection; the forward and reverse datasets are both passed to the script"/>
+            </when>
+            <when value="single">
+                <param name="input2" format="data" type="data" label="Input reads" help="Specify dataset with single reads"/>
+            </when>
+        </conditional>
+        <section name="advanced" title="Advanced options" expanded="false"> <!-- explicit names keep the $advanced.small / $advanced.b variables stable -->
+            <param name="small" argument="--small-count" type="boolean" label="Small count" checked="false" truevalue="--small-count" falsevalue="" help="Reduce memory usage by using a smaller counter for individual kmers. (default: False)"/>
+            <param name="b" argument="--no-bigcount" type="boolean" label="Big count" checked="true" truevalue="" falsevalue="--no-bigcount" help="Count past 255 using bigcount. Unchecking passes --no-bigcount, which limits counts to 255. (default: True)"/>
+            <param name="thread" type="integer" value="1" min="1" label="Threads" help="Number of simultaneous threads to execute (default: 1)" />
+            <param name="kmer" type="integer" value="32" min="1" label="K-mer" help="K-mer size to use (default: 32)" />
+            <param name="ukmer" type="integer" value="0" min="0" label="Unique K-mer" help="Approximate number of unique kmers in the input set (default: 0)" />
+            <param name="num_table" type="integer" value="0" min="0" label="Number of tables" help="Number of tables to use; 0 leaves -N off the command line" />
+            <param name="table_size" type="integer" value="0" min="0" label="Maximum size of table" help="Maximum size of tables to use; 0 leaves -x off the command line" />
+            <param name="max_mem" type="integer" value="0" min="0" label="Maximum Memory" help="Maximum amount of memory to use for data structure; 0 leaves -M off the command line" />
+            <param name="fp_rate" type="float" value="0" min="0" label="FP rate override" help="Override the automatic FP rate setting for the current script; 0 leaves --fp-rate off the command line" />
+        </section>
+    </inputs>
+    <outputs> <!-- the result is collected from the job working directory, where the command writes hash.ct.gz -->
+        <data from_work_dir="hash.ct.gz" name="hash_output" format="data" label="Hash output"/>
+    </outputs>
+    <help><![CDATA[
+|| This is the script load-into-counting.py in khmer.
+|| You are running khmer version 2.1.2
+|| You are also using screed version 1.0
+||
+|| If you use this script in a publication, please cite EACH of the following:
+||
+||   * MR Crusoe et al., 2015. http://dx.doi.org/10.12688/f1000research.6924.1
+||   * Q Zhang et al., http://dx.doi.org/10.1371/journal.pone.0101271
+||   * A. Döring et al. http://dx.doi.org:80/10.1186/1471-2105-9-11
+||
+|| Please see http://khmer.readthedocs.io/en/latest/citations.html for details.
+
+usage: load-into-counting.py [--version] [--info] [-h] [-k KSIZE]
+                             [-U UNIQUE_KMERS] [--fp-rate FP_RATE]
+                             [-M MAX_MEMORY_USAGE] [--small-count]
+                             [-T THREADS] [-b] [-s FORMAT] [-f] [-q]
+                             output_countgraph_filename
+                             input_sequence_filename
+                             [input_sequence_filename ...]
+
+Build a k-mer countgraph from the given sequences.
+
+positional arguments:
+  output_countgraph_filename
+                        The name of the file to write the k-mer countgraph to.
+  input_sequence_filename
+                        The names of one or more FAST[AQ] input sequence
+                        files.
+
+optional arguments:
+  --version             show program's version number and exit
+  --info                print citation information
+  -h, --help            show this help message and exit
+  -k KSIZE, --ksize KSIZE
+                        k-mer size to use (default: 32)
+  -U UNIQUE_KMERS, --unique-kmers UNIQUE_KMERS
+                        approximate number of unique kmers in the input set
+                        (default: 0)
+  --fp-rate FP_RATE     Override the automatic FP rate setting for the current
+                        script (default: None)
+  -M MAX_MEMORY_USAGE, --max-memory-usage MAX_MEMORY_USAGE
+                        maximum amount of memory to use for data structure
+                        (default: None)
+  --small-count         Reduce memory usage by using a smaller counter for
+                        individual kmers. (default: False)
+  -T THREADS, --threads THREADS
+                        Number of simultaneous threads to execute (default: 1)
+  -b, --no-bigcount     The default behaviour is to count past 255 using
+                        bigcount. This flag turns bigcount off, limiting
+                        counts to 255. (default: True)
+  -s FORMAT, --summary-info FORMAT
+                        What format should the machine readable run summary be
+                        in? (`json` or `tsv`, disabled by default) (default:
+                        None)
+  -f, --force           Overwrite output file if it exists (default: False)
+  -q, --quiet
+
+Note: with `-b`/`--no-bigcount` the output will be the exact size of the k-mer
+countgraph and this script will use a constant amount of memory. In exchange
+k-mer counts will stop at 255. The memory usage of this script with `-b` will
+be about 1.15x the product of the `-x` and `-N` numbers.
+
+Example:
+
+    load-into-counting.py -k 20 -x 5e7 out data/100k-filtered.fa
+
+Multiple threads can be used to accelerate the process, if you have extra cores
+to spare.
+
+Example:
+
+    load-into-counting.py -k 20 -x 5e7 -T 4 out data/100k-filtered.fa
+
+
+    ]]></help>
+    <citations> <!-- one BibTeX entry pointing at the khmer GitHub repository -->
+        <citation type="bibtex">
+@misc{githubkhmer,
+  title = {khmer},
+  author = {Crusoe, Michael},
+  year = {2015},
+  journal = {GitHub repository},
+  publisher = {GitHub},
+  url = {https://github.com/dib-lab/khmer},
+}</citation>
+    </citations>
+</tool>