Mercurial > repos > jowong > khmer_load_into_counting
changeset 0:69d52f45c2fa draft
planemo upload
author | jowong |
---|---|
date | Thu, 22 Nov 2018 09:49:16 -0500 |
parents | |
children | f491d18251e9 |
files | khmer_load_into_counting.xml |
diffstat | 1 files changed, 147 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
    Galaxy wrapper around khmer's load-into-counting.py.

    Inputs : either a paired collection (forward + reverse) or one single-end
             dataset, chosen through the "data_input" conditional.
    Output : "hash_output", collected from hash.ct.gz in the job working
             directory.
    Tuning : everything in the "advanced" section maps onto one command-line
             option of the script; numeric options whose value is 0 are treated
             as "not set" and are left off the command line.
-->
<tool id="khmer_load_into_counting" name="Load Into Counting" version="0.1.5">
    <description>Creates k-mer countgraph using reads</description>
    <requirements>
        <requirement type="package" version="2.1.2">khmer</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Boolean parameters expand to their truevalue or falsevalue string,
        ## which is empty when the corresponding flag must not be passed.
        load-into-counting.py
            $advanced.b
            $advanced.small
            -k $advanced.kmer
            -U $advanced.ukmer
            -T $advanced.thread
            ## The options below use 0 as a sentinel for not set: the flag is
            ## omitted so the script falls back to its own default.
            #if str( $advanced.max_mem ) != "0"
                -M $advanced.max_mem
            #end if
            #if float( str( $advanced.fp_rate ) ) > 0
                --fp-rate $advanced.fp_rate
            #end if
            #if str( $advanced.num_table ) != "0"
                -N $advanced.num_table
            #end if
            #if str( $advanced.table_size ) != "0"
                -x $advanced.table_size
            #end if
            ## Positional arguments: output countgraph first, then the reads.
            hash.ct.gz
            #if str( $data_input.data_selector ) == "paired"
                '$data_input.input1.forward' '$data_input.input1.reverse'
            #elif str( $data_input.data_selector ) == "single"
                '$data_input.input2'
            #end if
    ]]></command>
    <inputs>
        <conditional name="data_input">
            <param name="data_selector"
                   type="select"
                   label="Single or Paired-end Data"
                   help="Select between paired and single end data to add name to dataset">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <param name="input1"
                       format="data"
                       type="data_collection"
                       collection_type="paired"
                       label="Select a paired collection"
                       help="a paired data"/>
            </when>
            <when value="single">
                <param name="input2"
                       format="data"
                       type="data"
                       label="input"
                       help="Specify dataset with single reads"/>
            </when>
        </conditional>
        <section name="advanced" title="Advanced options" expanded="false">
            <!-- "name" is given explicitly so the template reference
                 advanced.small keeps working while "argument" shows the
                 flag that is really passed to the script. -->
            <param name="small"
                   argument="--small-count"
                   type="boolean"
                   label="Small count"
                   checked="false"
                   truevalue="--small-count"
                   falsevalue=""
                   help="Reduce memory usage by using a smaller counter for individual kmers. (default: False)"/>
            <!-- Both spellings of this flag (short form b, long form
                 no-bigcount) turn bigcount OFF, so the checked state must
                 emit nothing and only the unchecked state passes the flag. -->
            <param name="b"
                   argument="--no-bigcount"
                   type="boolean"
                   label="Big count"
                   checked="true"
                   truevalue=""
                   falsevalue="--no-bigcount"
                   help="Count past 255 using bigcount. Uncheck to turn bigcount off, limiting counts to 255. (default: True)"/>
            <param name="thread" type="integer" value="1" label="Threads" help="Number of simultaneous threads to execute (default: 1)"/>
            <param name="kmer" type="integer" value="32" label="K-mer" help="K-mer size to use (default: 32)"/>
            <param name="ukmer" type="integer" value="0" label="Unique K-mer" help="approximate number of unique kmers in the input set (default: 0)"/>
            <param name="num_table" type="integer" value="0" label="Number of tables" help="number of tables to use (default: False)"/>
            <param name="table_size" type="integer" value="0" label="Maximum size of table" help="maximum size of tables to use (default: False)"/>
            <param name="max_mem" type="integer" value="0" label="Maximum Memory" help="maximum amount of memory to use for data structure (default: None)"/>
            <!-- A false-positive rate is fractional, hence float rather than
                 integer; 0 keeps the automatic setting. -->
            <param name="fp_rate" type="float" value="0" min="0" label="FP rate override" help="Override the automatic FP rate setting for the current script"/>
        </section>
    </inputs>
    <outputs>
        <data name="hash_output" label="Hash output" format="data" from_work_dir="hash.ct.gz"/>
    </outputs>
    <help><![CDATA[
Command-line help of ``load-into-counting.py`` (khmer 2.1.2)::

    || This is the script load-into-counting.py in khmer.
    || You are running khmer version 2.1.2
    || You are also using screed version 1.0
    ||
    || If you use this script in a publication, please cite EACH of the following:
    ||
    ||   * MR Crusoe et al., 2015. http://dx.doi.org/10.12688/f1000research.6924.1
    ||   * Q Zhang et al., http://dx.doi.org/10.1371/journal.pone.0101271
    ||   * A. Döring et al. http://dx.doi.org:80/10.1186/1471-2105-9-11
    ||
    || Please see http://khmer.readthedocs.io/en/latest/citations.html for details.

    usage: load-into-counting.py [--version] [--info] [-h] [-k KSIZE]
                                 [-U UNIQUE_KMERS] [--fp-rate FP_RATE]
                                 [-M MAX_MEMORY_USAGE] [--small-count]
                                 [-T THREADS] [-b] [-s FORMAT] [-f] [-q]
                                 output_countgraph_filename
                                 input_sequence_filename
                                 [input_sequence_filename ...]

    Build a k-mer countgraph from the given sequences.

    positional arguments:
      output_countgraph_filename
                            The name of the file to write the k-mer countgraph to.
      input_sequence_filename
                            The names of one or more FAST[AQ] input sequence
                            files.

    optional arguments:
      --version             show program's version number and exit
      --info                print citation information
      -h, --help            show this help message and exit
      -k KSIZE, --ksize KSIZE
                            k-mer size to use (default: 32)
      -U UNIQUE_KMERS, --unique-kmers UNIQUE_KMERS
                            approximate number of unique kmers in the input set
                            (default: 0)
      --fp-rate FP_RATE     Override the automatic FP rate setting for the current
                            script (default: None)
      -M MAX_MEMORY_USAGE, --max-memory-usage MAX_MEMORY_USAGE
                            maximum amount of memory to use for data structure
                            (default: None)
      --small-count         Reduce memory usage by using a smaller counter for
                            individual kmers. (default: False)
      -T THREADS, --threads THREADS
                            Number of simultaneous threads to execute (default: 1)
      -b, --no-bigcount     The default behaviour is to count past 255 using
                            bigcount. This flag turns bigcount off, limiting
                            counts to 255. (default: True)
      -s FORMAT, --summary-info FORMAT
                            What format should the machine readable run summary be
                            in? (`json` or `tsv`, disabled by default) (default:
                            None)
      -f, --force           Overwrite output file if it exists (default: False)
      -q, --quiet

    Note: with `-b`/`--no-bigcount` the output will be the exact size of the k-mer
    countgraph and this script will use a constant amount of memory. In exchange
    k-mer counts will stop at 255. The memory usage of this script with `-b` will
    be about 1.15x the product of the `-x` and `-N` numbers.

    Example:

        load-into-counting.py -k 20 -x 5e7 out data/100k-filtered.fa

    Multiple threads can be used to accelerate the process, if you have extra cores
    to spare.

    Example:

        load-into-counting.py -k 20 -x 5e7 -T 4 out data/100k-filtered.fa
    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubkhmer,
  author = {Crusoe, Michael},
  year = {2015},
  title = {khmer},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/dib-lab/khmer},
}</citation>
    </citations>
</tool>