0
|
<tool id="khmer_load_into_counting" name="Load Into Counting" version="0.1.5">
    <!--
        Galaxy wrapper around khmer's load-into-counting.py.

        Inputs : one single-end dataset, or one paired collection (forward + reverse).
        Output : the k-mer countgraph, written to hash.ct.gz in the job working
                 directory and collected as "hash_output".

        NOTE(review): the command line produced by this wrapper changed (big-count
        flag, threads, fp_rate type); consider bumping the tool version on release.
    -->
    <description>Creates k-mer countgraph using reads</description>
    <requirements>
        <requirement type="package" version="2.1.2">khmer</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Boolean parameters expand to their flag, or to an empty string.
        load-into-counting.py
            $advanced.b
            $advanced.small
            -k $advanced.kmer
            -U $advanced.ukmer
            -T $advanced.thread
            ## For the options below a value of 0 means "not set": the option is
            ## omitted so that khmer falls back to its own default.
            #if str( $advanced.max_mem ) != "0"
                -M $advanced.max_mem
            #end if
            ## fp_rate is a float, so compare numerically ("0" and "0.0" both mean unset).
            #if float( str( $advanced.fp_rate ) ) > 0
                --fp-rate $advanced.fp_rate
            #end if
            #if str( $advanced.num_table ) != "0"
                -N $advanced.num_table
            #end if
            #if str( $advanced.table_size ) != "0"
                -x $advanced.table_size
            #end if
            ## Positional arguments: output countgraph first, then the input read file(s).
            hash.ct.gz
            #if str( $data_input.data_selector ) == "paired"
                '$data_input.input1.forward' '$data_input.input1.reverse'
            #elif str( $data_input.data_selector ) == "single"
                '$data_input.input2'
            #end if
    ]]></command>
    <inputs>
        <!-- Chooses between a paired collection and a single dataset of reads. -->
        <conditional name="data_input">
            <param name="data_selector" type="select" label="Single or Paired-end Data"
                   help="Choose whether the reads come from a paired collection or from a single dataset">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
            </param>
            <when value="paired">
                <param name="input1" format="data" type="data_collection" collection_type="paired"
                       label="Select a paired collection"
                       help="Paired collection holding the forward and reverse reads"/>
            </when>
            <when value="single">
                <param name="input2" format="data" type="data"
                       label="Single-end reads"
                       help="Specify dataset with single reads"/>
            </when>
        </conditional>
        <section name="advanced" title="Advanced options" expanded="false">
            <!--
                Explicit names keep the template variables (advanced.small, advanced.b)
                stable; "argument" documents the real khmer flag.
            -->
            <param name="small" argument="--small-count" type="boolean" label="Small count"
                   checked="false" truevalue="--small-count" falsevalue=""
                   help="Reduce memory usage by using a smaller counter for individual kmers. (default: False)"/>
            <!--
                In khmer the short flag b and the long flag no-bigcount are the same
                switch and both turn bigcount OFF. Checked therefore emits nothing
                (bigcount stays on, khmer's default) and unchecked emits the flag.
            -->
            <param name="b" argument="--no-bigcount" type="boolean" label="Big count"
                   checked="true" truevalue="" falsevalue="--no-bigcount"
                   help="Count past 255 using bigcount. Uncheck to turn bigcount off, limiting counts to 255. (default: True)"/>
            <param name="thread" argument="--threads" type="integer" value="1" min="1" label="Threads"
                   help="Number of simultaneous threads to execute (default: 1)"/>
            <param name="kmer" argument="--ksize" type="integer" value="32" min="1" label="K-mer"
                   help="K-mer size to use (default: 32)"/>
            <param name="ukmer" argument="--unique-kmers" type="integer" value="0" min="0" label="Unique K-mer"
                   help="Approximate number of unique kmers in the input set (default: 0)"/>
            <param name="num_table" argument="-N" type="integer" value="0" min="0" label="Number of tables"
                   help="Number of tables to use. Leave at 0 to use khmer's default."/>
            <param name="table_size" argument="-x" type="integer" value="0" min="0" label="Maximum size of table"
                   help="Maximum size of tables to use. Leave at 0 to use khmer's default."/>
            <param name="max_mem" argument="--max-memory-usage" type="integer" value="0" min="0" label="Maximum Memory"
                   help="Maximum amount of memory to use for the data structure. Leave at 0 to use khmer's default."/>
            <!-- A false-positive rate is a fraction, so it must be a float rather than an integer. -->
            <param name="fp_rate" argument="--fp-rate" type="float" value="0" min="0" max="1" label="FP rate override"
                   help="Override the automatic FP rate setting for the current script. Leave at 0 to keep the automatic setting."/>
        </section>
    </inputs>
    <outputs>
        <!-- Collected from the fixed file name that the command writes to. -->
        <data name="hash_output" label="Hash output" format="data" from_work_dir="hash.ct.gz"/>
    </outputs>
    <help><![CDATA[
|| This is the script load-into-counting.py in khmer.
|| You are running khmer version 2.1.2
|| You are also using screed version 1.0
||
|| If you use this script in a publication, please cite EACH of the following:
||
|| * MR Crusoe et al., 2015. http://dx.doi.org/10.12688/f1000research.6924.1
|| * Q Zhang et al., http://dx.doi.org/10.1371/journal.pone.0101271
|| * A. Döring et al. http://dx.doi.org:80/10.1186/1471-2105-9-11
||
|| Please see http://khmer.readthedocs.io/en/latest/citations.html for details.

usage: load-into-counting.py [--version] [--info] [-h] [-k KSIZE]
[-U UNIQUE_KMERS] [--fp-rate FP_RATE]
[-M MAX_MEMORY_USAGE] [--small-count]
[-T THREADS] [-b] [-s FORMAT] [-f] [-q]
output_countgraph_filename
input_sequence_filename
[input_sequence_filename ...]

Build a k-mer countgraph from the given sequences.

positional arguments:
output_countgraph_filename
The name of the file to write the k-mer countgraph to.
input_sequence_filename
The names of one or more FAST[AQ] input sequence
files.

optional arguments:
--version show program's version number and exit
--info print citation information
-h, --help show this help message and exit
-k KSIZE, --ksize KSIZE
k-mer size to use (default: 32)
-U UNIQUE_KMERS, --unique-kmers UNIQUE_KMERS
approximate number of unique kmers in the input set
(default: 0)
--fp-rate FP_RATE Override the automatic FP rate setting for the current
script (default: None)
-M MAX_MEMORY_USAGE, --max-memory-usage MAX_MEMORY_USAGE
maximum amount of memory to use for data structure
(default: None)
--small-count Reduce memory usage by using a smaller counter for
individual kmers. (default: False)
-T THREADS, --threads THREADS
Number of simultaneous threads to execute (default: 1)
-b, --no-bigcount The default behaviour is to count past 255 using
bigcount. This flag turns bigcount off, limiting
counts to 255. (default: True)
-s FORMAT, --summary-info FORMAT
What format should the machine readable run summary be
in? (`json` or `tsv`, disabled by default) (default:
None)
-f, --force Overwrite output file if it exists (default: False)
-q, --quiet

Note: with `-b`/`--no-bigcount` the output will be the exact size of the k-mer
countgraph and this script will use a constant amount of memory. In exchange
k-mer counts will stop at 255. The memory usage of this script with `-b` will
be about 1.15x the product of the `-x` and `-N` numbers.

Example:

load-into-counting.py -k 20 -x 5e7 out data/100k-filtered.fa

Multiple threads can be used to accelerate the process, if you have extra cores
to spare.

Example:

load-into-counting.py -k 20 -x 5e7 -T 4 out data/100k-filtered.fa


    ]]></help>
    <citations>
        <!-- khmer software paper, as listed in the help text above. -->
        <citation type="doi">10.12688/f1000research.6924.1</citation>
        <citation type="bibtex">
@misc{githubkhmer,
author = {Crusoe, Michael},
year = {2015},
title = {khmer},
publisher = {GitHub},
journal = {GitHub repository},
url = {https://github.com/dib-lab/khmer},
}</citation>
    </citations>
</tool>
|