annotate dedup_hash.xml @ 0:f33e9e6a6c88 draft default tip

planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
author mvdbeek
date Wed, 23 Nov 2016 07:49:05 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
1 <tool id="dedup_hash" name="Deduplicate FASTQ files" version="0.1.1">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
2 <description>with fast and memory-efficient sequence hashes</description>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
3 <requirements>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
4 <requirement type="package" version="0.150.1">smhasher</requirement>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
5 </requirements>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
6 <command><![CDATA[
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
7 python '$__tool_directory__/dedup_hash/dedup_hash.py'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
8 #if str($readtype.single_or_paired) == "se":
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
9 --r1_in '${readtype.input_single}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
10 --r1_out '$output_single'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
11 #elif str($readtype.single_or_paired) == "pe_sep":
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
12 --r1_in '${readtype.input_paired1}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
13 --r2_in '${readtype.input_paired2}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
14 --r1_out '$output_paired1'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
15 --r2_out '$output_paired2'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
16 #else
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
17 --r1_in '${readtype.input_paired.forward}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
18 --r2_in '${readtype.input_paired.reverse}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
19 --r1_out '${output_paired_coll.forward}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
20 --r2_out '${output_paired_coll.reverse}'
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
21 #end if
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
22 $compress_fastq
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
23 ]]></command>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
24 <inputs>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
25 <conditional name="readtype">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
26 <param name="single_or_paired" type="select" label="Single-end or paired-end reads?">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
27 <option value="se" selected="true">Single-end</option>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
28 <option value="pe_sep">Paired-end (two separate input files)</option>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
29 <option value="pe_collection">Paired-end (as collection)</option>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
30 </param>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
31 <when value="se">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
32 <param format="fastq,fastq.gz" name="input_single" type="data" label="Single-end FASTQ reads" help="(--r1_in)" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
33 </when>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
34 <when value="pe_sep">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
35 <param format="fastq,fastq.gz" name="input_paired1" type="data" label="Paired-end forward strand FASTQ reads" help="(--r1_in)" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
36 <param format="fastq,fastq.gz" name="input_paired2" type="data" label="Paired-end reverse strand FASTQ reads" help="(--r2_in)" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
37 </when>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
38 <when value="pe_collection">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
39 <param name="input_paired" format="fastq,fastq.gz" type="data_collection" collection_type="paired" label="Paired-end FASTQ reads as paired collection" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
40 </when>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
41 </conditional>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
42 <param name="compress_fastq" type="boolean" checked="true" truevalue="--write_gzip" falsevalue="" label="Produce compressed fastq?"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
43 </inputs>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
44 <outputs>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
45 <data name="output_single" format="fastq" label="Single-end output of ${tool.name} on ${on_string}">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
46 <filter>readtype['single_or_paired'] == 'se'</filter>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
47 <change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
48 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
49 </change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
50 </data>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
51 <data name="output_paired1" format="fastq" label="Paired-end forward strand output of ${tool.name} on ${on_string}">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
52 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
53 <change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
54 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
55 </change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
56 </data>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
57 <data name="output_paired2" format="fastq" label="Paired-end reverse strand output of ${tool.name} on ${on_string}">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
58 <filter>readtype['single_or_paired'] == 'pe_sep'</filter>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
59 <change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
60 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
61 </change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
62 </data>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
63 <collection name="output_paired_coll" type="paired" label="Paired-end output of ${tool.name} on ${on_string}"> <!-- Structure is fixed by type="paired" plus the explicit forward/reverse elements below; no structured_like reference is needed. -->
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
64 <filter>readtype['single_or_paired'] == 'pe_collection'</filter>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
65 <data name="forward" format="fastq">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
66 <change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
67 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
68 </change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
69 </data>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
70 <data name="reverse" format="fastq">
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
71 <change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
72 <when input="compress_fastq" value="--write_gzip" format="fastq.gz" />
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
73 </change_format>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
74 </data>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
75 </collection>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
76 </outputs>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
77 <tests>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
78 <test>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
79 <param name="single_or_paired" value="pe_sep"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
80 <param name="input_paired1" value="r1.fastq.gz" ftype="fastq.gz"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
81 <param name="input_paired2" value="r2.fastq.gz" ftype="fastq.gz"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
82 <param name="compress_fastq" value="--write_gzip"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
83 <output name="output_paired1" file="r1_dedup.fastq.gz" ftype="fastq.gz" compare="sim_size"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
84 <output name="output_paired2" file="r2_dedup.fastq.gz" ftype="fastq.gz" compare="sim_size"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
85 </test>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
86 <test>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
87 <param name="single_or_paired" value="pe_sep"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
88 <param name="input_paired1" value="r1.fastq" ftype="fastq"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
89 <param name="input_paired2" value="r2.fastq" ftype="fastq"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
90 <param name="compress_fastq" value=""/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
91 <output name="output_paired1" file="r1_dedup.fastq" ftype="fastq"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
92 <output name="output_paired2" file="r2_dedup.fastq" ftype="fastq"/>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
93 </test>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
94 </tests>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
95 <help> <![CDATA[
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
96 **Deduplicate FASTQ files** is a fast and memory-efficient tool for removing duplicates from single-end or paired-end short DNA sequence reads in FASTQ format.
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
97 It identifies duplicates by concatenating the sequences of a read pair and calculating a short hash that uniquely identifies the concatenated sequence.
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
98 Sequences that are not unique (i.e., a hash of the concatenated sequence has been seen previously) are discarded.
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
99
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
100 Compared to FastUniq, this tool requires only a fraction of the memory, but does not identify pairs that are identical
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
101 except for a switch of R1 and R2. Such reads may nevertheless align to different places based on the seed-searching of the aligner,
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
102 so this may or may not be a problem for your application.
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
103
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
104 FastUniq consumed 76 GB of memory and took 4:01.52 on a typical dataset of 25 x 10^6 paired-end reads of 100 nt,
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
105 while this tool took 4.7 GB of memory and 3:23.27 for the same dataset.
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
106
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
107 Both tools produced exactly the same result, suggesting that, at least before quality and/or adapter trimming,
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
108 the previously mentioned limitations are of theoretical concern.
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
109
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
110 ]]> </help>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
111 <citations>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
112 <citation type="doi">10.1371/journal.pone.0052249</citation> <!-- bare DOI, without a "doi:" scheme prefix -->
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
113 </citations>
f33e9e6a6c88 planemo upload for repository https://github.com/mvdbeek/dedup_hash commit 367da560c5924d56c39f91ef9c731e523825424b-dirty
mvdbeek
parents:
diff changeset
114 </tool>