comparison nugen_nudup.xml @ 0:0ad51e73587e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nugen_nudup commit 9f2d2e8d94050274a4eaae7fa1e48887fed657d4
author iuc
date Fri, 02 Dec 2016 18:03:47 -0500
parents
children 24693e595caf
comparison
equal deleted inserted replaced
-1:000000000000 0:0ad51e73587e
1 <tool id="nugen_nudup" name="NuDUP" version="2.2_post2016104">
2 <description>mark/remove PCR duplicates based on molecular tags</description>
3 <requirements> <!-- package requirement pinned to the same version string as the tool's own version attribute -->
4 <requirement version="2.2_post2016104" type="package">nudup</requirement>
5 </requirements>
6 <stdio> <!-- an exit status of 1 or higher marks the job as failed -->
7 <exit_code level="fatal" range="1:" />
8 </stdio>
9 <version_command>nudup.py --version</version_command>
10 <command><![CDATA[ ## Link the input under the extension of its datatype; only BAM datasets carry a bam_index to link.
11 #if $input.is_of_type('bam')
12 ln -f -s '$input' 'input.bam' && ln -f -s '$input.metadata.bam_index' 'input.bai' &&
13 #set $alignments = 'input.bam'
14 #else
15 ln -f -s '$input' 'input.sam' &&
16 #set $alignments = 'input.sam'
17 #end if
18 nudup.py $paired_end -f '$umi_fastq' --start $start --length $length '$alignments'
19 ]]></command>
20 <inputs> <!-- both integer params are bounded at 1: the start position is 1-based and a tag needs at least one base -->
21 <param type="data" name="input" label="Input SAM/BAM file"
22 format="sam,bam" help="Input SAM/BAM containing only unique
23 alignments" />
24 <param type="data" name="umi_fastq" label="Fastq file containing
25 molecular tag sequence" format="fastq,fastqsanger" help="FASTQ
26 file containing the molecular tag sequence for each read name in
27 the corresponding SAM/BAM file" />
28 <param type="boolean" argument="--paired-end"
29 label="Paired-end deduping" name="paired_end"
30 truevalue="--paired-end" falsevalue=""
31 checked="false"
32 help="use paired end deduping with template. SAM/BAM alignment
33 must contain paired end reads. Degenerate read pairs
34 (alignments for one read of pair) will be discarded." />
35 <param type="integer" argument="--start" min="1" label="Tag sequence start
36 position from 3' end" value="6" help="position in index read where
37 molecular tag sequence begins. This should be a 1-based value that
38 counts in from the 3' END of the read." />
39 <param type="integer" argument="--length" min="1" label="Tag sequence length"
40 value="6" help="length of molecular tag sequence" />
41 </inputs>
42 <outputs> <!-- explicit labels keep the two BAM outputs and the log distinguishable in the history -->
43 <data format="bam" name="markdup" label="${tool.name} on ${on_string}: duplicates marked" from_work_dir="prefix.sorted.markdup.bam" />
44 <data format="bam" name="dedup" label="${tool.name} on ${on_string}: duplicates removed" from_work_dir="prefix.sorted.dedup.bam" />
45 <data format="txt" name="log" label="${tool.name} on ${on_string}: duplicate log" from_work_dir="prefix_dup_log.txt" />
46 </outputs>
47 <tests>
48 <test> <!-- paired_end is left at its unchecked default; start and length override their defaults of 6 -->
49 <param ftype="bam" name="input" value="nudup_test_1.bam" />
50 <param ftype="fastqsanger" name="umi_fastq" value="nudup_umis.fastq" />
51 <param value="8" name="start" />
52 <param value="8" name="length" />
53 <output ftype="bam" name="markdup" file="nudup_markdup_1.bam" />
54 <output ftype="bam" name="dedup" file="nudup_dedup_1.bam" />
55 <output ftype="txt" name="log" file="nudup_log_1.txt" />
56 </test>
57 </tests>
58 <help><![CDATA[
59 Marks or removes PCR-introduced duplicate molecules based on the molecular
60 tagging technology used in NuGEN products.
61
62 For SINGLE END reads, duplicates are marked if they fulfill the following
63 criteria: a) start at the same genomic coordinate b) have the same strand
64 orientation c) have the same molecular tag sequence. The read with the
65 highest mapping quality is kept as the non-duplicate read.
66
67 For PAIRED END reads, duplicates are marked if they fulfill the following
68 criteria: a) start at the same genomic coordinate b) have the same template
69 length c) have the same molecular tag sequence. The read pair with the highest
70 mapping quality is kept as the non-duplicate read.
71
72 Author: Anand Patel
73
74 Contact: NuGEN Technologies Inc., techserv@nugen.com
75
76 ::
77
78 Input:
79 IN.sam|IN.bam input sorted/unsorted SAM/BAM containing only unique
80 alignments (sorted required for case 2 detailed above)
81
82 Options:
83 -2, --paired-end use paired end deduping with template. SAM/BAM
84 alignment must contain paired end reads. Degenerate
85 read pairs (alignments for one read of pair) will be
86 discarded.
87 -f INDEX.fq|READ.fq FASTQ file containing the molecular tag sequence for
88 each read name in the corresponding SAM/BAM file
89 (required only for CASE 1 detailed above)
90 -o OUT_PREFIX, --out OUT_PREFIX
91 prefix of output file paths for sorted BAMs (default
92 will create prefix.sorted.markdup.bam,
93 prefix.sorted.dedup.bam, prefix_dup_log.txt)
94 -s START, --start START
95 position in index read where molecular tag sequence
96 begins. This should be a 1-based value that counts in
97 from the 3' END of the read. (default = 6)
98 -l LENGTH, --length LENGTH
99 length of molecular tag sequence (default = 6)
100 -v, --version show program's version number and exit
101 -h, --help show this help message and exit
102 ]]></help>
103 <citations> <!-- single BibTeX entry pointing at the NuDUP GitHub repository, with the referenced commit recorded in the entry -->
104 <citation type="bibtex">@misc{Patel2016,
105 author = {Patel, Anand},
106 title = {NuDUP},
107 year = {2016},
108 publisher = {GitHub},
109 journal = {GitHub repository},
110 howpublished = {\url{https://github.com/nugentechnologies/nudup}},
111 commit = {740d9fe439dd8917605a56483a8796b377eb24c6}
112 }
113 </citation>
114 </citations>
115 </tool>