annotate rgPicardMarkDups.xml @ 0:1cd7f3b42609

Uploaded tool.
author devteam
date Tue, 23 Oct 2012 13:14:29 -0400
parents
children 9227b8c3093b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
1 <tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="1.56.0">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
2 <command interpreter="python">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
3 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" -o "$out_file"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
4 --remdups "$remDups" --assumesorted "$assumeSorted" --readregex "$readRegex" --optdupdist "$optDupeDist"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
5 -j "\$JAVA_JAR_PATH/MarkDuplicates.jar" -d "$html_file.files_path" -t "$html_file" -e "$input_file.ext"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
6 </command>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
7 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
8 <inputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
9 <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
10 help="If empty, upload or import a SAM/BAM dataset."/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
11 <param name="out_prefix" value="Dupes Marked" type="text"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
12 label="Title for the output file" help="Use this to remind you what the job was for" size="80" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
13 <param name="remDups" value="false" type="boolean" label="Remove duplicates from output file"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
14 truevalue="true" falsevalue="false" checked="yes"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
15 help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
16 <param name="assumeSorted" value="true" type="boolean" label="Assume reads are already ordered"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
17 truevalue="true" falsevalue="false" checked="yes"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
18 help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
19 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
20 label="Regular expression that can be used to parse read names in the incoming SAM file"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
21 help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
22 <sanitizer>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
23 <valid initial="string.printable">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
24 <remove value="&apos;"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
25 </valid>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
26 <mapping initial="none">
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
27 <add source="&apos;" target="__sq__"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
28 </mapping>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
29 </sanitizer>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
30 </param>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
31 <param name="optDupeDist" value="100" type="integer"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
32 label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
33 help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
34 <validator type="in_range" message="Optical dupe distance must be zero or greater" min="0" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
35 </param>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
36
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
37 </inputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
38 <outputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
39 <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
40 <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
41 </outputs>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
42 <tests>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
43 <test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
44 <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
45 <param name="out_prefix" value="Dupes Marked" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
46 <param name="remDups" value="false" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
47 <param name="assumeSorted" value="true" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
48 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
49 <param name="optDupeDist" value="100" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
50 <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
51 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
52 </test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
53 <test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
54 <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
55 <param name="out_prefix" value="Dupes Marked" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
56 <param name="remDups" value="true" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
57 <param name="assumeSorted" value="true" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
58 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
59 <param name="optDupeDist" value="100" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
60 <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
61 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
62 </test>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
63 </tests>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
64
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
65 <help>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
66
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
67 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
68
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
69 **Purpose**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
70
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
71 Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
72
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
73 **Picard documentation**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
74
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
75 This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
76
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
77 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
78
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
79 -----
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
80
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
81 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
82
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
83 **Inputs, outputs, and parameters**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
84
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
85 Picard documentation says (reformatted for Galaxy):
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
86
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
87 .. csv-table:: Mark Duplicates docs
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
88 :header-rows: 1
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
89
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
90 Option,Description
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
91 "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
92 "OUTPUT=File","The output file to write marked records to. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
93 "METRICS_FILE=File","File to write duplication metrics to. Required."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
94 "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
95 "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
96 "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
97 "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
98 "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
99 "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100."
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
100
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
101 .. class:: warningmark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
102
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
103 **Warning on SAM/BAM quality**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
104
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
105 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
106 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
107 to be the only way to deal with SAM/BAM that cannot be parsed.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
108 .. class:: infomark
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
109
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
110 **Note on the Regular Expression**
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
111
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
112 (from the Picard docs)
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
113 This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables (tile/region, x coordinate and y coordinate), in that order. Default value: ``[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*``
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
114
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
115 Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged, unless the remove duplicates option is selected, in which case the duplicates are left out of the output. In some cases you may want to remove duplicates, but please only do so if you really understand what you are doing.
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
116
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
117 </help>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
118 </tool>
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
119
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
120
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
121
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
122
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
123
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
124
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
125
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
126
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
127
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
128
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
129
1cd7f3b42609 Uploaded tool.
devteam
parents:
diff changeset
130