annotate tools/picard/rgPicardMarkDups.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool name="Mark Duplicate reads" id="rgPicardMarkDups" version="0.01">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 picard_wrapper.py -i "$input_file" -n "$out_prefix" --tmpdir "${__new_file_path__}" -o "$out_file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 --remdups "$remDups" --assumesorted "$assumeSorted" --readregex "$readRegex" --optdupdist "$optDupeDist"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 -j "${GALAXY_DATA_INDEX_DIR}/shared/jars/MarkDuplicates.jar" -d "$html_file.files_path" -t "$html_file" -e "$input_file.ext"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 <requirements><requirement type="package">picard</requirement></requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 <param format="bam,sam" name="input_file" type="data" label="SAM/BAM dataset to mark duplicates in"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 help="If empty, upload or import a SAM/BAM dataset."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 <param name="out_prefix" value="Dupes Marked" type="text"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 label="Title for the output file" help="Use this to remind you what the job was for" size="80" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <param name="remDups" value="false" type="boolean" label="Remove duplicates from output file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 truevalue="true" falsevalue="false" checked="yes"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 help="If true do not write duplicates to the output file instead of writing them with appropriate flags set." />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 <param name="assumeSorted" value="true" type="boolean" label="Assume reads are already ordered"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 truevalue="true" falsevalue="false" checked="yes"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 help="If true assume input data are already sorted (most Galaxy SAM/BAM should be)." />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" type="text" size="80"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 label="Regular expression that can be used to parse read names in the incoming SAM file"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 help="Names are parsed to extract: tile/region, x coordinate and y coordinate, to estimate optical duplication rate" >
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <sanitizer>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <valid initial="string.printable">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <remove value="&apos;"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 </valid>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <mapping initial="none">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <add source="&apos;" target="__sq__"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 </mapping>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 </sanitizer>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 <param name="optDupeDist" value="100" type="integer"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 label="The maximum offset between two duplicate clusters in order to consider them optical duplicates." size="5"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 help="e.g. 5-10 pixels. Later Illumina software versions multiply pixel values by 10, in which case 50-100." >
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <validator type="in_range" message="Minimum optical dupe distance must be positive" min="0" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <data format="bam" name="out_file" label="MarkDups_${out_prefix}.bam"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <data format="html" name="html_file" label="MarkDups_${out_prefix}.html"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 <param name="input_file" value="picard_input_tiny_coord.bam" ftype="bam" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <param name="out_prefix" value="Dupes Marked" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 <param name="remDups" value="false" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 <param name="assumeSorted" value="true" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 <param name="optDupeDist" value="100" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <output name="out_file" file="picard_output_markdups_sortedpairsam.bam" ftype="bam" compare="diff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 <param name="input_file" value="picard_input_tiny_coord.sam" ftype="sam" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 <param name="out_prefix" value="Dupes Marked" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 <param name="remDups" value="true" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 <param name="assumeSorted" value="true" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <param name="readRegex" value="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <param name="optDupeDist" value="100" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <output name="out_file" file="picard_output_markdups_remdupes.bam" ftype="bam" compare="diff" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 <output name="html_file" file="picard_output_markdups_sortedpairsam.html" ftype="html" lines_diff="75" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 **Purpose**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 Marks all duplicate reads in a provided SAM or BAM file and either removes them or flags them.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 **Picard documentation**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 This is a Galaxy wrapper for MarkDuplicates, a part of the external package Picard-tools_.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 **Inputs, outputs, and parameters**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 Picard documentation says (reformatted for Galaxy):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 .. csv-table:: Mark Duplicates docs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 :header-rows: 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 Option,Description
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 "INPUT=File","The input SAM or BAM file to analyze. Must be coordinate sorted. Required."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 "OUTPUT=File","The output file to write marked records to. Required."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 "METRICS_FILE=File","File to write duplication metrics to. Required."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 "REMOVE_DUPLICATES=Boolean","If true do not write duplicates to the output file instead of writing them with appropriate flags set. Default value: false."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 "ASSUME_SORTED=Boolean","If true, assume that the input file is coordinate sorted, even if the header says otherwise. Default value: false."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 "MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP=Integer","This option is obsolete. ReadEnds will always be spilled to disk. Default value: 50000."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=Integer","Maximum number of file handles to keep open when spilling read ends to disk."
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 "READ_NAME_REGEX=String","Regular expression that can be used to parse read names in the incoming SAM file. Read names are parsed to extract three variables: tile/region, x coordinate and y coordinate. "
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 "OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer","The maximum offset between two duplicate clusters in order to consider them optical duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) unless using later versions of the Illumina pipeline that multiply pixel values by 10, in which case 50-100 is more normal. Default value: 100"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 **Warning on SAM/BAM quality**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 to be the only way to deal with SAM/BAM that cannot be parsed.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 **Note on the Regular Expression**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 (from the Picard docs)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 This tool requires a valid regular expression to parse out the read names in the incoming SAM or BAM file. These values are used to estimate the rate of optical duplication in order to give a more accurate estimated library size. The regular expression should contain three capture groups for the three variables, in order. Default value: [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged unless the remove duplicates option is selected. In some cases you may want to do this, but please only do this if you really understand what you are doing.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 </tool>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130