comparison picard_MarkDuplicatesWithMateCigar.xml @ 13:7e6fd3d0f16e draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/picard commit bf94a1505c131fb3f67c867b6e1d886780efa42e
author devteam
date Tue, 06 Dec 2016 10:04:41 -0500
parents 05087b27692a
children 465cbb0cf2eb
comparison
equal deleted inserted replaced
12:05087b27692a 13:7e6fd3d0f16e
4 <import>picard_macros.xml</import> 4 <import>picard_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
8 @java_options@ 8 @java_options@
9 9 @symlink_element_identifier@
10 picard 10 picard
11 MarkDuplicatesWithMateCigar 11 MarkDuplicatesWithMateCigar
12 12
13 INPUT="${inputFile}" 13 INPUT='$inputFile.element_identifier'
14 OUTPUT="${outFile}" 14 OUTPUT="${outFile}"
15 15
16 METRICS_FILE="${metrics_file}" 16 METRICS_FILE="${metrics_file}"
17 COMMENT="${comment}" 17 COMMENT="${comment}"
18 18
19 MINIMUM_DISTANCE="${minimum_distance}" 19 MINIMUM_DISTANCE="${minimum_distance}"
20 SKIP_PAIRS_WITH_NO_MATE_CIGAR="${skip_pairs_with_no_mate_cigar}" 20 SKIP_PAIRS_WITH_NO_MATE_CIGAR="${skip_pairs_with_no_mate_cigar}"
21 21
22 22
23 REMOVE_DUPLICATES="${remove_duplicates}" 23 REMOVE_DUPLICATES="${remove_duplicates}"
24 ASSUME_SORTED="${assume_sorted}" 24 ASSUME_SORTED="${assume_sorted}"
25 25
26 DUPLICATE_SCORING_STRATEGY="${duplicate_scoring_strategy}" 26 DUPLICATE_SCORING_STRATEGY="${duplicate_scoring_strategy}"
27 27
28 #import pipes 28 #import pipes
29 READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" } 29 READ_NAME_REGEX=${ pipes.quote( str( $read_name_regex ) ) or "''" }
30 OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}" 30 OPTICAL_DUPLICATE_PIXEL_DISTANCE="${optical_duplicate_pixel_distance}"
31 31
32 32
33 BLOCK_SIZE=100000 33 BLOCK_SIZE=100000
34 VALIDATION_STRINGENCY="${validation_stringency}" 34 VALIDATION_STRINGENCY="${validation_stringency}"
35 QUIET=true 35 QUIET=true
36 VERBOSITY=ERROR 36 VERBOSITY=ERROR
37 37
38 ]]></command> 38 ]]></command>
39 <inputs> 39 <inputs>
40 <param format="bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/> 40 <param format="bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
41 <param name="comment" type="text" label="Add this comment to BAM dataset"/> 41 <param name="comment" type="text" label="Add this comment to BAM dataset"/>
42 42
43 <param name="minimum_distance" type="integer" value="-1" label="The minimum distance to buffer records to account for clipping on the 5' end of the records" help="MINIMUM_DISTANCE; Set this number to -1 to use twice the first read's read length (or 100, whichever is smaller); default=-1"/> 43 <param name="minimum_distance" type="integer" value="-1" label="The minimum distance to buffer records to account for clipping on the 5' end of the records" help="MINIMUM_DISTANCE; Set this number to -1 to use twice the first read's read length (or 100, whichever is smaller); default=-1"/>
44 <param name="skip_pairs_with_no_mate_cigar" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Skip record pairs with no mate cigar and include them in the output" help="SKIP_PAIRS_WITH_NO_MATE_CIGAR; default=True"/> 44 <param name="skip_pairs_with_no_mate_cigar" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Skip record pairs with no mate cigar and include them in the output" help="SKIP_PAIRS_WITH_NO_MATE_CIGAR; default=True"/>
45 <param name="remove_duplicates" type="boolean" label="If true do not write duplicates to the output file instead of writing them with appropriate flags set" help="REMOVE_DUPLICATES; default=False"/> 45 <param name="remove_duplicates" type="boolean" label="If true do not write duplicates to the output file instead of writing them with appropriate flags set" help="REMOVE_DUPLICATES; default=False"/>
46 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED; default=True"/> 46 <param name="assume_sorted" type="boolean" label="Assume the input file is already sorted" checked="true" truevalue="true" falsevalue="false" help="ASSUME_SORTED; default=True"/>
47 47
58 </sanitizer> 58 </sanitizer>
59 </param> 59 </param>
60 <param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500" label="The maximum offset between two duplicte clusters in order to consider them optical duplicates" help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/> 60 <param name="optical_duplicate_pixel_distance" type="integer" value="100" min="0" max="500" label="The maximum offset between two duplicte clusters in order to consider them optical duplicates" help="OPTICAL_DUPLICATE_PIXEL_DISTANCE; default=100"/>
61 61
62 <expand macro="VS" /> 62 <expand macro="VS" />
63 63
64 </inputs> 64 </inputs>
65 65
66 <outputs> 66 <outputs>
67 <data format="txt" name="metrics_file" label="${tool.name} on ${on_string}: MarkDuplicate metrics"/> 67 <data format="txt" name="metrics_file" label="${tool.name} on ${on_string}: MarkDuplicate metrics"/>
68 <data format="bam" name="outFile" label="${tool.name} on ${on_string}: MarkDuplicates BAM output"/> 68 <data format="bam" name="outFile" label="${tool.name} on ${on_string}: MarkDuplicates BAM output"/>
69 </outputs> 69 </outputs>
70 70
71 <tests> 71 <tests>
72 <test> 72 <test>
73 <param name="inputFile" value="picard_MarkDuplicatesWithMateCigar.bam" ftype="bam"/> 73 <param name="inputFile" value="picard_MarkDuplicatesWithMateCigar.bam" ftype="bam"/>
74 <param name="minimum_distance" value="-1"/> 74 <param name="minimum_distance" value="-1"/>
75 <param name="skip_pairs_with_no_mate_cigar" value="True"/> 75 <param name="skip_pairs_with_no_mate_cigar" value="True"/>
81 <param name="duplicate_scoring_strategy" value="TOTAL_MAPPED_REFERENCE_LENGTH"/> 81 <param name="duplicate_scoring_strategy" value="TOTAL_MAPPED_REFERENCE_LENGTH"/>
82 <param name="validation_stringency" value="LENIENT"/> 82 <param name="validation_stringency" value="LENIENT"/>
83 <output name="outFile" file="picard_MarkDuplicatesWithMateCigar_test1.bam" ftype="bam" lines_diff="4"/> 83 <output name="outFile" file="picard_MarkDuplicatesWithMateCigar_test1.bam" ftype="bam" lines_diff="4"/>
84 </test> 84 </test>
85 </tests> 85 </tests>
86 86
87 87
88 <help> 88 <help>
89 89
90 **Purpose** 90 **Purpose**
91 91
92 Examines aligned records in the supplied SAM or BAM dataset to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged. 92 Examines aligned records in the supplied SAM or BAM dataset to locate duplicate molecules. All records are then written to the output file with the duplicate records flagged.
108 108
109 @dataset_collections@ 109 @dataset_collections@
110 110
111 @description@ 111 @description@
112 112
113 MINIMUM_DISTANCE=Integer The minimum distance to buffer records to account for clipping on the 5' end of the 113 MINIMUM_DISTANCE=Integer The minimum distance to buffer records to account for clipping on the 5' end of the
114 records. Set this number to -1 to use twice the first read's read length (or 100, 114 records. Set this number to -1 to use twice the first read's read length (or 100,
115 whichever is smaller). Default value: -1. This option can be set to 'null' to clear the 115 whichever is smaller). Default value: -1. This option can be set to 'null' to clear the
116 default value. 116 default value.
117 117
118 SKIP_PAIRS_WITH_NO_MATE_CIGAR=Boolean 118 SKIP_PAIRS_WITH_NO_MATE_CIGAR=Boolean
119 Skip record pairs with no mate cigar and include them in the output. Default value: 119 Skip record pairs with no mate cigar and include them in the output. Default value:
120 true. This option can be set to 'null' to clear the default value. Possible values: 120 true. This option can be set to 'null' to clear the default value. Possible values:
121 {true, false} 121 {true, false}
122 122
123 COMMENT=String 123 COMMENT=String
124 CO=String Comment(s) to include in the output file's header. This option may be specified 0 or 124 CO=String Comment(s) to include in the output file's header. This option may be specified 0 or
125 more times. 125 more times.
126 126
127 REMOVE_DUPLICATES=Boolean If true do not write duplicates to the output file instead of writing them with 127 REMOVE_DUPLICATES=Boolean If true do not write duplicates to the output file instead of writing them with
128 appropriate flags set. Default value: false. 128 appropriate flags set. Default value: false.
129 129
130 READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read 130 READ_NAME_REGEX=String Regular expression that can be used to parse read names in the incoming SAM file. Read
131 names are parsed to extract three variables: tile/region, x coordinate and y coordinate. 131 names are parsed to extract three variables: tile/region, x coordinate and y coordinate.
132 These values are used to estimate the rate of optical duplication in order to give a more 132 These values are used to estimate the rate of optical duplication in order to give a more
133 accurate estimated library size. Set this option to null to disable optical duplicate 133 accurate estimated library size. Set this option to null to disable optical duplicate
134 detection. The regular expression should contain three capture groups for the three 134 detection. The regular expression should contain three capture groups for the three
135 variables, in order. It must match the entire read name. Note that if the default regex 135 variables, in order. It must match the entire read name. Note that if the default regex
136 is specified, a regex match is not actually done, but instead the read name is split on 136 is specified, a regex match is not actually done, but instead the read name is split on
137 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be 137 colon character. For 5 element names, the 3rd, 4th and 5th elements are assumed to be
138 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements 138 tile, x and y values. For 7 element names (CASAVA 1.8), the 5th, 6th, and 7th elements
139 are assumed to be tile, x and y values. Default value: 139 are assumed to be tile, x and y values. Default value:
140 [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*. 140 [a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*.
141 141
142 DUPLICATE_SCORING_STRATEGY=ScoringStrategy 142 DUPLICATE_SCORING_STRATEGY=ScoringStrategy
143 DS=ScoringStrategy The scoring strategy for choosing the non-duplicate among candidates. Default value: 143 DS=ScoringStrategy The scoring strategy for choosing the non-duplicate among candidates. Default value:
144 TOTAL_MAPPED_REFERENCE_LENGTH. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH} 144 TOTAL_MAPPED_REFERENCE_LENGTH. Possible values: {SUM_OF_BASE_QUALITIES, TOTAL_MAPPED_REFERENCE_LENGTH}
145 145
146 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer 146 OPTICAL_DUPLICATE_PIXEL_DISTANCE=Integer
147 The maximum offset between two duplicate clusters in order to consider them optical 147 The maximum offset between two duplicate clusters in order to consider them optical
148 duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels) 148 duplicates. This should usually be set to some fairly small number (e.g. 5-10 pixels)
149 unless using later versions of the Illumina pipeline that multiply pixel values by 10, in 149 unless using later versions of the Illumina pipeline that multiply pixel values by 10, in
150 which case 50-100 is more normal. Default value: 100. 150 which case 50-100 is more normal. Default value: 100.
151 151
152 @more_info@ 152 @more_info@
153 153
154 </help> 154 </help>
155 </tool> 155 </tool>