comparison picard_ReorderSam.xml @ 5:3d4f1fa26f0e draft

Uploaded
author devteam
date Tue, 16 Dec 2014 19:03:21 -0500
parents 9227b8c3093b
children 3a3234d7a2e8
comparison
equal deleted inserted replaced
4:ab1f60c26526 5:3d4f1fa26f0e
1 <tool name="Reorder SAM/BAM" id="picard_ReorderSam" version="1.56.0"> 1 <tool name="ReorderSam" id="picard_ReorderSam" version="1.126.0">
2 <requirements><requirement type="package" version="1.56.0">picard</requirement></requirements> 2 <description>reorder reads to match ordering in reference sequences</description>
3 <command interpreter="python"> 3 <requirements><requirement type="package" version="1.126.0">picard</requirement></requirements>
4 picard_wrapper.py 4
5 --input="${inputFile}" 5 <macros>
6 #if $source.indexSource == "built-in" 6 <import>picard_macros.xml</import>
7 --ref="${source.ref.fields.path}" 7 </macros>
8 #else 8
9 --ref-file="${refFile}" 9 <command>
10 --species-name="${source.speciesName}" 10 @java_options@
11 --build-name="${source.buildName}" 11 #set $picard_dict = "localref.dict"
12 --trunc-names="${source.truncateSeqNames}" 12 #set $ref_fasta = "localref.fa" ## This is done because Picard "likes" the .fa extension
13 #end if 13
14 --allow-inc-dict-concord="${allowIncDictConcord}" 14 ln -s "${reference_source.ref_file}" "${ref_fasta}" &amp;&amp;
15 --allow-contig-len-discord="${allowContigLenDiscord}" 15
16 --output-format="${outputFormat}" 16 #if str( $reference_source.reference_source_selector ) == "history":
17 --output="${outFile}" 17
18 --tmpdir "${__new_file_path__}" 18 java -jar \$JAVA_JAR_PATH/picard.jar CreateSequenceDictionary REFERENCE="${ref_fasta}" OUTPUT="${picard_dict}"
19 -j "\$JAVA_JAR_PATH/ReorderSam.jar" 19 QUIET=true
20 VERBOSITY=ERROR
21
22 &amp;&amp;
23
24 #else:
25
26 #set $ref_fasta = str( $reference_source.ref_file.fields.path )
27
28 #end if
29
30 java -jar \$JAVA_JAR_PATH/picard.jar
31 ReorderSam
32 INPUT="${inputFile}"
33 OUTPUT="${outFile}"
34 REFERENCE="${ref_fasta}"
35 ALLOW_INCOMPLETE_DICT_CONCORDANCE="${allow_incomplete_dict_concordance}"
36 ALLOW_CONTIG_LENGTH_DISCORDANCE="${allow_contig_length_discordance}"
37
38 VALIDATION_STRINGENCY="${validation_stringency}"
39 QUIET=true
40 VERBOSITY=ERROR
41
20 </command> 42 </command>
43
21 <inputs> 44 <inputs>
22 <param format="bam,sam" name="inputFile" type="data" label="SAM/BAM dataset to be reordered" 45
23 help="If empty, upload or import a SAM/BAM dataset." /> 46 <conditional name="reference_source">
24 <conditional name="source"> 47 <param name="reference_source_selector" type="select" label="Load reference genome from">
25 <param name="indexSource" type="select" label="Select Reference Genome" help="This tool will re-order SAM/BAM in the same order as reference selected below."> 48 <option value="cached">Local cache</option>
26 <option value="built-in">Locally cached</option>
27 <option value="history">History</option> 49 <option value="history">History</option>
28 </param> 50 </param>
29 <when value="built-in"> 51 <when value="cached">
30 <param name="ref" type="select" label="Select a reference genome"> 52 <param name="ref_file" type="select" label="Use dictionary from the list" help="Select genome from the list">
31 <options from_data_table="picard_indexes" /> 53 <options from_data_table="picard_indexes">
54 <filter type="sort_by" column="2" />
55 <validator type="no_options" message="No indexes are available" />
56 </options>
57 <validator type="no_options" message="A built-in dictionary is not available for the build associated with the selected input file"/>
32 </param> 58 </param>
33 </when> 59 </when>
34 <when value="history"> 60 <when value="history">
35 <param name="refFile" type="data" format="fasta" metadata_name="dbkey" label="Using reference file" /> 61 <param name="ref_file" type="data" format="fasta" label="Use the following dataset to create dictionary" help="You can upload a FASTA sequence to the history from which Picard will automatically generate dictionary using CreateSequenceDictionary command" />
36 <param name="speciesName" type="text" value="" label="Species name" />
37 <param name="buildName" type="text" value="" label="Build name" />
38 <param name="truncateSeqNames" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Truncate sequence names after first whitespace" />
39 </when> 62 </when>
40 </conditional> 63 </conditional>
41 <param name="allowIncDictConcord" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Allow incomplete dict concordance?" help="Allows a partial overlap of the BAM contigs with the new reference sequence contigs." /> 64
42 <param name="allowContigLenDiscord" type="boolean" checked="False" truevalue="true" falsevalue="false" label="Allow contig length discordance?" help="This is dangerous--don't check it unless you know exactly what you're doing!" /> 65 <param format="sam,bam" name="inputFile" type="data" label="Select SAM/BAM dataset or dataset collection" help="If empty, upload or import a SAM/BAM dataset"/>
43 <param name="outputFormat" type="boolean" checked="True" truevalue="bam" falsevalue="sam" label="Output BAM instead of SAM" help="Uncheck for SAM output" /> 66 <param name="allow_incomplete_dict_concordance" type="boolean" label="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs" help="ALLOW_INCOMPLETE_DICT_CONCORDANCE; By default, this tool requires a corresponding contig in the new reference for each read contig; default=False"/>
67 <param name="allow_contig_length_discordance" type="boolean" label="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length" help="ALLOW_CONTIG_LENGTH_DISCORDANCE; HIGHLY DANGEROUS! Only use if you know what you are doing; default=False"/>
68 <expand macro="VS" />
69
44 </inputs> 70 </inputs>
45 <outputs> 71 <outputs>
46 <data name="outFile" format="bam" label="${tool.name} on ${on_string}: reordered ${outputFormat}"> 72 <data name="outFile" format="bam" label="${tool.name} on ${on_string}: Reordered BAM"/>
47 <change_format>
48 <when input="outputFormat" value="sam" format="sam" />
49 </change_format>
50 </data>
51 </outputs> 73 </outputs>
52 <tests> 74 <tests>
53 <test> 75 <test>
54 <!-- Commands: 76 <param name="reference_source_selector" value="history" />
55 cp test-data/phiX.fasta . 77 <param name="ref_file" value="picard_ReorderSam_ref.fa" ftype="fasta" />
56 samtools faidx phiX.fasta 78 <param name="inputFile" value="picard_ReorderSam.bam" ftype="bam"/>
57 java -jar CreateSequenceDictionary.jar R=phiX.fasta O=phiX.dict URI=phiX.fasta TRUNCATE_NAMES_AT_WHITESPACE=false SPECIES=phiX174 79 <param name="allow_incomplete_dict_concordance" value="false"/>
58 java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input1.bam O=picard_RS_output1.bam REFERENCE=phiX.fasta ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false 80 <param name="allow_contig_length_discordance" value="false"/>
59 --> 81 <output name="outFile" file="picard_ReorderSam_test1.bam" ftype="bam" lines_diff="2"/>
60 <param name="inputFile" value="picard_RS_input1.bam" />
61 <param name="indexSource" value="history" />
62 <param name="refFile" value="phiX.fasta" />
63 <param name="speciesName" value="phiX174" />
64 <param name="buildName" value="" />
65 <param name="truncateSeqNames" value="false" />
66 <param name="allowIncDictConcord" value="false" />
67 <param name="allowContigLenDiscord" value="false" />
68 <param name="outputFormat" value="True" />
69 <output name="outFile" file="picard_RS_output1.bam" ftype="bam" lines_diff="4" compare="contains" />
70 </test>
71 <test>
72 <!-- Command:
73 java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input2.sam O=picard_RS_output2.sam REFERENCE=/path/to/phiX/picard_index/phiX.fa ALLOW_INCOMPLETE_DICT_CONCORDANCE=false ALLOW_CONTIG_LENGTH_DISCORDANCE=false
74 /path/to/phiX/srma_index/phiX.fa is path to phiX.fa, phiX.fa.fai, and phiX.dict
75 -->
76 <param name="inputFile" value="picard_RS_input2.sam" />
77 <param name="indexSource" value="built-in" />
78 <param name="ref" value="phiX" />
79 <param name="allowIncDictConcord" value="false" />
80 <param name="allowContigLenDiscord" value="false" />
81 <param name="outputFormat" value="False" />
82 <output name="outFile" file="picard_RS_output2.sam" ftype="sam" lines_diff="4" sort="True" />
83 </test>
84 <test>
85 <!-- Commands:
86 cp test-data/picard_RS_input4.fasta .
87 samtools faidx picard_RS_input4.fasta
88 java -jar CreateSequenceDictionary.jar R=picard_RS_input4.fasta O=picard_RS_input4.dict URI=picard_RS_input4.fasta TRUNCATE_NAMES_AT_WHITESPACE=true SPECIES=phiX174 GENOME_ASSEMBLY=phiX_buildBlah1.1
89 java -jar ReorderSam.jar VALIDATION_STRINGENCY=LENIENT I=test-data/picard_RS_input3.bam O=picard_RS_output3.sam REFERENCE=picard_RS_input4.fasta ALLOW_INCOMPLETE_DICT_CONCORDANCE=true ALLOW_CONTIG_LENGTH_DISCORDANCE=false
90 picard_RS_input3.bam can be made from picard_RS_input3.sam
91 -->
92 <param name="inputFile" value="picard_RS_input3.bam" />
93 <param name="indexSource" value="history" />
94 <param name="refFile" value="picard_RS_input4.fasta" />
95 <param name="speciesName" value="phiX174" />
96 <param name="buildName" value="phiX_buildBlah1.1" />
97 <param name="truncateSeqNames" value="true" />
98 <param name="allowIncDictConcord" value="true" />
99 <param name="allowContigLenDiscord" value="false" />
100 <param name="outputFormat" value="False" />
101 <output name="outFile" file="picard_RS_output3.sam" ftype="sam" lines_diff="12" sort="True" />
102 </test> 82 </test>
103 </tests> 83 </tests>
84
85 <stdio>
86 <exit_code range="1:" level="fatal"/>
87 </stdio>
88
104 <help> 89 <help>
105 90
106 .. class:: infomark 91 .. class:: infomark
107 92
108 **Purpose** 93 **Purpose**
109 94
110 Reorder SAM/BAM to match contig ordering in a particular reference file. Note that this is 95 ReorderSam reorders reads in a SAM/BAM file to match the contig ordering in a provided reference file, as determined by exact name matching of contigs. Reads mapped to contigs absent in the new reference are dropped.
111 not the same as sorting as done by the SortSam tool, which sorts by either coordinate
112 values or query name. The ordering in ReorderSam is based on exact name matching of
113 contigs/chromosomes. Reads that are mapped to a contig that is not in the new reference file are
114 not included in the output.
115 96
116 **Picard documentation** 97 @dataset_collections@
117 98
118 This is a Galaxy wrapper for ReorderSam, a part of the external package Picard-tools_. 99 ----
119
120 .. _Picard-tools: http://www.google.com/search?q=picard+samtools
121
122 ------
123
124 .. class:: infomark
125
126 **Inputs, outputs, and parameters**
127
128 For the file that needs to be reordered, either a sam file or a bam file must be supplied.
129 If a bam file is used, it must be coordinate-sorted. A reference file is also required,
130 so either a fasta file should be supplied or a built-in reference can be selected.
131
132 The output contains the same reads as the input file but the reads have been rearranged so
133 they appear in the same order as the provided reference file. The tool will output either
134 bam (the default) or sam, according to user selection. Bam is recommended since it is smaller.
135
136 The only extra parameters that can be set are flags for allowing incomplete dict concordance
137 and allowing contig length discordance. If incomplete dict concordance is allowed, only a
138 partial overlap of the bam contigs with the new reference sequence contigs is required. By
139 default it is off, requiring a corresponding contig in the new reference for each read contig.
140 If contig length discordance is allowed, contig names that are the same between a read and the
141 new reference contig are allowed even if they have different lengths. This is usually not a
142 good idea, unless you know exactly what you're doing. It's off by default.
143 100
144 .. class:: warningmark 101 .. class:: warningmark
145 102
146 **Warning on SAM/BAM quality** 103 Not to be confused with **SortSam**.
147 104
148 Many SAM/BAM files produced externally and uploaded to Galaxy do not fully conform to SAM/BAM specifications. Galaxy deals with this by using the **LENIENT** 105 @description@
149 flag when it runs Picard, which allows reads to be discarded if they're empty or don't map. This appears
150 to be the only way to deal with SAM/BAM that cannot be parsed.
151 106
107 ALLOW_INCOMPLETE_DICT_CONCORDANCE=Boolean
108 S=Boolean If true, then allows only a partial overlap of the BAM contigs with the new reference
109 sequence contigs. By default, this tool requires a corresponding contig in the new
110 reference for each read contig. Default value: false. Possible values: {true, false}
111
112 ALLOW_CONTIG_LENGTH_DISCORDANCE=Boolean
113 U=Boolean If true, then permits mapping from a read contig to a new reference contig with the same
114 name but a different length. Highly dangerous; only use if you know what you are doing.
115 Default value: false. Possible values: {true, false}
152 116
117 @more_info@
153 </help> 118 </help>
154 </tool> 119 </tool>
155 120
156 121
157 122
162 127
163 128
164 129
165 130
166 131
132