comparison samtools_merge.xml @ 1:8890e2d4c068 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_merge commit 0f75269223c0821c6c82acf98fde947d0f816f2b"
author iuc
date Tue, 28 Sep 2021 16:14:52 +0000
parents 740ce0a18f0d
children b40e2d865d52
comparison
equal deleted inserted replaced
0:740ce0a18f0d 1:8890e2d4c068
1 <tool id="samtools_merge" name="Samtools merge" version="@TOOL_VERSION@"> 1 <tool id="samtools_merge" name="Samtools merge" version="@TOOL_VERSION@" profile="@PROFILE@">
2 <description>merge multiple sorted alignment files</description> 2 <description>merge multiple sorted alignment files</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements"/> 6 <expand macro="requirements"/>
7 <expand macro="stdio"/> 7 <expand macro="stdio"/>
8 <expand macro="version_command"/> 8 <expand macro="version_command"/>
9 <command><![CDATA[ 9 <command><![CDATA[
10 @ADDTHREADS@ 10 @ADDTHREADS@
11 11 ## prepare input (sort sam, link other), determine input ordering
12 ## prepare input (sort sam, link other), determine input ordering
13 ## and check for consistent input ordering 12 ## and check for consistent input ordering
14 #set sortby='' 13 #set sortby=''
15 #for $i, $bam in enumerate( $bamfiles ): 14 #for $i, $bam in enumerate( $bamfiles ):
16 #if $bam.is_of_type('sam', 'cram', ): 15 #if $bam.is_of_type('sam', 'cram', ):
17 samtools sort 16 samtools sort
18 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T sorttemp 17 -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}"
19 -O sam 18 -O sam
20 -o ${i}.sam 19 -o ${i}.sam
21 '$bam' && 20 '$bam' &&
22 #else: 21 #else:
23 ln -s '$bam' ${i}.sam && 22 ln -s '$bam' ${i}.sam &&
24 #end if 23 #end if
25
26 #if ($sortby=='coord' or $sortby=='') and $bam.is_of_type('sam','bam','cram'): 24 #if ($sortby=='coord' or $sortby=='') and $bam.is_of_type('sam','bam','cram'):
27 #set sortby='coord' 25 #set sortby='coord'
28 #else if ($sortby=='name' or $sortby=='') and $bam.is_of_type('qname_sorted.bam', 'qname_input_sorted.bam'): 26 #else if ($sortby=='name' or $sortby=='') and $bam.is_of_type('qname_sorted.bam', 'qname_input_sorted.bam'):
29 #set $sortby='name' 27 #set $sortby='name'
30 #else: 28 #else:
31 >&2 echo "inconsistently sorted input" && 29 >&2 echo "inconsistently sorted input" &&
32 exit 1 && 30 exit 1 &&
33 #end if 31 #end if
34 #end for 32 #end for
35 33 #if $bed_file
34 #for $i, $bam in enumerate( $bamfiles ):
35 samtools index ${i}.sam &&
36 #end for
37 #end if
36 samtools merge 38 samtools merge
37 -@ \$addthreads 39 -@ \$addthreads
38 -s $seed 40 -s $seed
39 ## TODO force overwrite seems necessary (but I do not understand why ...) 41 ## TODO force overwrite seems necessary (but I do not understand why ...)
40 -f 42 -f
41 ## Galaxy provides only default compression 43 ## Galaxy provides only default compression
42 ## #if $compression == 'levelone' 44 ## #if $compression == 'levelone'
43 ## -1 45 ## -1
44 ## #else if $compression == 'uncompressed' 46 ## #else if $compression == 'uncompressed'
45 ## -u 47 ## -u
49 #end if 51 #end if
50 #if $sortby=='name' 52 #if $sortby=='name'
51 -n 53 -n
52 #end if 54 #end if
53 ## TODO since galaxy can't represent this as data type at the moment this option is unsupported 55 ## TODO since galaxy can't represent this as data type at the moment this option is unsupported
54 ## -t TAG The input alignments have been sorted by the value of TAG, then by either position or name (if -n is given). 56 ## -t TAG The input alignments have been sorted by the value of TAG, then by either position or name (if -n is given).
55 #if str($region) != '' 57 #if str($region) != ''
56 -R '$region' 58 -R '$region'
57 #end if 59 #end if
58 ## Attach an RG tag to each alignment. The tag value is inferred from file names. 60 ## Attach an RG tag to each alignment. The tag value is inferred from file names.
59 ## -r 61 ## -r
60 ## TODO -r makes no sense with the link names, is there some data set metadata (tags,...) that could be used? 62 ## TODO -r makes no sense with the link names, is there some data set metadata (tags,...) that could be used?
63 #if $bed_file:
64 -L '$bed_file'
65 #end if
61 $idrg 66 $idrg
62 $idpg 67 $idpg
63 $output 68 $output
64 #for $i, $bam in enumerate( $bamfiles ): 69 #for $i, $bam in enumerate( $bamfiles ):
65 ${i}.sam 70 ${i}.sam
66 #end for 71 #end for
67 ]]></command> 72 ]]></command>
68 <inputs> 73 <inputs>
69 <param name="bamfiles" type="data" format="sam,bam,cram" multiple="true" optional="false" label="Alignments in BAM format" help="Sets of aligned reads." /> 74 <param name="bamfiles" type="data" format="sam,bam,cram" multiple="true" optional="false" label="Alignments in BAM format" help="Sets of aligned reads." />
75 <param name="bed_file" type="data" optional="true" format="bed" label="Merge only reads overlapping the specified regions in the BED file" />
70 <param name="region" type="text" optional="true" argument="-n" label="Merge files in a region" help="Merge files in the specified region indicated by a string" /> 76 <param name="region" type="text" optional="true" argument="-n" label="Merge files in a region" help="Merge files in the specified region indicated by a string" />
71 <param name="headerbam" type="data" format="sam,bam" argument="-h" multiple="false" optional="true" label="File to take @headers from" help="Use the lines of FILE as `@' headers to be copied to out.bam, replacing any header lines that would otherwise be copied from in1.bam. (FILE is actually in SAM format, though any alignment records it may contain are ignored.)" /> 77 <param name="headerbam" type="data" format="sam,bam" argument="-h" multiple="false" optional="true" label="File to take @headers from" help="Use the lines of FILE as `@' headers to be copied to out.bam, replacing any header lines that would otherwise be copied from in1.bam. (FILE is actually in SAM format, though any alignment records it may contain are ignored.)" />
72 <param name="idrg" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Make @RG headers unique" help="When several input files contain @RG headers with the same ID, emit only one of them (namely, the header line from the first file we find that ID in) to the merged output file. Combining these similar headers is usually the right thing to do when the files being merged originated from the same file. Without -c, all @RG headers appear in the output file, with random suffices added to their IDs where necessary to differentiate them." /> 78 <param name="idrg" type="boolean" argument="-c" truevalue="-c" falsevalue="" checked="false" label="Make @RG headers unique" help="When several input files contain @RG headers with the same ID, emit only one of them (namely, the header line from the first file we find that ID in) to the merged output file. Combining these similar headers is usually the right thing to do when the files being merged originated from the same file. Without -c, all @RG headers appear in the output file, with random suffices added to their IDs where necessary to differentiate them." />
73 <param name="idpg" type="boolean" argument="-p" truevalue="-p" falsevalue="" checked="false" label="Make @PG headers unique" help="Similarly, for each @PG ID in the set of files to merge, use the @PG line of the first file we find that ID in rather than adding a suffix to differentiate similar IDs." /> 79 <param name="idpg" type="boolean" argument="-p" truevalue="-p" falsevalue="" checked="false" label="Make @PG headers unique" help="Similarly, for each @PG ID in the set of files to merge, use the @PG line of the first file we find that ID in rather than adding a suffix to differentiate similar IDs." />
74 <param name="seed" type="integer" argument="-s" value="1" label="random seed" /> 80 <param name="seed" type="integer" argument="-s" value="1" label="random seed" />
81 </inputs> 87 </inputs>
82 <outputs> 88 <outputs>
83 <data name="output" format="bam" /> 89 <data name="output" format="bam" />
84 </outputs> 90 </outputs>
85 <tests> 91 <tests>
86 <!-- tests and data extracted from 92 <!--tests and data extracted from
87 https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl 93 https://github.com/samtools/samtools/blob/9ce8c64493f7ea3fa69bc5c1ac980b1a8e3dcf1f/test/test.pl
88 https://github.com/samtools/samtools/tree/develop/test/merge --> 94 https://github.com/samtools/samtools/tree/develop/test/merge -->
89 <!-- # Merge 1 - Standard 3 file SAM merge all presented on the command line (only checks for similar size, because generated header info differs) --> 95 <!-- Merge 1 - Standard 3 file SAM merge all presented on the command line (only checks for similar size, because generated header info differs) -->
90 <test> 96 <test>
91 <param name="bamfiles" value="test_input_1_a.sam,test_input_1_b.sam,test_input_1_c.sam" /> 97 <param name="bamfiles" value="test_input_1_a.sam,test_input_1_b.sam,test_input_1_c.sam" />
92 <output name="output" file="2.merge.expected.bam" compare="sim_size" delta="50" /> 98 <output name="output" file="1.merge.expected.bam" ftype="bam" lines_diff="16" />
93 </test> 99 </test>
94 <!-- Merge 2 - Standard 3 file BAM merge all files presented on the command line --> 100 <!-- Merge 2 - Standard 3 file BAM merge all files presented on the command line -->
95 <test> 101 <test>
96 <param name="bamfiles" value="test_input_1_a.bam,test_input_1_b.bam,test_input_1_c.bam" /> 102 <param name="bamfiles" value="test_input_1_a.bam,test_input_1_b.bam,test_input_1_c.bam" />
97 <output name="output" file="2.merge.expected.bam" compare="sim_size" delta="50" /> 103 <output name="output" file="2.merge.expected.bam" ftype="bam" lines_diff="16" />
98 </test> 104 </test>
99 <!-- Merge 4 - 1 file BAM merge with file presented on the command line --> 105 <!-- Merge 3 - 1 file BAM merge with file presented on the command line -->
100 <test> 106 <test>
101 <param name="bamfiles" value="test_input_1_b.bam" /> 107 <param name="bamfiles" value="test_input_1_b.bam" />
102 <output name="output" file="4.merge.expected.bam" compare="sim_size" delta="50" /> 108 <output name="output" file="3.merge.expected.bam" ftype="bam" lines_diff="16" />
103 </test> 109 </test>
104 <!--Merge 5 - 3 file SAM merge all presented on the command line override IDs to file names (not implemented in tool) --> 110 <!--Merge 4 - merge all presented on the command line, combine PG and RG rather than dedup -->
105 <!--Merge 6 - merge all presented on the command line, combine PG and RG rather than dedup -->
106 <test> 111 <test>
107 <param name="bamfiles" value="test_input_1_a.bam,test_input_1_b.bam" /> 112 <param name="bamfiles" value="test_input_1_a.bam,test_input_1_b.bam" />
108 <param name="idrg" value="-c" /> 113 <param name="idrg" value="-c" />
109 <param name="idpg" value="-p" /> 114 <param name="idpg" value="-p" />
110 <output name="output" file="6.merge.expected.bam" compare="sim_size" delta="50" /> 115 <output name="output" file="4.merge.expected.bam" ftype="bam" lines_diff="16" />
111 </test> 116 </test>
112 <!-- Merge 7 - ID and SN with regex in them (probably not necessary for the galaxy tool because just different input) --> 117 <!-- Merge 5 - ID and SN with regex in them (probably not necessary for the galaxy tool because just different input) -->
113 <test> 118 <test>
114 <param name="bamfiles" value="test_input_1_a_regex.sam,test_input_1_b_regex.sam" /> 119 <param name="bamfiles" value="test_input_1_a_regex.sam,test_input_1_b_regex.sam" />
115 <output name="output" file="7.merge.expected.bam" compare="sim_size" delta="50" /> 120 <output name="output" file="5.merge.expected.bam" ftype="bam" lines_diff="16" />
116 </test> 121 </test>
122 <!-- Merge 6 - Merging with bedfile -->
123 <test>
124 <param name="bamfiles" value="test_input_1_a.bam,test_input_1_b.bam,test_input_1_c.bam" />
125 <param name="bed" value="yes" />
126 <param name="bed_file" value="test_input_1_a.bed" />
127 <output name="output" file="6.merge.expected.bam" ftype="bam" lines_diff="16" />
128 </test>
129 <!--Merge - 3 file SAM merge all presented on the command line override IDs to file names (not implemented in tool) -->
117 <!-- Sort inputs by PG, then merge (not implemented, since -t not supported in the tool) --> 130 <!-- Sort inputs by PG, then merge (not implemented, since -t not supported in the tool) -->
118 <!-- Sort inputs by PG, then merge (name sorted) (not implemented, since -t not supported in the tool) --> 131 <!-- Sort inputs by PG, then merge (name sorted) (not implemented, since -t not supported in the tool) -->
119 </tests> 132 </tests>
120 <help> 133 <help>
121 **What it does** 134 **What it does**
122 Merge multiple sorted alignment files, producing a single sorted output file that contains all the input records and maintains the existing sort order. 135 Merge multiple sorted alignment files, producing a single sorted output file that contains all the input records and maintains the existing sort order.
123 136
124 If a file to take @headers from is specified the @SQ headers of input files will be merged into the specified header, otherwise they will be merged into a composite header created from the input headers. If in the process of merging @SQ lines for coordinate sorted input files, a conflict arises as to the order (for example input1.bam has @SQ for a,b,c and input2.bam has b,a,c) then the resulting output file will need to be re-sorted back into coordinate order. 137 If a file to take @headers from is specified the @SQ headers of input files will be merged into the specified header, otherwise they will be merged into a composite header created from the input headers. If in the process of merging @SQ lines for coordinate sorted input files, a conflict arises as to the order (for example input1.bam has @SQ for a,b,c and input2.bam has b,a,c) then the resulting output file will need to be re-sorted back into coordinate order.
125 138
126 Unless the @PG/@RG headers are made unique when merging @RG and @PG records into the output header then any IDs found to be duplicates of existing IDs in the output header will have a suffix appended to them to differentiate them from similar header records from other files and the read records will be updated to reflect this. 139 Unless the @PG/@RG headers are made unique when merging @RG and @PG records into the output header then any IDs found to be duplicates of existing IDs in the output header will have a suffix appended to them to differentiate them from similar header records from other files and the read records will be updated to reflect this.
127 </help> 140 </help>
128 <expand macro="citations"/> 141 <expand macro="citations"/>
129 </tool> 142 </tool>