comparison bamtools_split_ref.xml @ 1:9dbf707bebb0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tool_collections/bamtools/bamtools_split_ref commit a14db40361bcb2ee608bccd9222e1654aaea3324-dirty
author iuc
date Wed, 11 Jan 2023 12:03:53 +0000
parents 09470ab960f1
children 9b520009db81
comparison
equal deleted inserted replaced
0:09470ab960f1 1:9dbf707bebb0
1 <tool id="bamtools_split_ref" name="Split BAM by Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> 1 <tool id="bamtools_split_ref" name="Split BAM by Reference" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
2 <description>into dataset list collection</description> 2 <description>into dataset list collection</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements">
7 <command> 7 <requirement type="package" version="1.16.1">samtools</requirement>
8 <![CDATA[ 8 </expand>
9 ln -s '${input_bam}' 'localbam.bam' && 9 <command><![CDATA[
10 ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' && 10 ln -s '${input_bam}' localbam.bam &&
11 bamtools split -reference 11 ln -s '${input_bam.metadata.bam_index}' 'localbam.bam.bai' &&
12 -in localbam.bam 12 samtools view -bH localbam.bam --no-PG -o header.bam &&
13 -stub split_bam 13 bamtools split -reference
14 ## Preserve order from metadata in the output collection 14 -in localbam.bam
15 #import re 15 -stub split_bam
16 #set $name = $re.sub('\W','_',$re.sub('\.bam$','',$input_bam.name)) 16 && (IFS=',';
17 #if str($refs) != 'None': 17 for i in \$REFS_FROM_BAM_METADATA;
18 #set $ref_list = ' '.join(str($refs).split(",")) 18 do FN=`printf "split_bam.REF_%s.bam" "\$i"`;
19 #else 19 if [ ! -f \$FN ]; then cp header.bam "\$FN"; fi;
20 #set $ref_list = ' '.join([$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]) 20 done)
21 #end if 21 && cp '$c1' galaxy.json
22 && mkdir -p outputs 22 ]]></command>
23 && (export I=0; 23 <environment_variables>
24 for i in $ref_list; 24 <environment_variable name="REFS_FROM_BAM_METADATA">#import re
25 do I=\$((++I)); SN=`printf "split_bam.REF_%s.bam" "\$i"`; 25 ## need to extract ref names from Galaxy's safe string representation
26 if [ -e \$SN ]; then FN=`printf "outputs/split_bam%05d%s.%s.bam" \$((I)) '$name' "\$i"`; mv \$SN \$FN; fi; 26 #set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
27 done) 27 #if str($refs) != 'None'
28 ]]> 28 #set $refs_selected = set(str($refs).split(","))
29 </command> 29 ## sort the selected refs by their order in the bam metadata
30 #echo ','.join([r for r in $ref_list if r in refs_selected])
31 #else
32 #echo ','.join($ref_list)
33 #end if
34 </environment_variable>
35 </environment_variables>
36 <configfiles>
37 <configfile name="c1">#import re
38 ## need to extract ref names from Galaxy's safe string representation
39 #set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
40 #if str($refs) != 'None'
41 #set $refs_selected = set(str($refs).split(","))
42 #set $ref_list = [r for r in $ref_list if r in refs_selected]
43 #end if
44 #set $elems = [{'name': '%s: %s' % ($input_bam.name, r), 'filename': 'split_bam.REF_%s.bam' % r, 'dbkey': str($input_bam.dbkey)} for r in $ref_list]
45 #import json
46 #echo json.dumps({'output_bams': {'elements': $elems}})</configfile>
47 </configfiles>
30 <inputs> 48 <inputs>
31 <param name="input_bam" type="data" format="bam" label="BAM dataset to split by reference"/> 49 <param name="input_bam" type="data" format="bam" label="BAM dataset to split by reference"/>
32 <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" > 50 <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" >
33 <help><![CDATA[Click and type in the box above to see options. You can select multiple entries. 51 <help><![CDATA[Click and type in the box above to see options. You can select multiple entries.
34 If "No options available" is displayed, you need to re-detect metadata on the input dataset. 52 If "No options available" is displayed, you need to re-detect metadata on the input dataset.
37 <filter type="data_meta" ref="input_bam" key="reference_names" /> 55 <filter type="data_meta" ref="input_bam" key="reference_names" />
38 </options> 56 </options>
39 </param> 57 </param>
40 </inputs> 58 </inputs>
41 <outputs> 59 <outputs>
42 <collection name="output_bams" type="list" label="${input_bam.name} Split List"> 60 <collection name="output_bams" type="list">
43 <discover_datasets pattern="split_bam\d*(?P&lt;designation&gt;.+)\.bam" ext="bam" directory="outputs" visible="false"/> 61 <discover_datasets from_provided_metadata="true" ext="bam" visible="false" />
44 </collection> 62 </collection>
45 </outputs> 63 </outputs>
46 <tests> 64 <tests>
47 <test> 65 <test>
48 <param name="input_bam" ftype="bam" value="bamtools-input2.bam"/> 66 <param name="input_bam" ftype="bam" value="bamtools-input2.bam" />
49 <output_collection name="output_bams" type="list"> 67 <output_collection name="output_bams" type="list" count="25">
50 <element name="bamtools_input2.chr1" file="bamtools_input2.chr1" compare="sim_size" delta="500" /> 68 <element name="bamtools-input2.bam: chrM" file="bamtools_input2.header.bam" ftype="bam" />
69 <element name="bamtools-input2.bam: chr1" file="bamtools_input2.chr1" ftype="bam" />
70 <element name="bamtools-input2.bam: chr21" file="bamtools_input2.chr21.bam" ftype="bam" />
71 </output_collection>
72 </test>
73 <test>
74 <param name="input_bam" ftype="bam" value="bamtools-input2.bam" />
75 <param name="refs" value="chrM,chr1,chr21" />
76 <output_collection name="output_bams" type="list" count="3">
77 <element name="bamtools-input2.bam: chrM" file="bamtools_input2.header.bam" ftype="bam" />
78 <element name="bamtools-input2.bam: chr1" file="bamtools_input2.chr1" ftype="bam" />
79 <element name="bamtools-input2.bam: chr21" file="bamtools_input2.chr21.bam" ftype="bam" />
51 </output_collection> 80 </output_collection>
52 </test> 81 </test>
53 </tests> 82 </tests>
54 <help> 83 <help>
55 **What it does** 84 **What it does**
56 85
57 BAMTools split is a utility for splitting BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools). 86 BAMTools split is a utility for splitting BAM files. It is based on the BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
58
59 -----
60
61 .. class:: warningmark
62
63 **DANGER: Multiple Outputs**
64
65 As described below, splitting a BAM dataset(s) on reference name or a tag value can produce very large numbers of outputs. Read below and know what you are doing.
66 87
67 ----- 88 -----
68 89
69 **How it works** 90 **How it works**
70 91
71 Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references. 92 Split alignments by reference name into a dataset list collection. The collection will be in the same order as the input BAM references and will consist of as many elements as there are references selected or listed in the input BAM header.
72 93
73 In cases of unfinished genomes with very large number of reference sequences (scaffolds) 94 .. class:: warningmark
74 it can generate thousands (if not millions) of output datasets.
75 95
96 In cases of unfinished genomes with a very large number of reference sequences (scaffolds)
97 this could generate thousands (if not millions) of output datasets.
76 98
77 ----- 99 -----
78 100
79 .. class:: infomark 101 .. class:: infomark
80 102