changeset 2:8ea06787c08a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
author artbio
date Tue, 09 Oct 2018 17:14:57 -0400
parents ae9ea0488850
children 120eb76aa500
files bamparse.py bamparse.xml test-data/input1.bam test-data/input2.bam test-data/input_new2.bam test-data/more_antisense_table.tabular test-data/more_sense_table.tabular test-data/more_table0.tabular test-data/more_table1.tabular test-data/more_table2.tabular test-data/table_antisense.tabular
diffstat 11 files changed, 466 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
--- a/bamparse.py	Sun Oct 15 19:14:29 2017 -0400
+++ b/bamparse.py	Tue Oct 09 17:14:57 2018 -0400
@@ -9,10 +9,6 @@
     the_parser = argparse.ArgumentParser()
     the_parser.add_argument('--output', nargs='+', action='store', type=str,
                             help='Count tables')
-    the_parser.add_argument('--polarity',
-                            choices=["sense", "antisense", "both"],
-                            help="forward, reverse or both forward an\
-                                reverse reads are counted")
     the_parser.add_argument('--alignments', nargs='+',
                             help="bam alignments files")
     the_parser.add_argument('--labels', nargs='+', help="Alignments labels")
@@ -23,37 +19,17 @@
     return args
 
 
-def get_counts(bamfile, polarity="both"):
+def get_counts(bamfile):
     """
     Takes an AlignmentFile object and returns a dictionary of counts for sense,
-    antisense, or both sense and antisense reads aligning to the bam references
+    antisense, or both sense and antisense bam alignments to the references,
+    depending on the pre-treatment performed by sambamba in the xml wrapper
     """
-    def filter_sense_read(read):
-        if read.is_reverse:
-            return 0
-        else:
-            return 1
-
-    def filter_antisense_read(read):
-        if read.is_reverse:
-            return 1
-        else:
-            return 0
-
     counts = defaultdict(int)
     for ref_name in bamfile.references:
         counts[ref_name] = 0
-    if polarity == "both":
-        for ref_name in bamfile.references:
-            counts[ref_name] = bamfile.count(reference=ref_name)
-    if polarity == "sense":
-        for ref_name in bamfile.references:
-            for read in bamfile.fetch(ref_name):
-                counts[ref_name] += filter_sense_read(read)
-    if polarity == "antisense":
-        for ref_name in bamfile.references:
-            for read in bamfile.fetch(ref_name):
-                counts[ref_name] += filter_antisense_read(read)
+    for ref_name in bamfile.references:
+        counts[ref_name] = bamfile.count(reference=ref_name)
     return counts
 
 
@@ -80,14 +56,14 @@
             out.close()
 
 
-def main(alignments, labels, polarity, output, number):
+def main(alignments, labels, output, number):
     diclist = []
     for file in alignments:
         bam_object = pysam.AlignmentFile(file, 'rb')
-        diclist.append(get_counts(bam_object, polarity=polarity))
+        diclist.append(get_counts(bam_object))
     writetable(diclist, labels, output, number)
 
 
 if __name__ == "__main__":
     args = Parser()
-    main(args.alignments, args.labels, args.polarity, args.output, args.number)
+    main(args.alignments, args.labels, args.output, args.number)
--- a/bamparse.xml	Sun Oct 15 19:14:29 2017 -0400
+++ b/bamparse.xml	Tue Oct 09 17:14:57 2018 -0400
@@ -1,28 +1,34 @@
-<tool id="bamparse" name="Count alignments" version="2.0.1">
+<tool id="bamparse" name="Count alignments" version="3.0.0">
     <description>in a BAM file</description>
     <requirements>
-	        <requirement type="package" version="1.1.2">bowtie</requirement>
-            <requirement type="package" version="1.11.2">numpy</requirement>
             <requirement type="package" version="0.11.2.1">pysam</requirement>
+            <requirement type="package" version="0.6.6">sambamba</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" description="Tool exception" />
     </stdio>
     <command detect_errors="exit_code"><![CDATA[
         mkdir outputdir &&
+        #if $polarity == 'sense':
+            #set pol=' and not reverse_strand'
+        #else if $polarity == 'antisense':
+            #set pol=' and reverse_strand'
+        #else:
+            #set pol=''
+        #end if
         #for $file in $input_list
-           samtools index '$file' &&
+           sambamba view -t \$GALAXY_SLOTS -F "not unmapped$pol" -f bam '$file' -o '$file.element_identifier' &&
+           samtools index '$file.element_identifier' &&
         #end for
         python $__tool_directory__/bamparse.py
         --alignments
         #for $file in $input_list
-            '$file'
+            '$file.element_identifier'
         #end for
         --labels
         #for $file in $input_list
             '$file.element_identifier'
         #end for
-	    --polarity '$polarity'
         --number '$output_option'
  ]]></command>
     <inputs>
@@ -44,6 +50,58 @@
             <discover_datasets pattern="(?P&lt;designation&gt;.*)\.tabular" ext="tabular" visible="true" assign_primary_output="true" directory="outputdir"/>
         </data>
     </outputs>
+    <tests>
+        <test>
+            <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" />
+            <param name="polarity" value="both" />
+            <param name="output_option" value="unique" />
+            <output name="output" ftype="tabular" file="table.tabular" />
+        </test>
+        <test>
+            <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" />
+            <param name="polarity" value="both" />
+            <param name="output_option" value="multiple" />
+            <output name="output" ftype="tabular" file="table0.tabular">
+                <discovered_dataset designation="table1" ftype="tabular" file="table1.tabular" />
+            </output>
+        </test>
+        <test>
+            <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" />
+            <param name="polarity" value="sense" />
+            <param name="output_option" value="unique" />
+            <output name="output" ftype="tabular" file="table.tabular" />
+        </test>
+        <test>
+            <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="bam" />
+            <param name="polarity" value="antisense" />
+            <param name="output_option" value="unique" />
+            <output name="output" ftype="tabular" file="table_antisense.tabular" />
+        </test>
+
+        <test>
+            <param name="input_list" value="input1.bam,input2.bam,input_new2.bam" ftype="bam" />
+            <param name="polarity" value="both" />
+            <param name="output_option" value="multiple" />
+            <output name="output" ftype="tabular" file="more_table0.tabular">
+                <discovered_dataset designation="table1" ftype="tabular" file="more_table1.tabular" />
+                <discovered_dataset designation="table2" ftype="tabular" file="more_table2.tabular" />
+            </output>
+        </test>
+        <test>
+            <param name="input_list" value="input1.bam,input2.bam,input_new2.bam" ftype="bam" />
+            <param name="polarity" value="sense" />
+            <param name="output_option" value="unique" />
+            <output name="output" ftype="tabular" file="more_sense_table.tabular" />
+        </test>
+        <test>
+            <param name="input_list" value="input1.bam,input2.bam,input_new2.bam" ftype="bam" />
+            <param name="polarity" value="antisense" />
+            <param name="output_option" value="unique" />
+            <output name="output" ftype="tabular" file="more_antisense_table.tabular" />
+        </test>
+
+
+    </tests>
     <help>
 
 **What it does**
@@ -56,20 +114,4 @@
 The library labels in the returned count table are taken from the input bam datasets
 names in the Galaxy history.
     </help>
-    <tests>
-        <test>
-            <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="tabular" />
-            <param name="polarity" value="both" />
-            <param name="output_option" value="unique" />
-            <output name="output" ftype="tabular" file="table.tabular" />
-        </test>
-        <test>
-            <param name="input_list" value="alignment1.bam,alignment2.bam" ftype="tabular" />
-            <param name="polarity" value="both" />
-            <param name="output_option" value="multiple" />
-            <output name="output" ftype="tabular" file="table0.tabular">
-                <discovered_dataset designation="table1" ftype="tabular" file="table1.tabular" />
-            </output>
-        </test>
-    </tests>
 </tool>
Binary file test-data/input1.bam has changed
Binary file test-data/input2.bam has changed
Binary file test-data/input_new2.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/more_antisense_table.tabular	Tue Oct 09 17:14:57 2018 -0400
@@ -0,0 +1,31 @@
+gene	input1.bam	input2.bam	input_new2.bam
+FBtr0070001	0	0	0
+FBtr0070533	0	0	0
+FBtr0070603	0	0	0
+FBtr0070604	0	0	0
+FBtr0070911	0	0	0
+FBtr0078490	0	0	0
+FBtr0078580	0	0	0
+FBtr0078790	1	0	0
+FBtr0079064	0	0	0
+FBtr0079090	1	0	0
+FBtr0079338	0	0	0
+FBtr0079528	0	0	0
+FBtr0079596	0	1	1
+FBtr0079677	0	0	0
+FBtr0079690	0	0	0
+FBtr0079692	0	0	0
+FBtr0079693	0	0	0
+FBtr0079694	0	0	0
+FBtr0079702	0	0	0
+FBtr0079728	0	0	0
+FBtr0079729	0	0	0
+FBtr0079752	0	0	0
+FBtr0079820	0	0	0
+FBtr0080609	0	0	0
+FBtr0080644	0	0	0
+FBtr0080646	0	0	0
+FBtr0080647	0	0	0
+FBtr0080660	0	0	0
+FBtr0080663	0	0	0
+FBtr0080664	0	2	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/more_sense_table.tabular	Tue Oct 09 17:14:57 2018 -0400
@@ -0,0 +1,31 @@
+gene	input1.bam	input2.bam	input_new2.bam
+FBtr0070001	57	24	24
+FBtr0070533	24	45	45
+FBtr0070603	70	61	61
+FBtr0070604	40	55	55
+FBtr0070911	2	3	3
+FBtr0078490	8	14	14
+FBtr0078580	1104	370	370
+FBtr0078790	13	10	10
+FBtr0079064	4	5	0
+FBtr0079090	5	7	7
+FBtr0079338	16	23	23
+FBtr0079528	106	365	365
+FBtr0079596	152	315	315
+FBtr0079677	4	7	7
+FBtr0079690	5	5	5
+FBtr0079692	6	4	4
+FBtr0079693	6	9	9
+FBtr0079694	7	6	6
+FBtr0079702	4	4	4
+FBtr0079728	8	6	6
+FBtr0079729	4	4	4
+FBtr0079752	8	2	2
+FBtr0079820	13	109	109
+FBtr0080609	63	8	8
+FBtr0080644	6	8	8
+FBtr0080646	3	12	12
+FBtr0080647	10	12	12
+FBtr0080660	7	11	11
+FBtr0080663	115	106	106
+FBtr0080664	128	387	387
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/more_table0.tabular	Tue Oct 09 17:14:57 2018 -0400
@@ -0,0 +1,31 @@
+gene	input1.bam
+FBtr0070001	57
+FBtr0070533	24
+FBtr0070603	70
+FBtr0070604	40
+FBtr0070911	2
+FBtr0078490	8
+FBtr0078580	1104
+FBtr0078790	14
+FBtr0079064	4
+FBtr0079090	6
+FBtr0079338	16
+FBtr0079528	106
+FBtr0079596	152
+FBtr0079677	4
+FBtr0079690	5
+FBtr0079692	6
+FBtr0079693	6
+FBtr0079694	7
+FBtr0079702	4
+FBtr0079728	8
+FBtr0079729	4
+FBtr0079752	8
+FBtr0079820	13
+FBtr0080609	63
+FBtr0080644	6
+FBtr0080646	3
+FBtr0080647	10
+FBtr0080660	7
+FBtr0080663	115
+FBtr0080664	128
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/more_table1.tabular	Tue Oct 09 17:14:57 2018 -0400
@@ -0,0 +1,31 @@
+gene	input2.bam
+FBtr0070001	24
+FBtr0070533	45
+FBtr0070603	61
+FBtr0070604	55
+FBtr0070911	3
+FBtr0078490	14
+FBtr0078580	370
+FBtr0078790	10
+FBtr0079064	5
+FBtr0079090	7
+FBtr0079338	23
+FBtr0079528	365
+FBtr0079596	316
+FBtr0079677	7
+FBtr0079690	5
+FBtr0079692	4
+FBtr0079693	9
+FBtr0079694	6
+FBtr0079702	4
+FBtr0079728	6
+FBtr0079729	4
+FBtr0079752	2
+FBtr0079820	109
+FBtr0080609	8
+FBtr0080644	8
+FBtr0080646	12
+FBtr0080647	12
+FBtr0080660	11
+FBtr0080663	106
+FBtr0080664	389
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/more_table2.tabular	Tue Oct 09 17:14:57 2018 -0400
@@ -0,0 +1,31 @@
+gene	input_new2.bam
+FBtr0070001	24
+FBtr0070533	45
+FBtr0070603	61
+FBtr0070604	55
+FBtr0070911	3
+FBtr0078490	14
+FBtr0078580	370
+FBtr0078790	10
+FBtr0079064	0
+FBtr0079090	7
+FBtr0079338	23
+FBtr0079528	365
+FBtr0079596	316
+FBtr0079677	7
+FBtr0079690	5
+FBtr0079692	4
+FBtr0079693	9
+FBtr0079694	6
+FBtr0079702	4
+FBtr0079728	6
+FBtr0079729	4
+FBtr0079752	2
+FBtr0079820	109
+FBtr0080609	8
+FBtr0080644	8
+FBtr0080646	12
+FBtr0080647	12
+FBtr0080660	11
+FBtr0080663	106
+FBtr0080664	389
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/table_antisense.tabular	Tue Oct 09 17:14:57 2018 -0400
@@ -0,0 +1,239 @@
+gene	alignment1.bam	alignment2.bam
+dme-bantam	0	0
+dme-let-7	0	0
+dme-mir-1	0	0
+dme-mir-10	0	0
+dme-mir-100	0	0
+dme-mir-1000	0	0
+dme-mir-1001	0	0
+dme-mir-1002	0	0
+dme-mir-1003	0	0
+dme-mir-1004	0	0
+dme-mir-1005	0	0
+dme-mir-1006	0	0
+dme-mir-1007	0	0
+dme-mir-1008	0	0
+dme-mir-1009	0	0
+dme-mir-1010	0	0
+dme-mir-1011	0	0
+dme-mir-1012	0	0
+dme-mir-1013	0	0
+dme-mir-1014	0	0
+dme-mir-1015	0	0
+dme-mir-1016	0	0
+dme-mir-1017	0	0
+dme-mir-11	0	0
+dme-mir-12	0	0
+dme-mir-124	0	0
+dme-mir-125	0	0
+dme-mir-133	0	0
+dme-mir-137	0	0
+dme-mir-13a	0	0
+dme-mir-13b-1	0	0
+dme-mir-13b-2	0	0
+dme-mir-14	0	0
+dme-mir-184	0	0
+dme-mir-190	0	0
+dme-mir-193	0	0
+dme-mir-210	0	0
+dme-mir-219	0	0
+dme-mir-2279	0	0
+dme-mir-2280	0	0
+dme-mir-2281	0	0
+dme-mir-2282	0	0
+dme-mir-2283	0	0
+dme-mir-2489	0	0
+dme-mir-2490	0	0
+dme-mir-2491	0	0
+dme-mir-2492	0	0
+dme-mir-2493	0	0
+dme-mir-2494	0	0
+dme-mir-2495	0	0
+dme-mir-2496	0	0
+dme-mir-2497	0	0
+dme-mir-2498	0	0
+dme-mir-2499	0	0
+dme-mir-2500	0	0
+dme-mir-2501	0	0
+dme-mir-252	0	0
+dme-mir-2535b	0	0
+dme-mir-263a	0	0
+dme-mir-263b	0	0
+dme-mir-274	0	0
+dme-mir-275	0	0
+dme-mir-276a	0	0
+dme-mir-276b	0	0
+dme-mir-277	0	0
+dme-mir-278	0	0
+dme-mir-279	0	0
+dme-mir-280	0	0
+dme-mir-281-1	0	0
+dme-mir-281-2	0	0
+dme-mir-282	0	0
+dme-mir-283	0	0
+dme-mir-284	0	0
+dme-mir-285	0	0
+dme-mir-286	0	0
+dme-mir-287	0	0
+dme-mir-288	0	0
+dme-mir-289	0	0
+dme-mir-2a-1	0	0
+dme-mir-2a-2	0	0
+dme-mir-2b-1	0	0
+dme-mir-2b-2	0	0
+dme-mir-2c	0	0
+dme-mir-3	0	0
+dme-mir-303	0	0
+dme-mir-304	0	0
+dme-mir-305	0	0
+dme-mir-306	0	0
+dme-mir-307a	0	0
+dme-mir-307b	0	0
+dme-mir-308	0	0
+dme-mir-309	0	0
+dme-mir-310	0	0
+dme-mir-311	0	0
+dme-mir-312	0	0
+dme-mir-313	0	0
+dme-mir-314	0	0
+dme-mir-315	0	0
+dme-mir-316	0	0
+dme-mir-317	0	0
+dme-mir-318	0	0
+dme-mir-31a	0	0
+dme-mir-31b	0	0
+dme-mir-33	0	0
+dme-mir-34	0	0
+dme-mir-3641	0	0
+dme-mir-3642	0	0
+dme-mir-3643	0	0
+dme-mir-3644	0	0
+dme-mir-3645	0	0
+dme-mir-375	0	0
+dme-mir-4	0	0
+dme-mir-4908	0	0
+dme-mir-4909	0	0
+dme-mir-4910	0	0
+dme-mir-4911	0	0
+dme-mir-4912	0	0
+dme-mir-4913	0	0
+dme-mir-4914	0	0
+dme-mir-4915	0	0
+dme-mir-4916	0	0
+dme-mir-4917	0	0
+dme-mir-4918	0	0
+dme-mir-4919	0	0
+dme-mir-4939	0	0
+dme-mir-4940	0	0
+dme-mir-4941	0	0
+dme-mir-4942	0	0
+dme-mir-4943	0	0
+dme-mir-4944	0	0
+dme-mir-4945	0	0
+dme-mir-4946	0	0
+dme-mir-4947	0	0
+dme-mir-4948	0	0
+dme-mir-4949	0	0
+dme-mir-4950	0	0
+dme-mir-4951	0	0
+dme-mir-4952	0	0
+dme-mir-4953	0	0
+dme-mir-4954	0	0
+dme-mir-4955	0	0
+dme-mir-4956	0	0
+dme-mir-4957	0	0
+dme-mir-4958	0	0
+dme-mir-4959	0	0
+dme-mir-4960	0	0
+dme-mir-4961	0	0
+dme-mir-4962	0	0
+dme-mir-4963	0	0
+dme-mir-4964	0	0
+dme-mir-4965	0	0
+dme-mir-4966	0	0
+dme-mir-4967	0	0
+dme-mir-4968	0	0
+dme-mir-4969	0	0
+dme-mir-4970	0	0
+dme-mir-4971	0	0
+dme-mir-4972	0	0
+dme-mir-4973	0	0
+dme-mir-4974	0	0
+dme-mir-4975	0	0
+dme-mir-4976	0	0
+dme-mir-4977	0	0
+dme-mir-4978	0	0
+dme-mir-4979	0	0
+dme-mir-4980	0	0
+dme-mir-4981	0	0
+dme-mir-4982	0	0
+dme-mir-4983	0	0
+dme-mir-4984	0	0
+dme-mir-4985	0	0
+dme-mir-4986	0	0
+dme-mir-4987	0	0
+dme-mir-5	0	0
+dme-mir-6-1	0	0
+dme-mir-6-2	0	0
+dme-mir-6-3	0	0
+dme-mir-7	0	0
+dme-mir-79	0	0
+dme-mir-8	0	0
+dme-mir-87	0	0
+dme-mir-927	0	0
+dme-mir-929	0	0
+dme-mir-92a	0	0
+dme-mir-92b	0	0
+dme-mir-932	0	0
+dme-mir-954	0	0
+dme-mir-955	0	0
+dme-mir-956	0	0
+dme-mir-957	0	0
+dme-mir-958	0	0
+dme-mir-959	0	0
+dme-mir-960	0	0
+dme-mir-961	0	0
+dme-mir-962	0	0
+dme-mir-963	0	0
+dme-mir-964	0	0
+dme-mir-965	0	0
+dme-mir-966	0	0
+dme-mir-967	0	0
+dme-mir-968	0	0
+dme-mir-969	0	0
+dme-mir-970	0	0
+dme-mir-971	0	0
+dme-mir-972	0	0
+dme-mir-973	0	0
+dme-mir-974	0	0
+dme-mir-975	0	0
+dme-mir-976	0	0
+dme-mir-977	0	0
+dme-mir-978	0	0
+dme-mir-979	0	0
+dme-mir-980	0	0
+dme-mir-981	0	0
+dme-mir-982	0	0
+dme-mir-983-1	0	0
+dme-mir-983-2	0	0
+dme-mir-984	0	0
+dme-mir-985	0	0
+dme-mir-986	0	0
+dme-mir-987	0	0
+dme-mir-988	0	0
+dme-mir-989	0	0
+dme-mir-990	0	0
+dme-mir-991	0	0
+dme-mir-992	0	0
+dme-mir-993	0	0
+dme-mir-994	0	0
+dme-mir-995	0	0
+dme-mir-996	0	0
+dme-mir-997	0	0
+dme-mir-998	0	0
+dme-mir-999	0	0
+dme-mir-9a	0	0
+dme-mir-9b	0	0
+dme-mir-9c	0	0
+dme-mir-iab-4	0	0
+dme-mir-iab-8	0	0