changeset 18:2c95c899d0a4 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 06c6feefeaaa66f7dec1f6143a2c5aaf91836320
author artbio
date Thu, 22 Nov 2018 03:07:41 -0500
parents b28dcd4051e8
children f33afecac67a
files small_rna_maps.py small_rna_maps.xml test-data/bed1.bed test-data/bed2.bed test-data/bed3.bed
diffstat 5 files changed, 325 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/small_rna_maps.py	Thu Nov 15 12:29:57 2018 -0500
+++ b/small_rna_maps.py	Thu Nov 22 03:07:41 2018 -0500
@@ -19,6 +19,14 @@
     the_parser.add_argument('--sample_names', dest='sample_names',
                             required=True, nargs='+',
                             help='list of sample names')
+    the_parser.add_argument('--bed', dest='bed', required=False,
+                            help='Name of bed output must be specified\
+                            if --cluster option used')
+    the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster',
+                            required=False, type=int, default=0,
+                            help='Skip clusters of size equal or less than\
+                            specified integer in the bed output. \
+                            Default = 0')
     the_parser.add_argument('--outputs', nargs='+', action='store',
                             help='list of two output paths (only two)')
     the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store',
@@ -269,27 +277,40 @@
                     line = [str(i) for i in line]
                     out.write('\t'.join(line) + '\n')
 
-    def write_cluster_table(self, clustered_dic, out):
+    def write_cluster_table(self, clustered_dic, out, bedpath, skip):
         '''
         Writer of a tabular file
         Dataset, Chromosome, Chrom_length, Coordinate, Polarity,
         <some mapped value>
         out is an *open* file handler
+        bed is a file handle opened and closed inside the function
         '''
+        bed = open(bedpath, 'w')
         for key in sorted(clustered_dic):
             start = clustered_dic[key][1][0]
             end = clustered_dic[key][1][1]
             size = end - start + 1
+            if self.nostrand:
+                polarity = '.'
+            elif key[2] == 'F':
+                polarity = '+'
+            else:
+                polarity = '-'
             density = float(clustered_dic[key][0]) / size
             line = [self.sample_name, key[0], self.chromosomes[key[0]],
                     key[1], key[2], clustered_dic[key][0],
                     str(start) + "-" + str(end), str(size), str(density)]
             line = [str(i) for i in line]
+            if size > skip:
+                bedline = [key[0], str(start-1), str(end), 'cluster', '.',
+                           polarity]
+                bed.write('\t'.join(bedline) + '\n')
             out.write('\t'.join(line) + '\n')
+        bed.close()
 
 
 def main(inputs, samples, methods, outputs, minsize, maxsize, cluster,
-         nostrand):
+         nostrand, bedfile=None, bed_skipcluster=0):
     for method, output in zip(methods, outputs):
         out = open(output, 'w')
         if method == 'Size':
@@ -312,10 +333,10 @@
                      "Size": mapobj.compute_size,
                      "cluster": mapobj.write_cluster_table}
             if cluster:
-                token["cluster"](mapobj.map_dict, out)
+                token["cluster"](mapobj.map_dict, out, bedfile,
+                                 bed_skipcluster)
             else:
                 token[method](mapobj.map_dict, out)
-            #   mapobj.compute_coverage(mapobj.map_dict, out)
         out.close()
 
 
@@ -326,4 +347,5 @@
         args.sample_names = [name + '_' + str(i) for
                              i, name in enumerate(args.sample_names)]
     main(args.inputs, args.sample_names, args.plot_methods, args.outputs,
-         args.minsize, args.maxsize, args.cluster, args.nostrand)
+         args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed,
+         args.bed_skipcluster)
--- a/small_rna_maps.xml	Thu Nov 15 12:29:57 2018 -0500
+++ b/small_rna_maps.xml	Thu Nov 22 03:07:41 2018 -0500
@@ -1,4 +1,4 @@
-<tool id="small_rna_maps" name="small_rna_maps" version="2.8.0">
+<tool id="small_rna_maps" name="small_rna_maps" version="2.9.0">
   <description></description>
   <requirements>
         <requirement type="package" version="1.11.2=py27_0">numpy</requirement>
@@ -38,11 +38,14 @@
                   --plot_methods 'Counts'
                   --outputs '$output_tab'
                   --cluster ${plots_options.cluster}
+                  --bed '$output_bed'
+                  --bed_skipcluster ${plots_options.skip_cluster}
                   ${plots_options.strandness} &&
               #else:
                   --plot_methods '${plots_options.first_plot}'
                   --outputs '$output_tab' &&
               #end if
+              
 
           Rscript '$__tool_directory__'/small_rna_maps.r
               --first_dataframe '$output_tab'
@@ -128,6 +131,8 @@
                    help="Sets the distance (in nt) below which reads are clustered to a single median position" />
             <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false"
                    label="Ignore polarity of reads ?" help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/>
+            <param name="skip_cluster" type="integer" label="do not report cluster equal or less than the specified number of nucleotides" value="0"
+                   help="Clusters whose size (in nucleotides) is equal to or smaller than this value are not reported in the bed output. The default, 0, reports all clusters, including singlets" />
         </when>
     </conditional>
     <conditional name="ylimits_cond">
@@ -144,6 +149,9 @@
 
  <outputs>
     <data format="tabular" name="output_tab" label="$plots_options.first_plot dataframe" />
+    <data format="bed" name="output_bed" label="bed file for clusters" >
+        <filter>plots_options['plots_options_selector'] == 'cluster'</filter>
+    </data>
     <data format="tabular" name="extra_output_tab" label="$plots_options.extra_plot dataframe">
         <filter>plots_options['plots_options_selector'] == 'two_plot'</filter>
     </data>
@@ -192,9 +200,11 @@
             <param name="plots_options_selector" value="cluster" />
             <param name="first_plot" value="Counts" />
             <param name="cluster" value="5" />
+            <param name="skip_cluster" value="0" />
             <param name="strandness" value="false" />
             <output file="clustering.tab" name="output_tab" />
             <output file="clustering.pdf" name="output_pdf" />
+            <output file="bed1.bed" name="output_bed" />
         </test>
         <test> <!-- 3 -->
             <repeat name="series">
@@ -206,9 +216,27 @@
             <param name="plots_options_selector" value="cluster" />
             <param name="first_plot" value="Counts" />
             <param name="cluster" value="5" />
+            <param name="skip_cluster" value="0" />
             <param name="strandness" value="true" />
             <output file="clustering_unstranded.tab" name="output_tab" />
             <output file="clustering_unstranded.pdf" name="output_pdf" />
+            <output file="bed2.bed" name="output_bed" />
+        </test>
+        <test> <!-- 3.1 -->
+            <repeat name="series">
+                <param name="inputs" value="input1.bam" ftype="bam" />
+                <param name="normalization" value="1.0" />
+            </repeat>
+            <param name="minsize" value="0" />
+            <param name="maxsize" value="10000" />
+            <param name="plots_options_selector" value="cluster" />
+            <param name="first_plot" value="Counts" />
+            <param name="cluster" value="5" />
+            <param name="skip_cluster" value="1" />
+            <param name="strandness" value="false" />
+            <output file="clustering.tab" name="output_tab" />
+            <output file="clustering.pdf" name="output_pdf" />
+            <output file="bed3.bed" name="output_bed" />
         </test>
         <test> <!-- 4 -->
             <repeat name="series">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bed1.bed	Thu Nov 22 03:07:41 2018 -0500
@@ -0,0 +1,118 @@
+FBtr0070001	0	1	cluster	.	+
+FBtr0070001	6	18	cluster	.	+
+FBtr0070001	26	34	cluster	.	+
+FBtr0070001	71	72	cluster	.	+
+FBtr0070533	0	1	cluster	.	+
+FBtr0070533	11	38	cluster	.	+
+FBtr0070533	71	72	cluster	.	+
+FBtr0070603	0	42	cluster	.	+
+FBtr0070603	50	56	cluster	.	+
+FBtr0070603	71	72	cluster	.	+
+FBtr0070604	0	1	cluster	.	+
+FBtr0070604	17	22	cluster	.	+
+FBtr0070604	29	32	cluster	.	+
+FBtr0070604	56	57	cluster	.	+
+FBtr0070604	71	72	cluster	.	+
+FBtr0070911	0	1	cluster	.	+
+FBtr0070911	14	15	cluster	.	+
+FBtr0070911	37	38	cluster	.	+
+FBtr0070911	72	73	cluster	.	+
+FBtr0078490	0	1	cluster	.	+
+FBtr0078490	12	18	cluster	.	+
+FBtr0078490	24	28	cluster	.	+
+FBtr0078490	37	38	cluster	.	+
+FBtr0078490	71	72	cluster	.	+
+FBtr0078580	0	31	cluster	.	+
+FBtr0078580	51	52	cluster	.	+
+FBtr0078580	71	72	cluster	.	+
+FBtr0078790	0	1	cluster	.	+
+FBtr0078790	16	17	cluster	.	+
+FBtr0078790	32	33	cluster	.	+
+FBtr0078790	41	52	cluster	.	+
+FBtr0078790	68	69	cluster	.	-
+FBtr0078790	72	73	cluster	.	+
+FBtr0079064	0	3	cluster	.	+
+FBtr0079064	32	33	cluster	.	+
+FBtr0079064	51	52	cluster	.	+
+FBtr0079064	71	72	cluster	.	+
+FBtr0079090	0	1	cluster	.	+
+FBtr0079090	25	26	cluster	.	+
+FBtr0079090	32	33	cluster	.	+
+FBtr0079090	52	53	cluster	.	+
+FBtr0079090	55	56	cluster	.	-
+FBtr0079090	71	72	cluster	.	+
+FBtr0079338	0	1	cluster	.	+
+FBtr0079338	11	17	cluster	.	+
+FBtr0079338	24	25	cluster	.	+
+FBtr0079338	41	46	cluster	.	+
+FBtr0079338	72	73	cluster	.	+
+FBtr0079528	0	18	cluster	.	+
+FBtr0079528	27	28	cluster	.	+
+FBtr0079528	34	37	cluster	.	+
+FBtr0079528	50	51	cluster	.	+
+FBtr0079528	70	71	cluster	.	+
+FBtr0079596	0	19	cluster	.	+
+FBtr0079596	52	54	cluster	.	+
+FBtr0079596	72	73	cluster	.	+
+FBtr0079677	0	5	cluster	.	+
+FBtr0079677	51	53	cluster	.	+
+FBtr0079677	71	72	cluster	.	+
+FBtr0079690	0	1	cluster	.	+
+FBtr0079690	21	27	cluster	.	+
+FBtr0079690	32	33	cluster	.	+
+FBtr0079690	71	72	cluster	.	+
+FBtr0079692	0	1	cluster	.	+
+FBtr0079692	17	18	cluster	.	+
+FBtr0079692	24	25	cluster	.	+
+FBtr0079692	31	32	cluster	.	+
+FBtr0079692	72	73	cluster	.	+
+FBtr0079693	0	1	cluster	.	+
+FBtr0079693	24	25	cluster	.	+
+FBtr0079693	71	72	cluster	.	+
+FBtr0079694	0	1	cluster	.	+
+FBtr0079694	17	18	cluster	.	+
+FBtr0079694	51	52	cluster	.	+
+FBtr0079694	71	72	cluster	.	+
+FBtr0079702	0	1	cluster	.	+
+FBtr0079702	18	19	cluster	.	+
+FBtr0079702	55	56	cluster	.	+
+FBtr0079702	71	72	cluster	.	+
+FBtr0079728	0	1	cluster	.	+
+FBtr0079728	7	8	cluster	.	+
+FBtr0079728	18	19	cluster	.	+
+FBtr0079728	32	33	cluster	.	+
+FBtr0079728	55	56	cluster	.	+
+FBtr0079728	71	72	cluster	.	+
+FBtr0079729	0	1	cluster	.	+
+FBtr0079729	12	13	cluster	.	+
+FBtr0079729	51	57	cluster	.	+
+FBtr0079729	71	72	cluster	.	+
+FBtr0079752	0	1	cluster	.	+
+FBtr0079752	6	12	cluster	.	+
+FBtr0079752	32	33	cluster	.	+
+FBtr0079752	51	52	cluster	.	+
+FBtr0079752	71	72	cluster	.	+
+FBtr0079820	0	1	cluster	.	+
+FBtr0079820	44	56	cluster	.	+
+FBtr0079820	73	74	cluster	.	+
+FBtr0080609	0	20	cluster	.	+
+FBtr0080609	41	42	cluster	.	+
+FBtr0080609	50	52	cluster	.	+
+FBtr0080609	71	72	cluster	.	+
+FBtr0080644	0	12	cluster	.	+
+FBtr0080644	71	72	cluster	.	+
+FBtr0080646	0	7	cluster	.	+
+FBtr0080646	71	72	cluster	.	+
+FBtr0080647	0	12	cluster	.	+
+FBtr0080647	71	72	cluster	.	+
+FBtr0080660	0	7	cluster	.	+
+FBtr0080660	71	72	cluster	.	+
+FBtr0080663	0	17	cluster	.	+
+FBtr0080663	25	26	cluster	.	+
+FBtr0080663	31	32	cluster	.	+
+FBtr0080663	49	51	cluster	.	+
+FBtr0080663	70	71	cluster	.	+
+FBtr0080664	0	21	cluster	.	+
+FBtr0080664	27	38	cluster	.	+
+FBtr0080664	49	56	cluster	.	+
+FBtr0080664	70	71	cluster	.	+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bed2.bed	Thu Nov 22 03:07:41 2018 -0500
@@ -0,0 +1,116 @@
+FBtr0070001	0	1	cluster	.	.
+FBtr0070001	6	18	cluster	.	.
+FBtr0070001	26	34	cluster	.	.
+FBtr0070001	71	72	cluster	.	.
+FBtr0070533	0	1	cluster	.	.
+FBtr0070533	11	38	cluster	.	.
+FBtr0070533	71	72	cluster	.	.
+FBtr0070603	0	42	cluster	.	.
+FBtr0070603	50	56	cluster	.	.
+FBtr0070603	71	72	cluster	.	.
+FBtr0070604	0	1	cluster	.	.
+FBtr0070604	17	22	cluster	.	.
+FBtr0070604	29	32	cluster	.	.
+FBtr0070604	56	57	cluster	.	.
+FBtr0070604	71	72	cluster	.	.
+FBtr0070911	0	1	cluster	.	.
+FBtr0070911	14	15	cluster	.	.
+FBtr0070911	37	38	cluster	.	.
+FBtr0070911	72	73	cluster	.	.
+FBtr0078490	0	1	cluster	.	.
+FBtr0078490	12	18	cluster	.	.
+FBtr0078490	24	28	cluster	.	.
+FBtr0078490	37	38	cluster	.	.
+FBtr0078490	71	72	cluster	.	.
+FBtr0078580	0	31	cluster	.	.
+FBtr0078580	51	52	cluster	.	.
+FBtr0078580	71	72	cluster	.	.
+FBtr0078790	0	1	cluster	.	.
+FBtr0078790	16	17	cluster	.	.
+FBtr0078790	32	33	cluster	.	.
+FBtr0078790	41	52	cluster	.	.
+FBtr0078790	68	73	cluster	.	.
+FBtr0079064	0	3	cluster	.	.
+FBtr0079064	32	33	cluster	.	.
+FBtr0079064	51	52	cluster	.	.
+FBtr0079064	71	72	cluster	.	.
+FBtr0079090	0	1	cluster	.	.
+FBtr0079090	25	26	cluster	.	.
+FBtr0079090	32	33	cluster	.	.
+FBtr0079090	52	56	cluster	.	.
+FBtr0079090	71	72	cluster	.	.
+FBtr0079338	0	1	cluster	.	.
+FBtr0079338	11	17	cluster	.	.
+FBtr0079338	24	25	cluster	.	.
+FBtr0079338	41	46	cluster	.	.
+FBtr0079338	72	73	cluster	.	.
+FBtr0079528	0	18	cluster	.	.
+FBtr0079528	27	28	cluster	.	.
+FBtr0079528	34	37	cluster	.	.
+FBtr0079528	50	51	cluster	.	.
+FBtr0079528	70	71	cluster	.	.
+FBtr0079596	0	19	cluster	.	.
+FBtr0079596	52	54	cluster	.	.
+FBtr0079596	72	73	cluster	.	.
+FBtr0079677	0	5	cluster	.	.
+FBtr0079677	51	53	cluster	.	.
+FBtr0079677	71	72	cluster	.	.
+FBtr0079690	0	1	cluster	.	.
+FBtr0079690	21	27	cluster	.	.
+FBtr0079690	32	33	cluster	.	.
+FBtr0079690	71	72	cluster	.	.
+FBtr0079692	0	1	cluster	.	.
+FBtr0079692	17	18	cluster	.	.
+FBtr0079692	24	25	cluster	.	.
+FBtr0079692	31	32	cluster	.	.
+FBtr0079692	72	73	cluster	.	.
+FBtr0079693	0	1	cluster	.	.
+FBtr0079693	24	25	cluster	.	.
+FBtr0079693	71	72	cluster	.	.
+FBtr0079694	0	1	cluster	.	.
+FBtr0079694	17	18	cluster	.	.
+FBtr0079694	51	52	cluster	.	.
+FBtr0079694	71	72	cluster	.	.
+FBtr0079702	0	1	cluster	.	.
+FBtr0079702	18	19	cluster	.	.
+FBtr0079702	55	56	cluster	.	.
+FBtr0079702	71	72	cluster	.	.
+FBtr0079728	0	1	cluster	.	.
+FBtr0079728	7	8	cluster	.	.
+FBtr0079728	18	19	cluster	.	.
+FBtr0079728	32	33	cluster	.	.
+FBtr0079728	55	56	cluster	.	.
+FBtr0079728	71	72	cluster	.	.
+FBtr0079729	0	1	cluster	.	.
+FBtr0079729	12	13	cluster	.	.
+FBtr0079729	51	57	cluster	.	.
+FBtr0079729	71	72	cluster	.	.
+FBtr0079752	0	1	cluster	.	.
+FBtr0079752	6	12	cluster	.	.
+FBtr0079752	32	33	cluster	.	.
+FBtr0079752	51	52	cluster	.	.
+FBtr0079752	71	72	cluster	.	.
+FBtr0079820	0	1	cluster	.	.
+FBtr0079820	44	56	cluster	.	.
+FBtr0079820	73	74	cluster	.	.
+FBtr0080609	0	20	cluster	.	.
+FBtr0080609	41	42	cluster	.	.
+FBtr0080609	50	52	cluster	.	.
+FBtr0080609	71	72	cluster	.	.
+FBtr0080644	0	12	cluster	.	.
+FBtr0080644	71	72	cluster	.	.
+FBtr0080646	0	7	cluster	.	.
+FBtr0080646	71	72	cluster	.	.
+FBtr0080647	0	12	cluster	.	.
+FBtr0080647	71	72	cluster	.	.
+FBtr0080660	0	7	cluster	.	.
+FBtr0080660	71	72	cluster	.	.
+FBtr0080663	0	17	cluster	.	.
+FBtr0080663	25	26	cluster	.	.
+FBtr0080663	31	32	cluster	.	.
+FBtr0080663	49	51	cluster	.	.
+FBtr0080663	70	71	cluster	.	.
+FBtr0080664	0	21	cluster	.	.
+FBtr0080664	27	38	cluster	.	.
+FBtr0080664	49	56	cluster	.	.
+FBtr0080664	70	71	cluster	.	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bed3.bed	Thu Nov 22 03:07:41 2018 -0500
@@ -0,0 +1,35 @@
+FBtr0070001	6	18	cluster	.	+
+FBtr0070001	26	34	cluster	.	+
+FBtr0070533	11	38	cluster	.	+
+FBtr0070603	0	42	cluster	.	+
+FBtr0070603	50	56	cluster	.	+
+FBtr0070604	17	22	cluster	.	+
+FBtr0070604	29	32	cluster	.	+
+FBtr0078490	12	18	cluster	.	+
+FBtr0078490	24	28	cluster	.	+
+FBtr0078580	0	31	cluster	.	+
+FBtr0078790	41	52	cluster	.	+
+FBtr0079064	0	3	cluster	.	+
+FBtr0079338	11	17	cluster	.	+
+FBtr0079338	41	46	cluster	.	+
+FBtr0079528	0	18	cluster	.	+
+FBtr0079528	34	37	cluster	.	+
+FBtr0079596	0	19	cluster	.	+
+FBtr0079596	52	54	cluster	.	+
+FBtr0079677	0	5	cluster	.	+
+FBtr0079677	51	53	cluster	.	+
+FBtr0079690	21	27	cluster	.	+
+FBtr0079729	51	57	cluster	.	+
+FBtr0079752	6	12	cluster	.	+
+FBtr0079820	44	56	cluster	.	+
+FBtr0080609	0	20	cluster	.	+
+FBtr0080609	50	52	cluster	.	+
+FBtr0080644	0	12	cluster	.	+
+FBtr0080646	0	7	cluster	.	+
+FBtr0080647	0	12	cluster	.	+
+FBtr0080660	0	7	cluster	.	+
+FBtr0080663	0	17	cluster	.	+
+FBtr0080663	49	51	cluster	.	+
+FBtr0080664	0	21	cluster	.	+
+FBtr0080664	27	38	cluster	.	+
+FBtr0080664	49	56	cluster	.	+