Mercurial > repos > artbio > small_rna_maps
changeset 17:b28dcd4051e8 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 16f15e5ab2b79590a8ae410f76434aa6690c1fc4
| field | value |
|---|---|
| author | artbio |
| date | Thu, 15 Nov 2018 12:29:57 -0500 |
| parents | 600e2498bd21 |
| children | 2c95c899d0a4 |
| files | small_rna_maps.py small_rna_maps.xml test-data/clustering_unstranded.pdf test-data/clustering_unstranded.tab |
| diffstat | 4 files changed, 173 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/small_rna_maps.py Tue Nov 13 17:03:46 2018 -0500 +++ b/small_rna_maps.py Thu Nov 15 12:29:57 2018 -0500 @@ -24,26 +24,33 @@ the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', help='list of 2 plot methods (only two) among:\ Counts, Max, Mean, Median, Coverage and Size') + the_parser.add_argument('--nostrand', action='store_true', + help='Consider reads regardless their polarity') + args = the_parser.parse_args() return args class Map: - def __init__(self, bam_file, sample, minsize, maxsize, cluster): + def __init__(self, bam_file, sample, minsize, maxsize, cluster, nostrand): self.sample_name = sample self.minsize = minsize self.maxsize = maxsize self.cluster = cluster + if not nostrand: + self.nostrand = False + else: + self.nostrand = True self.bam_object = pysam.AlignmentFile(bam_file, 'rb') self.chromosomes = dict(zip(self.bam_object.references, self.bam_object.lengths)) self.map_dict = self.create_map(self.bam_object, self.minsize, - self.maxsize) + self.maxsize, self.nostrand) if self.cluster: self.map_dict = self.tile_map(self.map_dict, self.cluster) - def create_map(self, bam_object, minsize, maxsize): + def create_map(self, bam_object, minsize, maxsize, nostrand=False): ''' Returns a map_dictionary {(chromosome,read_position,polarity): [read_length, ...]} @@ -53,14 +60,24 @@ # get empty value for start and end of each chromosome map_dictionary[(chrom, 1, 'F')] = [] map_dictionary[(chrom, self.chromosomes[chrom], 'F')] = [] - for read in bam_object.fetch(chrom): - positions = read.positions # a list of covered positions - if read.is_reverse: - map_dictionary[(chrom, positions[-1]+1, 'R')].append( - read.query_alignment_length) - else: - map_dictionary[(chrom, positions[0]+1, 'F')].append( - read.query_alignment_length) + if not nostrand: + for read in bam_object.fetch(chrom): + positions = read.positions # a list of covered positions + if read.is_reverse: + map_dictionary[(chrom, positions[-1]+1, 'R')].append( + 
read.query_alignment_length) + else: + map_dictionary[(chrom, positions[0]+1, 'F')].append( + read.query_alignment_length) + else: + for read in bam_object.fetch(chrom): + positions = read.positions # a list of covered positions + if read.is_reverse: + map_dictionary[(chrom, positions[-1]+1, 'F')].append( + read.query_alignment_length) + else: + map_dictionary[(chrom, positions[0]+1, 'F')].append( + read.query_alignment_length) return map_dictionary def grouper(self, iterable, clust_distance): @@ -271,7 +288,8 @@ out.write('\t'.join(line) + '\n') -def main(inputs, samples, methods, outputs, minsize, maxsize, cluster): +def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, + nostrand): for method, output in zip(methods, outputs): out = open(output, 'w') if method == 'Size': @@ -285,7 +303,7 @@ "Polarity", method] out.write('\t'.join(header) + '\n') for input, sample in zip(inputs, samples): - mapobj = Map(input, sample, minsize, maxsize, cluster) + mapobj = Map(input, sample, minsize, maxsize, cluster, nostrand) token = {"Counts": mapobj.compute_readcount, "Max": mapobj.compute_max, "Mean": mapobj.compute_mean, @@ -308,4 +326,4 @@ args.sample_names = [name + '_' + str(i) for i, name in enumerate(args.sample_names)] main(args.inputs, args.sample_names, args.plot_methods, args.outputs, - args.minsize, args.maxsize, args.cluster) + args.minsize, args.maxsize, args.cluster, args.nostrand)
--- a/small_rna_maps.xml Tue Nov 13 17:03:46 2018 -0500 +++ b/small_rna_maps.xml Thu Nov 15 12:29:57 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="small_rna_maps" name="small_rna_maps" version="2.7.0"> +<tool id="small_rna_maps" name="small_rna_maps" version="2.8.0"> <description></description> <requirements> <requirement type="package" version="1.11.2=py27_0">numpy</requirement> @@ -37,7 +37,8 @@ #elif str($plots_options.plots_options_selector ) == "cluster": --plot_methods 'Counts' --outputs '$output_tab' - --cluster ${plots_options.cluster} && + --cluster ${plots_options.cluster} + ${plots_options.strandness} && #else: --plot_methods '${plots_options.first_plot}' --outputs '$output_tab' && @@ -125,6 +126,8 @@ <param name="first_plot" type="hidden" value="Counts"/> <param name="cluster" type="integer" label="Aggregation distance in nucleotides" value="1" help="Sets the distance (in nt) below which reads are clustered to a single median position" /> + <param name="strandness" argument="--nostrand" type="boolean" truevalue="--nostrand" falsevalue="" checked="false" + label="Ignore polarity of reads ?" 
help="Set if you wish to cluster reads regardless of whether they are forward or reverse"/> </when> </conditional> <conditional name="ylimits_cond"> @@ -148,7 +151,7 @@ </outputs> <tests> - <test> <!-- 1 --> + <test> <!-- 0 --> <repeat name="series"> <param name="inputs" value="input1.bam" ftype="bam" /> <param name="normalization" value="1.0" /> @@ -164,7 +167,7 @@ <output file="input1_input2new_norm_1_2_counts.tab" name="output_tab" /> <output file="input1_input2new_norm_1_2_single_plot_counts.pdf" name="output_pdf" /> </test> - <test> <!-- 2 --> + <test> <!-- 1 --> <repeat name="series"> <param name="inputs" value="input1.bam" ftype="bam" /> <param name="normalization" value="1.0" /> @@ -179,6 +182,20 @@ <output file="input1_counts_yminneg5_5.tab" name="output_tab" /> <output file="input1_yminneg5_5_single_plot_counts.pdf" name="output_pdf" /> </test> + <test> <!-- 2 --> + <repeat name="series"> + <param name="inputs" value="input1.bam" ftype="bam" /> + <param name="normalization" value="1.0" /> + </repeat> + <param name="minsize" value="0" /> + <param name="maxsize" value="10000" /> + <param name="plots_options_selector" value="cluster" /> + <param name="first_plot" value="Counts" /> + <param name="cluster" value="5" /> + <param name="strandness" value="false" /> + <output file="clustering.tab" name="output_tab" /> + <output file="clustering.pdf" name="output_pdf" /> + </test> <test> <!-- 3 --> <repeat name="series"> <param name="inputs" value="input1.bam" ftype="bam" /> @@ -189,8 +206,9 @@ <param name="plots_options_selector" value="cluster" /> <param name="first_plot" value="Counts" /> <param name="cluster" value="5" /> - <output file="clustering.tab" name="output_tab" /> - <output file="clustering.pdf" name="output_pdf" /> + <param name="strandness" value="true" /> + <output file="clustering_unstranded.tab" name="output_tab" /> + <output file="clustering_unstranded.pdf" name="output_pdf" /> </test> <test> <!-- 4 --> <repeat name="series">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/clustering_unstranded.tab Thu Nov 15 12:29:57 2018 -0500 @@ -0,0 +1,117 @@ +Dataset Chromosome Chrom_length Coordinate Polarity Counts Start-End Cluster Size density +input1.bam FBtr0070001 72 1 F 1 1-1 1 1.0 +input1.bam FBtr0070001 72 12 F 14 7-18 12 1.16666666667 +input1.bam FBtr0070001 72 30 F 42 27-34 8 5.25 +input1.bam FBtr0070001 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0070533 72 1 F 1 1-1 1 1.0 +input1.bam FBtr0070533 72 25 F 23 12-38 27 0.851851851852 +input1.bam FBtr0070533 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0070603 72 21 F 68 1-42 42 1.61904761905 +input1.bam FBtr0070603 72 53 F 2 51-56 6 0.333333333333 +input1.bam FBtr0070603 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0070604 72 1 F 1 1-1 1 1.0 +input1.bam FBtr0070604 72 20 F 2 18-22 5 0.4 +input1.bam FBtr0070604 72 31 F 36 30-32 3 12.0 +input1.bam FBtr0070604 72 57 F 1 57-57 1 1.0 +input1.bam FBtr0070604 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0070911 73 1 F 0 1-1 1 0.0 +input1.bam FBtr0070911 73 15 F 1 15-15 1 1.0 +input1.bam FBtr0070911 73 38 F 1 38-38 1 1.0 +input1.bam FBtr0070911 73 73 F 0 73-73 1 0.0 +input1.bam FBtr0078490 72 1 F 0 1-1 1 0.0 +input1.bam FBtr0078490 72 15 F 4 13-18 6 0.666666666667 +input1.bam FBtr0078490 72 26 F 3 25-28 4 0.75 +input1.bam FBtr0078490 72 38 F 1 38-38 1 1.0 +input1.bam FBtr0078490 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0078580 72 16 F 1102 1-31 31 35.5483870968 +input1.bam FBtr0078580 72 52 F 2 52-52 1 2.0 +input1.bam FBtr0078580 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0078790 73 1 F 1 1-1 1 1.0 +input1.bam FBtr0078790 73 17 F 2 17-17 1 2.0 +input1.bam FBtr0078790 73 33 F 1 33-33 1 1.0 +input1.bam FBtr0078790 73 47 F 9 42-52 11 0.818181818182 +input1.bam FBtr0078790 73 71 F 1 69-73 5 0.2 +input1.bam FBtr0079064 72 2 F 2 1-3 3 0.666666666667 +input1.bam FBtr0079064 72 33 F 1 33-33 1 1.0 +input1.bam FBtr0079064 72 52 F 1 52-52 1 1.0 +input1.bam FBtr0079064 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079090 72 1 F 2 1-1 1 
2.0 +input1.bam FBtr0079090 72 26 F 1 26-26 1 1.0 +input1.bam FBtr0079090 72 33 F 1 33-33 1 1.0 +input1.bam FBtr0079090 72 54 F 2 53-56 4 0.5 +input1.bam FBtr0079090 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079338 73 1 F 0 1-1 1 0.0 +input1.bam FBtr0079338 73 14 F 5 12-17 6 0.833333333333 +input1.bam FBtr0079338 73 25 F 1 25-25 1 1.0 +input1.bam FBtr0079338 73 44 F 10 42-46 5 2.0 +input1.bam FBtr0079338 73 73 F 0 73-73 1 0.0 +input1.bam FBtr0079528 71 9 F 97 1-18 18 5.38888888889 +input1.bam FBtr0079528 71 28 F 1 28-28 1 1.0 +input1.bam FBtr0079528 71 36 F 3 35-37 3 1.0 +input1.bam FBtr0079528 71 51 F 5 51-51 1 5.0 +input1.bam FBtr0079528 71 71 F 0 71-71 1 0.0 +input1.bam FBtr0079596 73 10 F 148 1-19 19 7.78947368421 +input1.bam FBtr0079596 73 53 F 4 53-54 2 2.0 +input1.bam FBtr0079596 73 73 F 0 73-73 1 0.0 +input1.bam FBtr0079677 72 3 F 2 1-5 5 0.4 +input1.bam FBtr0079677 72 52 F 2 52-53 2 1.0 +input1.bam FBtr0079677 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079690 72 1 F 1 1-1 1 1.0 +input1.bam FBtr0079690 72 24 F 2 22-27 6 0.333333333333 +input1.bam FBtr0079690 72 33 F 2 33-33 1 2.0 +input1.bam FBtr0079690 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079692 73 1 F 3 1-1 1 3.0 +input1.bam FBtr0079692 73 18 F 1 18-18 1 1.0 +input1.bam FBtr0079692 73 25 F 1 25-25 1 1.0 +input1.bam FBtr0079692 73 32 F 1 32-32 1 1.0 +input1.bam FBtr0079692 73 73 F 0 73-73 1 0.0 +input1.bam FBtr0079693 72 1 F 5 1-1 1 5.0 +input1.bam FBtr0079693 72 25 F 1 25-25 1 1.0 +input1.bam FBtr0079693 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079694 72 1 F 5 1-1 1 5.0 +input1.bam FBtr0079694 72 18 F 1 18-18 1 1.0 +input1.bam FBtr0079694 72 52 F 1 52-52 1 1.0 +input1.bam FBtr0079694 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079702 72 1 F 1 1-1 1 1.0 +input1.bam FBtr0079702 72 19 F 2 19-19 1 2.0 +input1.bam FBtr0079702 72 56 F 1 56-56 1 1.0 +input1.bam FBtr0079702 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079728 72 1 F 2 1-1 1 2.0 +input1.bam FBtr0079728 72 8 F 1 8-8 1 1.0 +input1.bam FBtr0079728 72 19 F 1 19-19 1 1.0 
+input1.bam FBtr0079728 72 33 F 3 33-33 1 3.0 +input1.bam FBtr0079728 72 56 F 1 56-56 1 1.0 +input1.bam FBtr0079728 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079729 72 1 F 1 1-1 1 1.0 +input1.bam FBtr0079729 72 13 F 1 13-13 1 1.0 +input1.bam FBtr0079729 72 54 F 2 52-57 6 0.333333333333 +input1.bam FBtr0079729 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079752 72 1 F 2 1-1 1 2.0 +input1.bam FBtr0079752 72 9 F 2 7-12 6 0.333333333333 +input1.bam FBtr0079752 72 33 F 2 33-33 1 2.0 +input1.bam FBtr0079752 72 52 F 2 52-52 1 2.0 +input1.bam FBtr0079752 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0079820 74 1 F 0 1-1 1 0.0 +input1.bam FBtr0079820 74 50 F 13 45-56 12 1.08333333333 +input1.bam FBtr0079820 74 74 F 0 74-74 1 0.0 +input1.bam FBtr0080609 72 10 F 60 1-20 20 3.0 +input1.bam FBtr0080609 72 42 F 1 42-42 1 1.0 +input1.bam FBtr0080609 72 51 F 2 51-52 2 1.0 +input1.bam FBtr0080609 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0080644 72 6 F 6 1-12 12 0.5 +input1.bam FBtr0080644 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0080646 72 4 F 3 1-7 7 0.428571428571 +input1.bam FBtr0080646 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0080647 72 6 F 10 1-12 12 0.833333333333 +input1.bam FBtr0080647 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0080660 72 4 F 7 1-7 7 1.0 +input1.bam FBtr0080660 72 72 F 0 72-72 1 0.0 +input1.bam FBtr0080663 71 9 F 102 1-17 17 6.0 +input1.bam FBtr0080663 71 26 F 1 26-26 1 1.0 +input1.bam FBtr0080663 71 32 F 2 32-32 1 2.0 +input1.bam FBtr0080663 71 50 F 10 50-51 2 5.0 +input1.bam FBtr0080663 71 71 F 0 71-71 1 0.0 +input1.bam FBtr0080664 71 11 F 108 1-21 21 5.14285714286 +input1.bam FBtr0080664 71 33 F 9 28-38 11 0.818181818182 +input1.bam FBtr0080664 71 53 F 11 50-56 7 1.57142857143 +input1.bam FBtr0080664 71 71 F 0 71-71 1 0.0