Mercurial > repos > artbio > small_rna_maps
diff small_rna_maps.py @ 20:de7fbcb1348c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit 44599c93586bc909f405ac6b745230563b290ee2
author | artbio |
---|---|
date | Sun, 25 Nov 2018 06:56:40 -0500 |
parents | f33afecac67a |
children | e75a10eba0a6 |
line wrap: on
line diff
--- a/small_rna_maps.py Thu Nov 22 20:05:32 2018 -0500 +++ b/small_rna_maps.py Sun Nov 25 06:56:40 2018 -0500 @@ -22,11 +22,21 @@ the_parser.add_argument('--bed', dest='bed', required=False, help='Name of bed output must be specified\ if --cluster option used') - the_parser.add_argument('--bed_skipcluster', dest='bed_skipcluster', - required=False, type=int, default=0, + the_parser.add_argument('--bed_skipsize', dest='bed_skipsize', + required=False, type=int, default=1, help='Skip clusters of size equal or less than\ specified integer in the bed output. \ - Default = 1') + Default = 0, not skipping') + the_parser.add_argument('--bed_skipdensity', dest='bed_skipdensity', + required=False, type=float, default=0, + help='Skip clusters of density equal or less than\ + specified float number in the bed output. \ + Default = 0, not skipping') + the_parser.add_argument('--bed_skipcounts', dest='bed_skipcounts', + required=False, type=int, default=1, + help='Skip clusters of size equal or less than\ + specified integer in the bed output. \ + Default = 0, not skipping') the_parser.add_argument('--outputs', nargs='+', action='store', help='list of two output paths (only two)') the_parser.add_argument('-M', '--plot_methods', nargs='+', action='store', @@ -107,7 +117,7 @@ [read_length, ...]} and returns a map_dictionary with structure: {(chromosome,read_position,polarity): - ([read_length, ...], [start_clust, end_clust])} + [*counts*, [start_clust, end_clust]]} ''' clustered_dic = defaultdict(list) for chrom in self.chromosomes: @@ -277,7 +287,7 @@ line = [str(i) for i in line] out.write('\t'.join(line) + '\n') - def write_cluster_table(self, clustered_dic, out, bedpath, skip): + def write_cluster_table(self, clustered_dic, out, bedpath): ''' Writer of a tabular file Dataset, Chromosome, Chrom_length, Coordinate, Polarity, @@ -285,32 +295,45 @@ out is an *open* file handler bed is an a file handler internal to the function ''' + def filterCluster(size, count, density): + if size < args.bed_skipsize: + return False + if count < args.bed_skipcounts: + return False + if density <= args.bed_skipdensity: + return False + return True bed = open(bedpath, 'w') + clusterid = 0 for key in sorted(clustered_dic): start = clustered_dic[key][1][0] end = clustered_dic[key][1][1] size = end - start + 1 + read_count = clustered_dic[key][0] if self.nostrand: polarity = '.' elif key[2] == 'F': polarity = '+' else: polarity = '-' - density = float(clustered_dic[key][0]) / size + density = float(read_count) / size line = [self.sample_name, key[0], self.chromosomes[key[0]], - key[1], key[2], clustered_dic[key][0], + key[1], key[2], read_count, str(start) + "-" + str(end), str(size), str(density)] line = [str(i) for i in line] - if size > skip: - bedline = [key[0], str(start-1), str(end), 'cluster', - str(clustered_dic[key][0]), polarity] + out.write('\t'.join(line) + '\n') + if filterCluster(size, read_count, density): + clusterid += 1 + name = 'cluster_' + str(clusterid) + bedline = [key[0], str(start-1), str(end), name, + str(read_count), polarity, str(density)] bed.write('\t'.join(bedline) + '\n') - out.write('\t'.join(line) + '\n') + print("number of reported clusters:", clusterid) bed.close() def main(inputs, samples, methods, outputs, minsize, maxsize, cluster, - nostrand, bedfile=None, bed_skipcluster=0): + nostrand, bedfile=None, bed_skipsize=0): for method, output in zip(methods, outputs): out = open(output, 'w') if method == 'Size': @@ -333,8 +356,7 @@ "Size": mapobj.compute_size, "cluster": mapobj.write_cluster_table} if cluster: - token["cluster"](mapobj.map_dict, out, bedfile, - bed_skipcluster) + token["cluster"](mapobj.map_dict, out, bedfile) else: token[method](mapobj.map_dict, out) out.close() @@ -347,5 +369,4 @@ args.sample_names = [name + '_' + str(i) for i, name in enumerate(args.sample_names)] main(args.inputs, args.sample_names, args.plot_methods, args.outputs, - args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed, - args.bed_skipcluster) + args.minsize, args.maxsize, args.cluster, args.nostrand, args.bed)