annotate format_fasta_hairpins.py @ 12:6d3e98cba73a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 6718f7c6a0dbb36210f85a65b2e1ae0269855bb5
author artbio
date Sat, 12 May 2018 09:06:24 -0400
parents de227b7307cf
children b045c30fb768
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
1 import argparse
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
2 import gzip
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
3
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
4
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
def Parser():
    """Parse the command-line options of the script.

    All three options are mandatory: without any of them the script cannot
    do useful work, so a missing option is reported by argparse (usage
    message, exit status 2) instead of failing later on a ``None`` value.

    Returns:
        argparse.Namespace with the attributes ``hairpins_path``,
        ``output`` and ``basename`` (all strings).
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--hairpins_path', action="store", type=str, required=True,
        # The value is handed to gzip.open(), so it is a local file path.
        help="path to the gzip-compressed hairpin fasta file. "
             "ex: mirbase/22/hairpin.fa.gz")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="parsed hairpin output in fasta format")
    the_parser.add_argument(
        '--basename', action="store", type=str, required=True,
        help="genome basename of the parsed fasta")
    return the_parser.parse_args()
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
18
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
19
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
def get_fasta_dic(gzipfile):
    """Read a gzip-compressed fasta file into a dictionary.

    gzipfile value example : 'mirbase/22/hairpin.fa.gz'

    Each header line (starting with ">") opens a new record whose key is the
    first whitespace-delimited word after the ">". The following lines, up
    to the next header, are concatenated to form the sequence.

    Blank lines are ignored and surrounding whitespace (including the "\\r"
    of CRLF files) is stripped. A record is stored only when it has both a
    name and a non-empty sequence, so an empty file yields an empty dict.

    Args:
        gzipfile: path to the gzip-compressed, UTF-8 encoded fasta file.

    Returns:
        dict mapping record name to its sequence string.
    """
    item_dic = {}
    current_item = ''
    stringlist = []
    # Text mode decodes once at the I/O boundary instead of once per line.
    with gzip.open(gzipfile, 'rt', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines carry no data; indexing them would fail.
                continue
            if line.startswith('>'):
                # dump the sequence of the previous item
                if current_item and stringlist:
                    item_dic[current_item] = "".join(stringlist)
                # take first word of item; a bare ">" gives a nameless
                # record whose sequence is discarded
                words = line[1:].split()
                current_item = words[0] if words else ''
                stringlist = []
            else:
                stringlist.append(line)
    # for the last item, with the same guard as inside the loop
    if current_item and stringlist:
        item_dic[current_item] = "".join(stringlist)
    return item_dic
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
41
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
42
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
def convert_and_print_hairpins(gzipfile, basename, fasta_output):
    """Write the hairpins matching ``basename`` as a DNA fasta file.

    The records of ``gzipfile`` are loaded with get_fasta_dic(). Those whose
    name contains ``basename`` (plain substring test) are kept, and every
    "u"/"U" in their sequence is replaced by "t"/"T". The kept records are
    written to ``fasta_output`` sorted by name, as a ">name" line followed
    by a single sequence line.
    """
    rna_to_dna = str.maketrans("uU", "tT")
    selected = {
        name: sequence.translate(rna_to_dna)
        for name, sequence in get_fasta_dic(gzipfile).items()
        if basename in name
    }
    with open(fasta_output, "w") as handle:
        for name in sorted(selected):
            handle.write('>%s\n%s\n' % (name, selected[name]))
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
53
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
54
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
def main(hairpins_path, basename, outfile):
    """Run the conversion: thin wrapper around convert_and_print_hairpins().

    ``hairpins_path`` is the input file, ``basename`` the name filter and
    ``outfile`` the fasta file to write.
    """
    convert_and_print_hairpins(
        gzipfile=hairpins_path,
        basename=basename,
        fasta_output=outfile,
    )
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
57
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
58
de227b7307cf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the command-line options, then convert.
    args = Parser()
    main(
        hairpins_path=args.hairpins_path,
        basename=args.basename,
        outfile=args.output,
    )