Mercurial > repos > artbio > mircounts
annotate format_fasta_hairpins.py @ 13:b045c30fb768 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af48e9f6df2717ffd3731a974be1ec36e4eff779"
author | artbio |
---|---|
date | Fri, 18 Oct 2019 19:18:50 -0400 |
parents | de227b7307cf |
children |
rev | line source |
---|---|
10
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
1 import argparse |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
2 import gzip |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
3 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
4 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
def Parser():
    '''
    Parse the command-line options of the script.

    Returns the argparse.Namespace produced by parse_args(), carrying the
    string attributes ``hairpins_path``, ``output`` and ``basename``.

    The three options are mandatory: the rest of the script hands them
    straight to gzip.open(), open() and a substring test, so a missing
    option is reported here as a usage error instead of surfacing later
    as a TypeError on None.
    '''
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument(
        '--hairpins_path', action="store", type=str, required=True,
        # the value is opened with gzip.open(), it is a file path, not a url
        help="path to the gzipped hairpin fasta file. "
             "ex: mirbase/22/hairpin.fa.gz")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="parsed hairpin output in fasta format")
    the_parser.add_argument(
        '--basename', action="store", type=str, required=True,
        help="genome basename of the parsed fasta")
    return the_parser.parse_args()
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
18 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
19 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
def get_fasta_dic(gzipfile):
    '''
    Read a gzip-compressed fasta file into a dictionary.

    gzipfile value example : 'mirbase/22/hairpin.fa.gz'

    Returns a dict whose keys are the first whitespace-delimited word of
    each header line (leading ">" removed) and whose values are the
    sequence of the record, multi-line sequences being concatenated.

    Blank lines are ignored and surrounding whitespace (including the
    "\\r" of CRLF line endings) is removed from every line. A record is
    stored only when it has both a name and a sequence, so an empty file
    returns an empty dict.
    '''
    item_dic = {}
    current_item = ''
    stringlist = []
    with gzip.open(gzipfile, 'rb') as f:
        for line in f:
            line = line.decode('utf-8').strip()
            if not line:
                # blank line: nothing to index, and line[0] would fail
                continue
            if line.startswith('>'):
                # dump the sequence of the previous item
                if current_item and stringlist:
                    item_dic[current_item] = "".join(stringlist)
                # take first word of item; a bare ">" yields no name and
                # the lines that follow it are then discarded
                words = line[1:].split()
                current_item = words[0] if words else ''
                stringlist = []
            else:
                stringlist.append(line)
    # for the last item, with the same rule as for the previous ones
    if current_item and stringlist:
        item_dic[current_item] = "".join(stringlist)
    return item_dic
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
41 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
42 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
def convert_and_print_hairpins(gzipfile, basename, fasta_output):
    '''
    Write the hairpins matching basename as a DNA fasta file.

    The records of gzipfile are loaded with get_fasta_dic(); those whose
    name contains basename are kept, every "u"/"U" of their sequence is
    turned into "t"/"T", and they are written to fasta_output sorted by
    name, one header line and one sequence line per record.
    '''
    all_records = get_fasta_dic(gzipfile)
    selected = {
        name: sequence.replace('u', 't').replace('U', 'T')
        for name, sequence in all_records.items()
        if basename in name
    }
    with open(fasta_output, "w") as output:
        for name in sorted(selected):
            output.write('>%s\n%s\n' % (name, selected[name]))
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
56 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
57 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
def main(hairpins_path, basename, outfile):
    '''
    Entry point: forward the three arguments to
    convert_and_print_hairpins(), with hairpins_path as the gzipped
    input and outfile as the fasta file to write.
    '''
    convert_and_print_hairpins(
        gzipfile=hairpins_path, basename=basename, fasta_output=outfile)
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
60 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
61 |
de227b7307cf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af0f70b8156c078cc0d832c54ebb678af10c42a0
artbio
parents:
diff
changeset
|
# Run only when executed as a script: read the options, then convert.
if __name__ == "__main__":
    args = Parser()
    main(
        args.hairpins_path,
        args.basename,
        args.output,
    )