Mercurial > repos > artbio > mircounts
comparison mature_mir_gff_translation.py @ 13:b045c30fb768 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit af48e9f6df2717ffd3731a974be1ec36e4eff779"
author | artbio |
---|---|
date | Fri, 18 Oct 2019 19:18:50 -0400 |
parents | de227b7307cf |
children |
comparison
equal
deleted
inserted
replaced
12:6d3e98cba73a | 13:b045c30fb768 |
---|---|
8 '--gff_path', action="store", type=str, | 8 '--gff_path', action="store", type=str, |
9 help="path to miRBase GFF3 file") | 9 help="path to miRBase GFF3 file") |
10 the_parser.add_argument( | 10 the_parser.add_argument( |
11 '--output', action="store", type=str, | 11 '--output', action="store", type=str, |
12 help="output GFF3 file with converted mature mir coordinates") | 12 help="output GFF3 file with converted mature mir coordinates") |
13 the_parser.add_argument( | |
14 '--basename', action="store", type=str, | |
15 help="basename of the parsed gff file returned") | |
16 args = the_parser.parse_args() | 13 args = the_parser.parse_args() |
17 return args | 14 return args |
18 | 15 |
19 | 16 |
20 def convert_and_print_gff(gff_input_file, output): | 17 def convert_and_print_gff(gff_input_file, output): |
41 gff_dict[ID]["premir_name"] = gff_fields[8].split( | 38 gff_dict[ID]["premir_name"] = gff_fields[8].split( |
42 "Name=")[1].split(";")[0] | 39 "Name=")[1].split(";")[0] |
43 gff_dict[ID]["primary"] = line[:-1] | 40 gff_dict[ID]["primary"] = line[:-1] |
44 gff_dict[ID]["miRNAs"] = [] | 41 gff_dict[ID]["miRNAs"] = [] |
45 elif gff_fields[2] == "miRNA": | 42 elif gff_fields[2] == "miRNA": |
| | 43 if "_" in ID: |
| | 44 continue |
46 parent_ID = gff_fields[8].split("erives_from=")[1] | 45 parent_ID = gff_fields[8].split("erives_from=")[1] |
47 gff_dict[parent_ID]["miRNAs"].append(line[:-1]) | 46 gff_dict[parent_ID]["miRNAs"].append(line[:-1]) |
48 # Now reorganise features and recalculate coordinates of premirs and mirs | 47 # Now reorganise features and recalculate coordinates of premirs and mirs |
49 gff_list = [] | 48 gff_list = [] |
50 for ID in sorted(gff_dict, key=lambda x: (gff_dict[x]['premir_name'])): | 49 for ID in sorted(gff_dict, key=lambda x: (gff_dict[x]['premir_name'])): |
68 source, type, newstart, newend, score, strand, | 67 source, type, newstart, newend, score, strand, |
69 phase, attributes)) | 68 phase, attributes)) |
70 # ensure their is only 2 child miRNAs at best | 69 # ensure their is only 2 child miRNAs at best |
71 if len(gff_dict[ID]["miRNAs"]) > 2: | 70 if len(gff_dict[ID]["miRNAs"]) > 2: |
72 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][:2] | 71 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][:2] |
73 # sort child miRNAs 5p first 3p second | 72 # sort child miRNAs 5p first 3p second, |
74 if gff_dict[ID]["miRNAs"][0].find('5p') == -1: | 73 # if there are two miR mature at least ! |
| | 74 if len(gff_dict[ID]["miRNAs"]) > 1 and \ |
| | 75 gff_dict[ID]["miRNAs"][0].find('5p') == -1: |
75 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][::-1] | 76 gff_dict[ID]["miRNAs"] = gff_dict[ID]["miRNAs"][::-1] |
76 for mir in gff_dict[ID]["miRNAs"]: | 77 for mir in gff_dict[ID]["miRNAs"]: |
77 mir_fields = mir.split('\t') | 78 mir_fields = mir.split('\t') |
78 mir_seqid = mir_fields[8].split("Name=")[1].split(";")[0] | 79 mir_seqid = mir_fields[8].split("Name=")[1].split(";")[0] |
79 mir_source = mir_fields[1] | 80 mir_source = mir_fields[1] |