# HG changeset patch # User artbio # Date 1504607596 14400 # Node ID 9ea96a02c416d5e32d46bac22b345fce660325c7 # Parent da1aa7de2b195981802e356ff6ab3fbcf92589d3 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit 04980585c257ab5f8eb5d10de007316c47c5d1ce diff -r da1aa7de2b19 -r 9ea96a02c416 mature_mir_gff_translation.py --- a/mature_mir_gff_translation.py Mon Sep 04 17:55:01 2017 -0400 +++ b/mature_mir_gff_translation.py Tue Sep 05 06:33:16 2017 -0400 @@ -1,6 +1,6 @@ -#!/usr/bin/env python +import argparse -import argparse +from datetime import datetime def Parser(): @@ -14,19 +14,14 @@ return args -GFF3_header = '''##gff-version 3 -##generated by mature_mir_gff_translation.py -# -# Chromosomal coordinates of microRNAs ** relative to the hairpin precursors ** -# microRNAs: miRBase current_version -# genome-build-id: check http://mirbase.org/ -# -# Hairpin precursor sequences have type "miRNA_primary_transcript". -# Note, these sequences do not represent the full primary transcript, -# rather a predicted stem-loop portion that includes the precursor -# miRNA. Mature sequences have type "miRNA". -# -''' +def get_gff_header(gff_input_file): + string_list = [] + for line in open(gff_input_file, "r"): + if line[0] == '#': + string_list.append(line) + string_list.append('# generated by mature_mir_gff_translation.py %s\n#\n' % + str(datetime.now())) + return ''.join(string_list) def load_gff_in_dict(gff_input_file): @@ -51,9 +46,9 @@ gff_dict[ID]["strand"] = gff_fields[6] gff_dict[ID]["phase"] = gff_fields[7] gff_dict[ID]["attributes"] = gff_fields[8] - if "Derives_from" in gff_dict[ID]["attributes"]: + if "erives_from" in gff_dict[ID]["attributes"]: parent_primary_transcript = gff_dict[ID]["attributes"].split( - "Derives_from=")[1] + "erives_from=")[1] parent_primary_transcript = gff_dict[parent_primary_transcript][ "attributes"].split("Name=")[1] gff_dict[ID]["attributes"] = "%s;Parent_mir_Name=%s" % ( @@ -61,7 +56,7 @@ return gff_dict -def genome_to_mir_gff(gff_dict, output): +def genome_to_mir_gff(gff_dict, output, header): ''' Converts seqid field from chromosome to item Name Then converts coordinates relative to "miRNA_primary_transcript" @@ -70,9 +65,9 @@ for key in gff_dict: name = gff_dict[key]["attributes"].split("Name=")[1].split(";")[0] gff_dict[key]["seqid"] = name - if "Derives_from=" in gff_dict[key]["attributes"]: + if "erives_from=" in gff_dict[key]["attributes"]: parent_ID = gff_dict[key]["attributes"].split( - "Derives_from=")[1].split(";")[0] + "erives_from=")[1].split(";")[0] gff_dict[key]["start"] = str(int(gff_dict[key]["start"])-int( gff_dict[parent_ID]["start"])+1) gff_dict[key]["end"] = str(int(gff_dict[key]["end"])-int( @@ -95,7 +90,7 @@ "Name=")[1].split( ";")[0] with open(output, "w") as output: - output.write(GFF3_header) + output.write(header) for ID in sorted(hairpins, key=hairpins.get): output.write("\t".join([gff_dict[ID]["seqid"], gff_dict[ID]["source"], gff_dict[ID]["type"], @@ -117,7 +112,7 @@ def main(infile, outfile): gff_dict = load_gff_in_dict(infile) - genome_to_mir_gff(gff_dict, outfile) + genome_to_mir_gff(gff_dict, outfile, get_gff_header(infile)) if __name__ == "__main__": diff -r da1aa7de2b19 -r 9ea96a02c416 mircounts.xml --- a/mircounts.xml Mon Sep 04 17:55:01 2017 -0400 +++ b/mircounts.xml Tue Sep 05 06:33:16 2017 -0400 @@ -1,4 +1,4 @@ - + Counts miRNA alignments from small RNA sequence data gnu-wget @@ -9,9 +9,9 @@ r-lattice - @@ -166,10 +129,11 @@ + - + @@ -181,11 +145,12 @@ + - + @@ -195,11 +160,12 @@ + - + diff -r da1aa7de2b19 -r 9ea96a02c416 test-data/translated_dme.gff3 --- a/test-data/translated_dme.gff3 Mon Sep 04 17:55:01 2017 -0400 +++ b/test-data/translated_dme.gff3 Tue Sep 05 06:33:16 2017 -0400 @@ -1,15 +1,17 @@ ##gff-version 3 -##generated by mature_mir_gff_translation.py +##date 2014-6-22 # -# Chromosomal coordinates of microRNAs ** relative to the hairpin precursors ** -# microRNAs: miRBase current_version -# genome-build-id: check http://mirbase.org/ +# Chromosomal coordinates of Drosophila melanogaster microRNAs +# microRNAs: miRBase v21 +# genome-build-id: BDGP5.0 # # Hairpin precursor sequences have type "miRNA_primary_transcript". # Note, these sequences do not represent the full primary transcript, # rather a predicted stem-loop portion that includes the precursor # miRNA. Mature sequences have type "miRNA". # +# generated by mature_mir_gff_translation.py 2017-09-05 11:53:29.042787 +# dme-bantam . miRNA_primary_transcript 1 81 . + . ID=MI0000387;Alias=MI0000387;Name=dme-bantam dme-bantam-5p . miRNA 15 37 . + . ID=MIMAT0020823;Alias=MIMAT0020823;Name=dme-bantam-5p;Derives_from=MI0000387;Parent_mir_Name=dme-bantam dme-bantam-3p . miRNA 52 74 . + . ID=MIMAT0000365;Alias=MIMAT0000365;Name=dme-bantam-3p;Derives_from=MI0000387;Parent_mir_Name=dme-bantam