view mass.py @ 12:d080005cffe1 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/cufflinks/cufflinks commit a0b0845a9d1b3e7ecdeacd1e606133617e3918bd"
author iuc
date Tue, 16 Jun 2020 13:00:32 -0400
parents
children
line wrap: on
line source

import shutil
import sys
import tempfile


def parse_gff_attributes(attr_str):
    """
    Parse a GFF/GTF attribute string into a dictionary of name-value pairs.

    The general format for a GFF3 attributes string is

        name1=value1;name2=value2

    The general format for a GTF attribute string is

        name1 "value1" ; name2 "value2"

    The general format for a GFF attribute string is a single string that
    denotes the interval's group; in this case, the returned dictionary has
    a single key-value pair whose key is 'group' and whose value is the
    unmodified input string.

    Entries that cannot be split into a name and a value, or whose name is
    empty, are skipped. Surrounding spaces and double quotes are stripped
    from values.
    """
    attributes = {}
    for name_value_pair in attr_str.split(";"):
        stripped_pair = name_value_pair.strip()
        # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3
        # attributes. Split on the first '=' only so that values which
        # themselves contain '=' are kept whole instead of being truncated.
        pair = stripped_pair.split("=", 1)
        if len(pair) == 1:
            # Not GFF3; try double quotes for GTF. The value is the text
            # between the first and second quote.
            pair = stripped_pair.split("\"")
        if len(pair) == 1:
            # Could not split (e.g. empty trailing entry after the last ';').
            continue
        name = pair[0].strip()
        if name == '':
            continue
        # Need to strip spaces and double quotes from values.
        attributes[name] = pair[1].strip(" \"")

    if not attributes:
        # Could not split attributes string, so entire string must be
        # 'group' attribute. This is the case for strictly GFF files.
        attributes['group'] = attr_str
    return attributes


def gff_attributes_to_str(attrs, gff_format):
    """
    Convert a dictionary of GFF attributes to an attribute string.

    Supported formats are 'GFF3' (rendered as name=value) and 'GTF'
    (rendered as name "value"). Pairs are joined with " ; " in the
    dictionary's iteration order.

    For 'GTF', if the attributes contain 'group' (GFF), 'ID' or 'Parent'
    (GFF3) -- checked in that order -- its value is also emitted as
    'transcript_id' and 'gene_id'. The caller's dictionary is not modified.

    Raises ValueError if gff_format is not one of the supported formats.
    """
    if gff_format == 'GTF':
        format_string = '%s "%s"'
        # Convert group (GFF) and ID, parent (GFF3) attributes to transcript_id, gene_id
        id_attr = None
        if 'group' in attrs:
            id_attr = 'group'
        elif 'ID' in attrs:
            id_attr = 'ID'
        elif 'Parent' in attrs:
            id_attr = 'Parent'
        if id_attr:
            # Work on a copy so the derived ids do not leak into the
            # caller's dictionary.
            attrs = dict(attrs)
            attrs['transcript_id'] = attrs['gene_id'] = attrs[id_attr]
    elif gff_format == 'GFF3':
        format_string = '%s=%s'
    else:
        # Without this branch format_string would be unbound below and the
        # failure would surface as an unrelated-looking UnboundLocalError.
        raise ValueError("Unsupported GFF format: %r (expected 'GTF' or 'GFF3')" % (gff_format,))
    return " ; ".join(format_string % (name, value) for name, value in attrs.items())


# Command-line arguments:
#   1. path to a file holding the captured standard error of the original run
#   2. path to a global model file, or the literal string "None"
#   3. path to the transcripts GTF file, which is rewritten in place
stderr = sys.argv[1]
global_model_file_name = sys.argv[2]
transcripts = sys.argv[3]

# Read standard error to get total map/upper quartile mass. The first line
# mentioning either term is used; the value is the text after the first ':'.
# -1 is the sentinel for "no mass line found".
total_map_mass = -1
with open(stderr, 'r') as tmp_stderr2:
    for line in tmp_stderr2:
        lowered_line = line.lower()
        if "map mass" in lowered_line or "upper quartile" in lowered_line:
            total_map_mass = float(line.split(":")[1].strip())
            break

if global_model_file_name != "None":
    if total_map_mass <= -1:
        # Rescaling with the -1 sentinel would silently write negative FPKM
        # values, so fail loudly instead.
        sys.exit("Could not find map mass / upper quartile in %s; cannot rescale FPKM values." % stderr)

    # Global model is simply total map mass from original run.
    with open(global_model_file_name, 'r') as global_model_file:
        global_model_total_map_mass = float(global_model_file.readline())

    # Ratio of global model's total map mass to original run's map mass is
    # factor used to adjust FPKM.
    fpkm_map_mass_ratio = total_map_mass / global_model_total_map_mass

    # Update FPKM values in transcripts.gtf file.
    with open(transcripts, 'r') as transcripts_file:
        # Text mode is required: the default mode is binary ('w+b'), which
        # rejects str writes on Python 3. The file is created in the current
        # directory and kept (delete=False) so it can replace the original.
        with tempfile.NamedTemporaryFile(mode='w', dir=".", delete=False) as new_transcripts_file:
            for line in transcripts_file:
                # Drop the line terminator up front so it cannot end up in
                # the last column and be doubled when the line is rewritten.
                line = line.rstrip("\r\n")
                fields = line.split('\t')
                if line.startswith('#') or len(fields) < 9:
                    # Comment, blank or malformed line: there is no
                    # attribute column to rescale, so copy it through.
                    new_transcripts_file.write("%s\n" % line)
                    continue
                attrs = parse_gff_attributes(fields[8])
                # Scale only the values that are actually present so that
                # records lacking one of them are kept rather than aborting.
                for key in ("FPKM", "conf_lo", "conf_hi"):
                    if key in attrs:
                        attrs[key] = str(float(attrs[key]) * fpkm_map_mass_ratio)
                fields[8] = gff_attributes_to_str(attrs, "GTF")
                new_transcripts_file.write("%s\n" % '\t'.join(fields))
    # Replace the original transcripts file with the rescaled copy.
    shutil.move(new_transcripts_file.name, transcripts)

# Record this run's mass so it can serve as the global model for later runs.
if total_map_mass > -1:
    with open("global_model.txt", 'w') as f:
        f.write("%f\n" % total_map_mass)