# view idxml_exporter.py @ 0:ba86fd127f5a draft
#
# Uploaded
# author galaxyp
# date Wed, 19 Dec 2012 00:32:25 -0500
# parents
# children
# line wrap: on
# line source

from xml.sax import make_parser, ContentHandler
from optparse import OptionParser


def main():
    """Convert the ``--input`` XML file into the tab-separated ``--output`` file.

    Command-line options are read via ``_parse_args``; the SAX content handler
    returned by ``_get_handler`` writes its rows to the output file while the
    input is being parsed.
    """
    (options, _) = _parse_args()
    parser = make_parser()
    # Open the input first so an unreadable input does not truncate an
    # existing output file, and use ``with`` so both files are always closed
    # (the input handle was previously leaked).
    with open(options.input, "r") as source:
        with open(options.output, "w") as out:
            parser.setContentHandler(_get_handler(options, out))
            parser.parse(source)


def _get_handler(option, out):
    """Build the SAX content handler that writes its rows to *out*.

    *option* is accepted but not consulted: a ``PeptideHandler`` is always
    returned.
    """
    handler = PeptideHandler(out)
    return handler


class PeptideHandler(ContentHandler):
    """SAX handler that writes one tab-separated row per ``PeptideHit`` element.

    When an element named in ``record_values`` opens, the listed XML
    attributes are copied onto the handler instance; when it closes they are
    reset to ``None``. Each time a ``PeptideHit`` element closes, the values
    currently held are written to ``output`` as one line with the columns
    score, sequence, score_type, charge, MZ and RT.
    """

    # Maps an element name to the XML attribute names captured from it.
    record_values = {
        "IdentificationRun": ["search_engine"],
        "PeptideIdentification": ["score_type", "significance_threshold", "MZ", "RT"],
        "PeptideHit": ["score", "sequence", "charge"],
    }

    def __init__(self, output):
        """Create a handler writing rows to *output* (any object with ``write``)."""
        # Direct base call (not super()) keeps this working where
        # ContentHandler is an old-style class (Python 2).
        ContentHandler.__init__(self)
        self.output = output

    def __record_values(self, keys, attrs):
        """Store ``attrs[key]`` on self for every key, or ``None`` when absent."""
        for key in keys:
            setattr(self, key, attrs.get(key, None))

    def startElement(self, name, attrs):
        """Capture the tracked attributes of an opening element."""
        self._set_attributes(name, attrs)

    def endElement(self, name):
        """Emit a row when a ``PeptideHit`` closes, then clear that element's values."""
        if name == "PeptideHit":
            self._write_peptide()
        # Reset the values for this element so they do not leak into siblings.
        self._set_attributes(name, {})

    def _write_peptide(self):
        """Write the current peptide hit as one tab-separated line."""
        # The peptide column comes from the ``sequence`` attribute of
        # ``PeptideHit``; the former key "peptide" was never recorded, so that
        # column was always empty.
        col_keys = ["score", "sequence", "score_type", "charge", "MZ", "RT"]
        row_values = self._get_values(col_keys)
        row = "\t".join(row_values)
        self._write_line(row)

    def _write_line(self, line):
        """Write *line* followed by a newline to the output."""
        self.output.write(line)
        self.output.write("\n")

    def _get_values(self, keys):
        """Return the recorded value for each key, using ``""`` when unset.

        Attributes missing from the XML are stored as ``None``; they are
        mapped to empty strings so the row can always be joined.
        """
        values = (getattr(self, key, None) for key in keys)
        return ["" if value is None else value for value in values]

    def _set_attributes(self, name, attrs):
        """Record the tracked attributes for element *name*, if it is tracked."""
        # Plain dict lookup; avoids the Python-2-only ``iteritems``.
        element_attributes = self.record_values.get(name)
        if element_attributes is not None:
            self.__record_values(element_attributes, attrs)


def _parse_args():
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--type", dest="type", choices=["peptide"])
    return parser.parse_args()

# Run the exporter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()