#! /usr/bin/env python
"""Read a file and split it into several, depending on a tag"""

import os
from optparse import OptionParser
from structure.transcriptContainer import *
from writer.gff3Writer import *
from misc.progress import *
from misc import utils


def normalizeTagValue(value):
  """Turn a raw tag value into the base name of its output file.

  A missing tag (None) is mapped to "noTag". The value is then converted to a
  string, spaces are replaced by underscores and str.title() is applied, so
  the missing-tag bucket ends up named "Notag".
  """
  if value is None:
    value = "noTag"
  return str(value).replace(" ", "_").title()


if __name__ == "__main__":

  # parse command line
  description = "Split By Tag: Read a file and split it into several, depending on a tag. [Category: Personnal]"

  parser = OptionParser(description = description)
  parser.add_option("-i", "--input",           dest="inputFileName",     action="store",                    type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
  parser.add_option("-f", "--format",          dest="format",            action="store",                    type="string", help="format of file 1 [compulsory] [format: transcript file format]")
  parser.add_option("-t", "--tag",             dest="tag",               action="store",                    type="string", help="tag on which the split is made [compulsory] [format: string]")
  parser.add_option("-o", "--output",          dest="outputFileName",    action="store",                    type="string", help="output file [format: output file in CSV format]")
  parser.add_option("-v", "--verbosity",       dest="verbosity",         action="store",      default=1,    type="int",    help="trace level [format: int]")
  (options, args) = parser.parse_args()

  # -i, -f and -t are documented as compulsory, and -o is needed by
  # os.path.join below: stop with a usage message instead of a late traceback.
  for optionName, flag in (("inputFileName", "-i"), ("format", "-f"), ("tag", "-t"), ("outputFileName", "-o")):
    if getattr(options, optionName) is None:
      parser.error("option %s is compulsory" % (flag))

  # The -o value is used as the directory that receives one
  # "<normalized tag value>.gff3" file per distinct tag value; create it when
  # it does not exist yet (an empty value means the current directory).
  if options.outputFileName and not os.path.isdir(options.outputFileName):
    os.makedirs(options.outputFileName)

  transcriptContainer = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
  # one writer per normalized tag value, created the first time the value is met
  writers             = dict()

  progress = Progress(transcriptContainer.getNbTranscripts(), "Reading file %s" % (options.inputFileName), options.verbosity)
  for transcript in transcriptContainer.getIterator():
    value = normalizeTagValue(transcript.getTagValue(options.tag))
    if value not in writers:
      writers[value] = Gff3Writer("%s.gff3" % (os.path.join(options.outputFileName, value)))
    writers[value].addTranscript(transcript)

    progress.inc()
  progress.done()
  # NOTE(review): the writers are never closed or flushed explicitly here;
  # presumably Gff3Writer does that on destruction -- confirm against the writer class.
