view fasta_tabular_converter.py @ 0:951cb6b3979b draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author drosofff
date Sun, 21 Jun 2015 14:28:49 -0400
parents
children 2f7278120be9
line wrap: on
line source

#!/usr/bin/python
#
import sys
from collections import defaultdict

def readfasta_writetabular(fasta, tabular):
  """Collapse a FASTA file into a "sequence<TAB>count" table.

  Header lines (starting with ">") are ignored.  Every other non-blank line
  is treated as one complete sequence; lines of a multi-line record are NOT
  joined together.  Rows are written by decreasing count.

  fasta   -- path of the FASTA file to read
  tabular -- path of the tabular file to write (overwritten if it exists)
  """
  # Count in a local table so that successive calls cannot accumulate into
  # each other through shared module state.
  counts = defaultdict(int)
  with open(fasta, "r") as source:
    for line in source:
      # Strip only the line terminator: slicing off the last character would
      # truncate the final sequence when the file has no trailing newline.
      sequence = line.rstrip("\r\n")
      if not sequence or sequence.startswith(">"):
        continue
      counts[sequence] += 1
  with open(tabular, "w") as output:
    for sequence in sorted(counts, key=counts.get, reverse=True):
      # file.write() behaves the same on Python 2 and Python 3.
      output.write("%s\t%s\n" % (sequence, counts[sequence]))
    
        
def readtabular_writefasta(tabular, fasta):
  """Expand a "sequence count" table into a plain FASTA file.

  Each input line is split on whitespace; the first field is the sequence
  and the second its integer count.  The sequence is written `count` times,
  each copy under a header that is a running number starting at 1.  Blank
  lines are skipped.

  tabular -- path of the tabular file to read
  fasta   -- path of the FASTA file to write (overwritten if it exists)

  Raises IndexError if a non-blank line has fewer than two fields and
  ValueError if the count is not an integer.
  """
  counter = 0
  # Context managers guarantee both files are closed even if parsing fails.
  with open(tabular, "r") as source, open(fasta, "w") as output:
    for line in source:
      fields = line.split()
      if not fields:
        continue
      sequence, copies = fields[0], int(fields[1])
      for _ in range(copies):
        counter += 1
        output.write(">%s\n%s\n" % (counter, sequence))

def readtabular_writefastaweighted(tabular, fasta):
  """Convert a "sequence count" table into a weighted FASTA file.

  Each non-blank input line yields one record whose header is
  ">rank_count", where rank is the 1-based position of the line among the
  non-blank lines and count is the second whitespace-separated field,
  copied verbatim.  The first field is written as the sequence.

  tabular -- path of the tabular file to read
  fasta   -- path of the weighted FASTA file to write (overwritten)

  Raises IndexError if a non-blank line has fewer than two fields.
  """
  counter = 0
  # Context managers guarantee both files are closed even if parsing fails.
  with open(tabular, "r") as source, open(fasta, "w") as output:
    for line in source:
      fields = line.split()
      if not fields:
        continue
      counter += 1
      output.write(">%s_%s\n%s\n" % (counter, fields[1], fields[0]))

def readfastaeighted_writefastaweighted(fastaweigthed_input, fastaweigthed_reparsed):
  """Collapse a weighted FASTA file so each sequence appears only once.

  A header's weight is the integer after its last "_" (e.g. ">7_12" has
  weight 12).  Every following non-blank sequence line adds that weight to
  the total for its sequence.  Output records are written by decreasing
  total weight with headers ">rank_total", rank starting at 1.  The sum of
  all header weights is reported on standard output.

  fastaweigthed_input    -- path of the weighted FASTA file to read
  fastaweigthed_reparsed -- path of the collapsed file to write (overwritten)

  Raises ValueError if a header does not end in an integer weight or if a
  sequence line appears before any header.
  """
  # Local table: successive calls must not accumulate into each other.
  totals = defaultdict(int)
  number_reads = 0
  weight = None
  with open(fastaweigthed_input, "r") as source:
    for line in source:
      # Strip only the terminator; slicing off the last character would
      # corrupt the final line of a file that has no trailing newline.
      text = line.rstrip("\r\n")
      if not text:
        continue
      if text.startswith(">"):
        weight = int(text[1:].split("_")[-1])
        number_reads += weight
      else:
        if weight is None:
          raise ValueError(
            "sequence line found before any '>' header in %s"
            % fastaweigthed_input)
        totals[text] += weight
  with open(fastaweigthed_reparsed, "w") as output:
    ranked = sorted(totals, key=totals.get, reverse=True)
    for rank, sequence in enumerate(ranked, 1):
      output.write(">%s_%s\n%s\n" % (rank, totals[sequence], sequence))
  # Single-argument print(...) is valid on both Python 2 and Python 3.
  print("%s reads collapsed" % number_reads)

def readfastaeighted_writefasta(fastaweigthed, fasta):
  """Expand a weighted FASTA file into a plain FASTA file.

  A header's weight is the integer after its last "_" (e.g. ">7_12" has
  weight 12).  Every following non-blank sequence line is written `weight`
  times, each copy under a header that is a running number starting at 1.

  fastaweigthed -- path of the weighted FASTA file to read
  fasta         -- path of the FASTA file to write (overwritten if it exists)

  Raises ValueError if a header does not end in an integer weight or if a
  sequence line appears before any header.
  """
  counter = 0
  weight = None
  # Context managers guarantee both files are closed even if parsing fails.
  with open(fastaweigthed, "r") as source, open(fasta, "w") as output:
    for line in source:
      # Strip only the terminator; slicing off the last character would
      # corrupt the final line of a file that has no trailing newline.
      text = line.rstrip("\r\n")
      if not text:
        continue
      if text.startswith(">"):
        weight = int(text[1:].split("_")[-1])
        continue
      if weight is None:
        raise ValueError(
          "sequence line found before any '>' header in %s" % fastaweigthed)
      for _ in range(weight):
        counter += 1
        output.write(">%s\n%s\n" % (counter, text))


# Module-level sequence -> count table, kept for any code in this file that
# reads it as a global.
seqdic = defaultdict(int)


def main(argv=None):
  """Run one conversion selected on the command line.

  argv -- argument list shaped like sys.argv (program name, input path,
          output path, conversion option); defaults to sys.argv.

  Exits with an error message and a non-zero status when fewer than three
  arguments are supplied or when the option is not a known conversion.
  """
  if argv is None:
    argv = sys.argv
  usage = ("usage: fasta_tabular_converter.py <input> <output> "
           "<fasta2tabular|tabular2fasta|tabular2fastaweight|"
           "fastaweight2fastaweight|fastaweight2fasta>")
  if len(argv) < 4:
    sys.exit(usage)
  source, destination, option = argv[1], argv[2], argv[3]
  converters = {
    "fasta2tabular": readfasta_writetabular,
    "tabular2fasta": readtabular_writefasta,
    "tabular2fastaweight": readtabular_writefastaweighted,
    "fastaweight2fastaweight": readfastaeighted_writefastaweighted,
    "fastaweight2fasta": readfastaeighted_writefasta,
  }
  if option not in converters:
    # Fail loudly instead of silently producing no output file.
    sys.exit("unknown conversion option %r\n%s" % (option, usage))
  converters[option](source, destination)


# Only convert when executed as a script, so importing the module has no
# side effects and does not depend on sys.argv.
if __name__ == "__main__":
  main()