comparison filter_fasta.py @ 6:5990c4dbfbaa draft

Uploaded
author bornea
date Sat, 06 Aug 2016 17:04:57 -0400
parents af454e5a9ef5
children e01b49c112b0
comparison
legend: equal | deleted | inserted | replaced
left (old, parent): 5:af454e5a9ef5 | right (new): 6:5990c4dbfbaa
20 output = [] 20 output = []
21 for line in x: 21 for line in x:
22 line = line.strip() 22 line = line.strip()
23 temp = line.split('\t') 23 temp = line.split('\t')
24 output.append(temp) 24 output.append(temp)
25 print temp
26 return output 25 return output
27 def getAccessions(infile): # get list of protein accessions from your data 26 def getAccessions(infile): # get list of protein accessions from your data
28 data = readtab(infile) 27 data = readtab(infile)
29 cnt = 0 28 cnt = 0
30 header_start = 0 29 header_start = 0
78 if temp != []: 77 if temp != []:
79 merged = "\n".join(temp) 78 merged = "\n".join(temp)
80 if merged!="": 79 if merged!="":
81 seq.append(merged) 80 seq.append(merged)
82 cnt=0 81 cnt=0
83 with open("output.fasta","wt") as x: 82 with open("output.fasta","w") as x:
84 for i in header: 83 for i in header:
85 x.write(i+'\n'+seq[cnt]+'\n') 84 x.write(i+'\n'+seq[cnt]+'\n')
86 print i+'\n'+seq[cnt]+'\n'
87 cnt+=1 85 cnt+=1
88 fasta = sys.argv[1] # fasta file to filter 86 fasta = sys.argv[1] # fasta file to filter
89 data = sys.argv[2] # scaffold report #2 -- filename 87 data = sys.argv[2] # scaffold report #2 -- filename
90 88
91 FilterFastaSeq(fasta,getAccessions(data)) 89 FilterFastaSeq(fasta,getAccessions(data))