filter_fasta: filter_fasta.py comparison

comparison filter_fasta.py @ 10:c9e6e2e7697c draft

Uploaded

author	bornea
date	Tue, 16 Aug 2016 20:36:24 -0400
parents	6794a638a504
children	573c36ff075f

comparison

equal deleted inserted replaced

-:6794a638a504
+:c9e6e2e7697c
 data = readtab(infile)
 cnt = 0
 header_start = 0
 prot_start = 0
 for i in data:
-if "Accession" in i: # finds the start of header
+if "Accession Number" in i: # finds the start of header
 header_start = cnt
 break
 cnt += 1
 header = data[header_start]
 if "Accession Number" in header:
 prot_start = header.index("Accession Number")
 elif "Accession" in header:
 prot_start = header.index("Accession")
 proteins = []
-for protein in data[1:]:
+for protein in data[header_start:]:
-proteins.append(protein[prot_start])
+if len(protein) > prot_start:
+proteins.append(protein[prot_start])
 return proteins
 def FilterFastaSeq(infile,accession): # fasta file and UniprotID/SwissprotID
 input_data = readtab(infile)
 seq=[]
 header=[]
 for i in input_data:
 cnt+=1
 if flag == 1: # once we have a hit, start adding the sequences
 if ">" not in i[0]: # don't add the headers to the sequence
 temp.append(i[0])
-#print temp
 if i[0].startswith(">"): # is it a fasta header?
 if temp != []: # if it is a continued fasta header, add old sequences to the sequence list
 # will this cut off the last one of the file?
 merged = "\n".join(temp)
 if merged!="":
 x = open("output.txt","w")
 for i in header:
 x.write(i+'\n'+seq[cnt]+'\n')
 cnt+=1
 x.close()
-fasta = sys.argv[1] # fasta file to filter
+#fasta = sys.argv[1] # fasta file to filter
-data = sys.argv[2] # scaffold report #2 -- filename
+#data = sys.argv[2] # scaffold report #2 -- filename
+fasta = r"C:\Users\Owner\Desktop\APOSTL\SAINT_preprocessing\SwissProt_HUMAN_2014_08.fasta"
+data = r"C:\Users\Owner\Desktop\APOSTL\Scaffold\scaffold_EGFR.txt"
 FilterFastaSeq(fasta,getAccessions(data))
-os.rename("output.txt", sys.argv[3])
+os.rename("output.txt", "output1.txt")

Mercurial > repos > bornea > filter_fasta

comparison filter_fasta.py @ 10:c9e6e2e7697c draft