annotate cut_degen.py @ 0:33b174991d3e draft default tip

planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
author megan-shortridge
date Tue, 15 Sep 2015 14:29:28 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
33b174991d3e planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
megan-shortridge
parents:
diff changeset
1 from Bio import SeqUtils
33b174991d3e planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
megan-shortridge
parents:
diff changeset
2 from Bio import SeqIO
33b174991d3e planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
megan-shortridge
parents:
diff changeset
3 import sys
33b174991d3e planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
megan-shortridge
parents:
diff changeset
4
33b174991d3e planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
megan-shortridge
parents:
diff changeset
5
33b174991d3e planemo upload for repository https://github.com/mshortr/degenerateprimerremoval commit df5ba746a916e08e0847a727237f6bec5ef0511f-dirty
megan-shortridge
parents:
diff changeset
# Command-line arguments (sys.argv[1] .. sys.argv[6]):
#   1. fastafile      - path of the input FASTA file.
#   2. adapter        - adapter/primer sequence as a string (it is passed to
#                       SeqUtils.nt_search as the pattern to look for).
#   3. keepreads      - "True" keeps reads in which the adapter was not found,
#                       "False" drops them from the output.
#   4. removeadapters - "True" trims reads in which the adapter was found,
#                       "False" writes them unchanged.
#   5. end_defn       - "5" drops everything up to and including the adapter,
#                       "3" drops the adapter and everything after it.
#   6. adapter_name   - label for the adapter, used only in the text report.
#
# Here is the command for the test:
#   python cut_degen.py 'test.fasta' 'GAACWAYWYCT' 'True' 'True' '5' 'test'


def output_names(fastafile):
    """Return the (FASTA output name, text report name) pair for an input.

    The input named "test3prime.fasta" is special-cased to "output2.*" so its
    results do not overwrite the default "output.*" files.
    """
    if fastafile == "test3prime.fasta":
        return "output2.fasta", "output2.txt"
    return "output.fasta", "output.txt"


def trim_record(record, adapter_start, adapter_end, removeadapters, end_defn):
    """Return `record` with the adapter trimmed according to the options.

    `record` only needs to support slicing. `adapter_start` / `adapter_end`
    are the slice bounds of the adapter match inside the record. The record
    is returned unchanged unless `removeadapters` is "True" and `end_defn`
    is "5" or "3".
    """
    if removeadapters == "True":
        if end_defn == "5":
            # 5' adapter: keep only what follows the match.
            return record[adapter_end:]
        if end_defn == "3":
            # 3' adapter: keep only what precedes the match.
            return record[:adapter_start]
    return record


def percent_cut(count_adapter_found, total_seq_count):
    """Return the percentage of reads in which the adapter was found.

    Returns 0.0 when no reads were analyzed, instead of dividing by zero.
    """
    if total_seq_count == 0:
        return 0.0
    return 100 * (float(count_adapter_found) / float(total_seq_count))


def summary_lines(total_seq_count, count_adapter_found, percent,
                  keepreads, removeadapters, end_defn, adapter_name, adapter):
    """Build the lines of the text report, in the order they are written.

    An option whose value is not one of its two recognised strings simply
    contributes no line. "%i" truncates `percent` to a whole number.
    """
    lines = [
        "The total number of sequences that were analyzed was %i.\n\n"
        % total_seq_count,
        "Adapter was found and removed for %i sequences (%i%% of total).\n\n"
        % (count_adapter_found, percent),
    ]

    if keepreads == "True":
        lines.append("Sequences that did not contain the adapter were kept.\n\n")
    elif keepreads == "False":
        lines.append("Sequences that did not contain the adapter were removed from the dataset.\n\n")

    if removeadapters == "True":
        lines.append("The adapters were removed from the dataset.\n\n")
    elif removeadapters == "False":
        lines.append("The adapters were not removed from the dataset.\n\n")

    if end_defn == "5":
        lines.append("Adapters were removed from the 5' end.\n\n")
    elif end_defn == "3":
        lines.append("Adapters were removed from the 3'end.\n\n")

    lines.append(
        "The name of the adapter that was removed was named %s, and had the sequence %s.\n\n"
        % (adapter_name, adapter)
    )
    return lines


def main(argv):
    """Filter/trim the reads of a FASTA file and write a summary report.

    `argv` has the layout of sys.argv (program name followed by the six
    arguments described at the top of this script). Writes the processed
    reads to the FASTA output file and the statistics to the text report.
    """
    if len(argv) < 7:
        sys.exit(
            "usage: python cut_degen.py <fastafile> <adapter> <keepreads> "
            "<removeadapters> <end_defn> <adapter_name>"
        )

    fastafile = str(argv[1])
    adapter = argv[2]
    keepreads = str(argv[3])
    removeadapters = str(argv[4])
    end_defn = str(argv[5])
    adapter_name = argv[6]

    len_adapter = len(adapter)
    count_adapter_found = 0
    count_adapter_not_found = 0

    output_fh_name, output_text_name = output_names(fastafile)

    # The input is only read, so plain read mode is enough; the context
    # managers close both handles even if parsing or writing fails.
    with open(fastafile, mode='r') as fh:
        with open(output_fh_name, mode='w') as output_fh:
            for record in SeqIO.parse(fh, format="fasta"):
                search = SeqUtils.nt_search(str(record.seq), adapter)
                # search[0] is not a match position; positions start at
                # search[1], so a list of length 1 means "adapter not found".
                if len(search) > 1:
                    count_adapter_found += 1
                    adapter_start = int(search[1])  # first reported match
                    adapter_end = adapter_start + len_adapter
                    record = trim_record(record, adapter_start, adapter_end,
                                         removeadapters, end_defn)
                    # Reads containing the adapter are always written.
                    SeqIO.write(record, output_fh, format="fasta")
                else:
                    count_adapter_not_found += 1
                    if keepreads == "True":
                        SeqIO.write(record, output_fh, format="fasta")

    total_seq_count = count_adapter_found + count_adapter_not_found
    percent = percent_cut(count_adapter_found, total_seq_count)

    with open(output_text_name, mode='w') as output_text_fh:
        for line in summary_lines(total_seq_count, count_adapter_found,
                                  percent, keepreads, removeadapters,
                                  end_defn, adapter_name, adapter):
            output_text_fh.write(line)


if __name__ == "__main__":
    main(sys.argv)