Mercurial > repos > cpt > cpt_find_spanins
annotate spaninFuncs.py @ 3:fd70980a516b draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author | cpt | 
|---|---|
| date | Mon, 05 Jun 2023 02:42:01 +0000 | 
| parents | |
| children | 46b252c89e9e | 
| rev | line source | 
|---|---|
| 3 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 1 """ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 2 PREMISE | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 3 ### Functions/Classes that are used in both generate-putative-osp.py and generate-putative-isp.py | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 4 ###### Main premise here is to make the above scripts a little more DRY, as well as easily readable for execution. | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 5 ###### Documentation will ATTEMPT to be thorough here | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 6 """ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 7 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 8 import re | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 9 from Bio import SeqIO | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 10 from Bio import Seq | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 11 from collections import OrderedDict | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 12 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 13 # Not written in OOP for a LITTLE bit of trying to keep the complication down in case adjustments are needed by someone else. | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 14 # Much of the manipulation is string based; so it should be straightforward as well as moderately quick | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 15 ################## Global Variables | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
Lys = "K"  # single-letter amino-acid code for lysine


def check_back_end_snorkels(seq, tmsize):
    """
    Searches through the backend of a potential TMD snorkel. This is the 2nd part of a TMD snorkel lysine match.
    --> seq : should be the sequence fed from the "search_region" portion of the sequence
    --> tmsize : size of the potential TMD being investigated
    --> returns : "match" if any of seq[tmsize-4] .. seq[tmsize-1] is a lysine whose
                  preceding residue is one of FIWLVMYCATGS, otherwise "NOTmatch"
    """
    # Residues accepted immediately before the lysine (proline is not in this set).
    hydrophobic = "FIWLVMYCATGS"
    # Positions are tried from tmsize-4 up to tmsize-1; the first hit wins.
    for back_off in (4, 3, 2, 1):
        idx = tmsize - back_off
        # Both idx and idx-1 must be real positions in seq. Without this guard a
        # seq shorter than tmsize raised IndexError, and a tmsize below 5 made the
        # negative index wrap around and inspect the END of seq instead.
        if idx < 1 or idx >= len(seq):
            continue
        if seq[idx] == Lys and seq[idx - 1] in hydrophobic:
            return "match"
    return "NOTmatch"
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 41 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 42 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def prep_a_gff3(fa, spanin_type, org):
    """
    Function parses an input detailed 'fa' file and outputs rows ready for a 'gff3' file
    ---> fa = input .fa file (handed straight to tuple_fasta)
    ---> spanin_type = 'isp', 'osp' or 'usp'
    ---> org = open handle whose first line is a ">accession ..." header; it is closed here
    ---> output = list of 9-item lists, one per record, in gff3 column order:
                  [seqid, source, type, start, end, score, strand, phase, attributes]
    ---> raises ValueError when spanin_type is not one of the three accepted values
         (only reached once there is at least one record to process)
    """
    with org as f:
        header = f.readline()
    # Accession = first space-delimited token of the header with the leading ">" removed.
    orgacc = header.split(" ")[0].split(">")[1].strip()

    source = "cpt.py|putative-*.py"  # column 2
    methodtype = "CDS"  # column 3 (identical for every spanin type)
    score = "."  # column 6
    phase = "."  # column 8

    data = []
    for a_pair in tuple_fasta(fa):
        description = a_pair[0]

        # column 7
        if re.search(r"(\[1\])", description):
            strand = "+"
        elif re.search(r"(\[-1\])", description):
            strand = "-"
        else:
            # Previously left unassigned: the first record crashed and later
            # records silently inherited the strand of the record before them.
            strand = "."

        start = re.search(r"[\d]+\.\.", description).group(0).split("..")[0]  # column 4
        end = re.search(r"\.\.[\d]+", description).group(0).split("..")[1]  # column 5
        orfid = re.search(r"(ORF)[\d]+", description).group(0)

        if spanin_type not in ("isp", "osp", "usp"):
            # The original raised a bare string, which is itself a TypeError in Python 3.
            raise ValueError("need to input spanin type")
        spanin = spanin_type

        attributes = (
            "ID=" + orgacc + "|" + orfid + ";ALIAS=" + spanin + ";SEQ=" + a_pair[1]
        )  # column 9
        data.append(
            [orgacc, source, methodtype, start, end, score, strand, phase, attributes]
        )
    return data
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 87 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 88 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def write_gff3(data, output="results.gff3"):
    """
    Parses results from prep_a_gff3 into a gff3 file
    ---> data : list of rows from prep_a_gff3; the first 9 items of each row are written
    ---> output : either an open, writable file handle (it is closed once writing is done)
                  or a path, which is created/overwritten
    ---> raises IndexError when a row holds fewer than 9 items
    """
    row_template = "\t".join(["{}"] * 9) + "\n"
    # A plain path (such as the default value) is not a context manager, so it is
    # opened here; anything that already exposes write() is used as handed in.
    if hasattr(output, "write"):
        handle = output
    else:
        handle = open(output, "w", encoding="utf-8")
    with handle as f:
        f.write("#gff-version 3\n")
        for value in data:
            f.write(row_template.format(*value[:9]))
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 114 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 115 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def find_tmd(
    pair,
    minimum=10,
    maximum=30,
    TMDmin=10,
    TMDmax=20,
    isp_mode=False,
    peri_min=18,
    peri_max=206,
):
    """
    Function that searches a window of the sequence for a spanning hydrophobic region that indicates a potential TMD
    ---> pair : Input of tuple with description and AA sequence (str)
    ---> minimum : start of the search window; the window is s[minimum - 1 : maximum + 1]
    ---> maximum : end of the search window (clamped to the sequence length)
    ---> TMDmin : The minimum size that a transmembrane can be (default = 10)
    ---> TMDmax : The maximum size that a transmembrane can be (default = 20)
    ---> isp_mode : when True, only keep hits whose remaining tail length lies in
                    [peri_min, peri_max] and append ", peri_count~=<n>" to the description
    ---> peri_min / peri_max : inclusive bounds used by isp_mode
    ---> returns : list with one entry per TMD size that produced a hit, so the same
                   pair can appear several times
    """
    # hydrophobicAAs = ['P', 'F', 'I', 'W', 'L', 'V', 'M', 'Y', 'C', 'A', 'T', 'G', 'S']
    tmd = []
    s = str(pair[1])  # sequence being analyzed
    if maximum > len(s):
        maximum = len(s)
    search_region = s[minimum - 1 : maximum + 1]

    # NOTE(review): this function used to carry a "lysine snorkel" branch ahead of
    # the hydrophobic-run test. It looked for K in search_region[1:8] and then
    # required search_region[hit + 1] to be hydrophobic, but `hit` was an offset
    # into the [1:8] slice, so that residue was always the lysine itself and the
    # branch (including its call to check_back_end_snorkels) could never run.
    # It is left out here so results stay exactly as before. Re-enabling it needs
    # a decision on the intended indexing and on what isp_mode should do when
    # there is no full hydrophobic run to measure from.
    for tmsize in range(TMDmin, TMDmax + 1):
        # tmsize consecutive hydrophobic residues anywhere in the window
        hit = re.search("[PFIWLVMYCATGS]{" + str(tmsize) + "}", search_region)
        if hit is None:
            continue
        if not isp_mode:
            tmd.append(pair)
            continue
        run = hit.group()
        # Measured from the first place the matched run occurs in the FULL
        # sequence, plus one residue; everything after that is counted.
        end_of_tmd = s.index(run) + len(run) + 1
        amt_peri = len(s) - end_of_tmd
        if peri_min <= amt_peri <= peri_max:
            tmd.append((pair[0] + ", peri_count~=" + str(amt_peri), pair[1]))

    return tmd
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 204 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 205 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def find_lipobox(
    pair, minimum=10, maximum=50, min_after=30, max_after=185, regex=1, osp_mode=False
):
    """
    Function that takes an input (description, sequence) tuple and returns it once for
    every lipobox pattern that is found inside the allowed window of the sequence.
    ---> pair - (description, sequence) tuple; the sequence is converted with str()
    ---> minimum - min distance from start codon to first AA of lipobox
    ---> maximum - max distance from start codon to first AA of lipobox
    ---> min_after - inclusive lower bound for the residue count after the lipobox
    ---> max_after - inclusive upper bound for the residue count after the lipobox
    ---> regex - accepted for backward compatibility only; the pattern it used to select
                 was always overwritten, so both patterns below are tried for any value
    ---> osp_mode - when True, ", peri_count~=<count>" is appended to the description

    Returns a list of pairs (empty when no pattern matches within the limits).
    """
    patterns = ["[ILMFTV][^REKD][GAS]C", "AW[AGS]C"]

    candidates = []
    s = str(pair[1])
    # Clamp at 0 so that minimum=0 does not become a negative (from-the-end) slice start.
    window_start = max(minimum - 1, 0)
    # The slice runs a few residues past `maximum` so a lipobox that starts close to the
    # upper bound is still matched in full.
    search_region = s[window_start : maximum + 5]
    for pattern in patterns:
        match = re.search(pattern, search_region)  # lipobox must be WITHIN the range...
        if match is None:
            continue
        # Translate the match end back into full-sequence coordinates. Re-searching the
        # whole sequence for the matched text could hit an identical motif that lies
        # before the window and give a wrong count.
        lipobox_end = window_start + match.end()
        amt_peri = len(s) - lipobox_end + 1
        if min_after <= amt_peri <= max_after:  # check the region after the lipobox
            if osp_mode:
                pair_desc = pair[0] + ", peri_count~=" + str(amt_peri)
                candidates.append((pair_desc, pair[1]))
            else:
                candidates.append(pair)

    return candidates
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 246 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 247 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def tuple_fasta(fasta_file):
    """
    #### INPUT: Fasta File (anything accepted by SeqIO.parse with the "fasta" format)
    #### OUTPUT: zipped (zip) : pairwise relationship of description to sequence
    #### Records whose sequence starts with "I" are left out.
    """
    descriptions = []
    sequences = []
    for record in SeqIO.parse(fasta_file, "fasta"):
        sequence = str(record.seq)
        # the translation table currently has I as a potential start codon ==> this
        # removes all ORFs that start with I. startswith() is used instead of
        # sequence[0] so a record with an empty sequence does not raise IndexError.
        if sequence.startswith("I"):
            continue
        descriptions.append(record.description)
        sequences.append(sequence)

    return zip(descriptions, sequences)
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 269 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 270 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def lineWrapper(text, charactersize=60):
    """
    Breaks text into chunks of at most `charactersize` characters, joined by newlines.
    ---> text : string to wrap
    ---> charactersize : maximum number of characters per line (default 60)

    Text that already fits on one line is returned unchanged. Raises ValueError when
    the text needs wrapping but charactersize is smaller than 1.
    """
    if len(text) <= charactersize:
        return text
    if charactersize < 1:
        raise ValueError("charactersize must be a positive integer")
    # Built iteratively: one recursion level per output line would exceed the
    # interpreter's recursion limit on long inputs.
    return "\n".join(
        text[i : i + charactersize] for i in range(0, len(text), charactersize)
    )
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 281 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 282 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def getDescriptions(fasta):
    """
    Collects the description header lines of a FASTA file.
    ---> fasta : object used as a context manager and iterated line by line
                 (it is closed when the function returns)

    Returns the lines starting with ">" in file order, exactly as read. These headers
    carry the information used to compare i-spanin and o-spanin locations.
    """
    with fasta as handle:
        headers = [row for row in handle if row.startswith(">")]
    return headers
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 294 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 295 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def splitStrands(text, strand="+"):
    """
    Returns text when it carries the marker of the requested strand, otherwise None.
    ---> text : description string to test
    ---> strand : "+" looks for the literal "[1]" ; "-" looks for the literal "[-1]"

    Any other strand value returns None.
    """
    # Raw strings keep the backslashes as regex escapes instead of relying on
    # invalid string escape sequences.
    if strand == "+":
        marker = r"(\[1\])"
    elif strand == "-":
        marker = r"(\[-1\])"
    else:
        return None
    if re.search(marker, text):
        return text
    return None
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 306 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 307 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def parse_a_range(pair, start, end):
    """
    Takes fasta tuple pairs and keeps only those within the input sequence range
    ---> pair : iterable of (description, sequence) tuples; each description must
                contain coordinates written as <digits>..<digits>
    ---> start : start range to keep (a pair is kept when start - 1 <= its start)
    ---> end : end range to keep (a pair is kept when its end <= end + 1)

    Returns the list of pairs that satisfy both limits. A description without
    coordinates raises AttributeError, because the regex search returns None.
    """
    matches = []
    for each_pair in pair:
        description = each_pair[0]
        # First "<digits>.." gives the start, first "..<digits>" gives the end.
        s = int(re.search(r"(\d+)\.\.", description).group(1))
        e = int(re.search(r"\.\.(\d+)", description).group(1))
        if start - 1 <= s and e <= end + 1:
            matches.append(each_pair)
    return matches
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 332 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 333 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def grabLocs(text):
    """
    Grabs the location of a spanin candidate from its description text.
    ---> text : description containing "<start>..<end>", "ORF<number>" and a strand
                marker, either "[1]" (reported as "+") or "[-1]" (reported as "-")

    Returns [start, end, orf, strand] with start and end as integers. A missing
    coordinate or ORF name raises AttributeError (the regex search returns None);
    a missing strand marker raises ValueError.
    """
    start = int(re.search(r"(\d+)\.\.", text).group(1))  # looks for [numbers]..
    end = int(re.search(r"\.\.(\d+)", text).group(1))  # looks for ..[numbers]
    orf = re.search(r"(ORF)[\d]+", text).group(0)  # ORF and the numbers after it
    if re.search(r"(\[1\])", text):
        strand = "+"
    elif re.search(r"(\[-1\])", text):
        strand = "-"
    else:
        # Without this branch `strand` would be unbound and the return below would
        # fail with an UnboundLocalError that hides the real problem.
        raise ValueError("no strand marker ([1] or [-1]) found in: " + repr(text))

    return [start, end, orf, strand]
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 357 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 358 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 359 def spaninProximity(isp, osp, max_dist=30): | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 360 """ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 361 _NOTE THIS FUNCTION COULD BE MODIFIED TO RETURN SEQUENCES_ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 362 Compares the locations of i-spanins and o-spanins. max_dist is the distance in NT measurement from i-spanin END site | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 363 to o-spanin START. The user will be inputting AA distance, so a conversion will be necessary (<user_input> * 3) | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 364 I modified this on 07.30.2020 to bypass the pick + or - strand. To | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 365 INPUT: list of OSP and ISP candidates | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 366 OUTPUT: Return (improved) candidates for overlapping, embedded, and separate list | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 367 """ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 368 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 369 embedded = {} | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 370 overlap = {} | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 371 separate = {} | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 372 for iseq in isp: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 373 embedded[iseq[2]] = [] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 374 overlap[iseq[2]] = [] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 375 separate[iseq[2]] = [] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 376 for oseq in osp: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 377 if iseq[3] == "+": | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 378 if oseq[3] == "+": | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 379 if iseq[0] < oseq[0] < iseq[1] and oseq[1] < iseq[1]: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 380 ### EMBEDDED ### | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 381 combo = [ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 382 iseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 383 iseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 384 oseq[2], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 385 oseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 386 oseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 387 iseq[3], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 388 ] # ordering a return for dic | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 389 embedded[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 390 elif iseq[0] < oseq[0] <= iseq[1] and oseq[1] > iseq[1]: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 391 ### OVERLAP / SEPARATE ### | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 392 if (iseq[1] - oseq[0]) < 6: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 393 combo = [ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 394 iseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 395 iseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 396 oseq[2], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 397 oseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 398 oseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 399 iseq[3], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 400 ] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 401 separate[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 402 else: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 403 combo = [ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 404 iseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 405 iseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 406 oseq[2], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 407 oseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 408 oseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 409 iseq[3], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 410 ] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 411 overlap[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 412 elif iseq[1] <= oseq[0] <= iseq[1] + max_dist: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 413 combo = [iseq[0], iseq[1], oseq[2], oseq[0], oseq[1], iseq[3]] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 414 separate[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 415 else: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 416 continue | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 417 if iseq[3] == "-": | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 418 if oseq[3] == "-": | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 419 if iseq[0] <= oseq[1] <= iseq[1] and oseq[0] > iseq[0]: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 420 ### EMBEDDED ### | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 421 combo = [ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 422 iseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 423 iseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 424 oseq[2], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 425 oseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 426 oseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 427 iseq[3], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 428 ] # ordering a return for dict | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 429 embedded[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 430 elif iseq[0] <= oseq[1] <= iseq[1] and oseq[0] < iseq[0]: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 431 if (oseq[1] - iseq[0]) < 6: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 432 combo = [ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 433 iseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 434 iseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 435 oseq[2], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 436 oseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 437 oseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 438 iseq[3], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 439 ] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 440 separate[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 441 else: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 442 combo = [ | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 443 iseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 444 iseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 445 oseq[2], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 446 oseq[0], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 447 oseq[1], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 448 iseq[3], | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 449 ] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 450 overlap[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 451 elif iseq[0] - 10 < oseq[1] < iseq[0]: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 452 combo = [iseq[0], iseq[1], oseq[2], oseq[0], oseq[1], iseq[3]] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 453 separate[iseq[2]] += [combo] | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 454 else: | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 455 continue | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 456 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 457 embedded = {k: embedded[k] for k in embedded if embedded[k]} | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 458 overlap = {k: overlap[k] for k in overlap if overlap[k]} | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 459 separate = {k: separate[k] for k in separate if separate[k]} | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 460 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 461 return embedded, overlap, separate | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 462 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 463 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def check_for_usp():
    """Placeholder: no check is implemented yet.

    NOTE(review): "usp" presumably refers to unimolecular spanins -- confirm
    with the tool authors before implementing.

    Returns:
        None. The function takes no arguments and has no side effects.
    """
    # The original body was the bare string "pass", which Python silently
    # treated as the docstring. An explicit return makes the stub obvious.
    return None
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 466 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 467 | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 468 ############################################### TEST RANGE ######################################################################### | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 469 #################################################################################################################################### | 
| 
fd70980a516b
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
if __name__ == "__main__":
    # Manual smoke checks that only run when this module is executed directly.
    # Hits are accumulated in `tmd` and `lipo` but are never printed or
    # asserted on; print them by hand when debugging.

    #### TMD TEST
    # (description, sequence) tuples handed to find_tmd one at a time.
    tmd_cases = [
        ("one", "XXXXXXXXXXXXXXXFMCFMCFMCFMCFMCXXXXXXXXXXXXXXXXXXXXXXXXXX"),
        ("two", "XXXXXXXXAAKKKKKKKKKKKKKKKXXXXXXXXXXXXX"),
        ("three", "XXXXXXX"),
        ("four", "XXXXXXXXXXXKXXXXXXXXXX"),
        ("five", "XXXXXXXXXXAKXXXXXXXXXXAKXXXXXXXX"),
    ]
    tmd = []
    for case in tmd_cases:
        try:
            found = find_tmd(pair=case)
            tmd += found
        except (IndexError, TypeError):
            # NOTE(review): presumably raised for sequences find_tmd cannot
            # process (or when it returns nothing appendable) -- confirm.
            continue
    # To inspect the result by hand: print(tmd)

    #### tuple-fasta TEST (disabled)
    # ret = tuple_fasta('out_isp.fa')
    # print('=============')
    # for i in ret:
    #     print(i[1])

    #### LipoBox TEST
    # Same tuple layout as above; regex=2 is passed to find_lipobox and a
    # previously tried `minimum=8` argument is left out.
    lipo_cases = [
        ("one", "XXXXXXXXXTGGCXXXXXXXXXXXXXXXX"),
        ("two", "XXXXXXXXAAKKKKKKKKKKKKKKKXXXXXXXXXXXXX"),
        ("three", "XXXXXXX"),
        ("four", "AGGCXXXXXXXXXXXXXXXXXXXXTT"),
        ("five", "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXTGGC"),
        ("six", "XXXXXXXXXXXXXXXXXXXXXXXXXXTGGC"),
        (
            "seven",
            "MSTLRELRLRRALKEQSMRYLLSIKKTLPRWKGALIGLFLICVATISGCASESKLPEPPMVSVDSSLMVEPNLTTEMLNVFSQ*",
        ),
    ]
    lipo = []
    for case in lipo_cases:
        try:
            boxes = find_lipobox(pair=case, regex=2)
            lipo += boxes
        except TypeError:
            # Original author's note: catches inputs that do not meet the
            # min/max requirements (something is too small).
            continue

    #### GFF3 TEST (disabled)
    # g = prep_a_gff3(fa='putative_isp.fa', spanin_type='isp')
    # print(g)
    # write_gff3(data=g)
