Mercurial > repos > cpt > cpt_linear_genome_plot
annotate dna_features_viewer/biotools.py @ 6:e2449c8659be draft default tip
planemo upload commit fd578ee3fe8a0de6761ab6f59b2f0ca03117a75b
| author | cpt | 
|---|---|
| date | Sat, 13 Jul 2024 20:35:48 +0000 | 
| parents | b79e98299a78 | 
| children | 
| rev | line source | 
|---|---|
| 1 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 1 import textwrap | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 2 from Bio.Seq import Seq | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 3 from Bio.SeqFeature import SeqFeature, FeatureLocation | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 4 from Bio.PDB.Polypeptide import aa1, aa3 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 5 from Bio import SeqIO | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 6 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
try:
    from BCBio import GFF
except ImportError:

    class GFF:
        """Stand-in used when the optional bcbio-gff dependency is missing.

        It only exposes ``parse`` so that code calling ``GFF.parse(...)``
        fails with an explicit installation hint instead of a NameError.
        """

        @staticmethod
        def parse(*args, **kwargs):
            """Not available. Please install bcbio-gff."""
            # Accept any positional/keyword arguments so that every call
            # style ends in the helpful ImportError below rather than a
            # TypeError about an unexpected argument.
            raise ImportError("Please install the bcbio-gff library to parse GFF data")
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 15 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 16 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def complement(dna_sequence):
    """Return the complement of the DNA sequence, as a string.

    For instance ``complement("ATGCCG")`` returns ``"TACGGC"``.

    The work is delegated to Biopython's ``Seq.complement``.
    """
    complemented = Seq(dna_sequence).complement()
    return str(complemented)
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 25 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 26 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def reverse_complement(sequence):
    """Return the reverse-complement of the DNA sequence, as a string.

    For instance ``reverse_complement("ATGCCG")`` returns ``"CGGCAT"``.

    The sequence is first complemented with ``complement`` and the result
    is then read backwards.
    """
    complemented = complement(sequence)
    return complemented[::-1]
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 35 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 36 | 
| 3 
b79e98299a78
planemo upload commit b9287cffb7503159debac57d68917f5d337f0c9e-dirty
 cpt parents: 
1diff
# Lookup table from one-letter amino-acid codes to capitalized three-letter
# codes (e.g. "ALA" becomes "Ala"), with an extra "*" -> "*" entry so that
# "*" characters in a translation can be looked up too.
#
# ``aa1`` / ``aa3`` come from Bio.PDB.Polypeptide. The previous code branched
# on their exact types (str + list, otherwise assumed tuple + tuple) and would
# fail on any other combination. Converting both to tuples first yields the
# same pairs for strings, lists and tuples alike, so one expression covers
# every case.
aa_short_to_long_form_dict = {
    _aa1: _aa3[0] + _aa3[1:].lower()
    for (_aa1, _aa3) in zip(tuple(aa1) + ("*",), tuple(aa3) + ("*",))
}
| 1 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 46 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 47 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def translate(dna_sequence, long_form=False):
    """Translate the DNA sequence into an amino-acid sequence.

    By default the result is a string of one-letter codes ("MLKYQT...").

    If ``long_form`` is true, a list of 3-letter amino acid representations
    is returned instead (['Ala', 'Ser', ...]), obtained by looking each
    one-letter code up in ``aa_short_to_long_form_dict``.
    """
    protein = str(Seq(dna_sequence).translate())
    if not long_form:
        return protein
    return [aa_short_to_long_form_dict[letter] for letter in protein]
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 58 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 59 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def extract_graphical_translation(sequence, location, long_form=False):
    """Return the "graphical" translation of a segment of the sequence.

    "Graphical" means the amino acids are listed left-to-right, in the order
    they will appear under the sequence in a plot. For a location on the -1
    strand, the amino acids are computed from the reverse-complement of the
    segment, and the translation is then mirrored so that each amino acid
    lines up with the position of its codon in the original sequence.

    Parameters
    ----------
    sequence
      An "ATGC" string.

    location
      Either (start, end) or (start, end, strand), with strand in (0, 1, -1).
      The strand defaults to 1 when it is not given.

    long_form
      If True, a list of 3-letter amino acid representations is returned
      (['Ala', 'Ser', ...]) instead of a string of one-letter codes.
    """
    if len(location) != 3:
        start, end = location
        strand = 1
    else:
        start, end, strand = location

    on_reverse_strand = strand == -1
    segment = sequence[start:end]
    if on_reverse_strand:
        segment = reverse_complement(segment)

    amino_acids = translate(segment, long_form=long_form)
    # Mirror the result so it reads left-to-right along the original sequence.
    return amino_acids[::-1] if on_reverse_strand else amino_acids
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 96 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 97 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def load_record(path):
    """Load a record from a Genbank or GFF source.

    Parameters
    ----------
    path
      Either a file path (string) or a file-like object.

      - A string ending with ".gff" (case-insensitive) is parsed with
        ``GFF.parse`` and the first parsed record is returned.
      - Any other string is read with ``SeqIO.read(path, "genbank")``.
      - A file-like object is first read as Genbank; if that fails, the
        object is rewound with ``seek(0)`` and parsed as GFF, returning the
        first parsed record.

    Raises
    ------
    IndexError
      If the GFF parser yields no record.
    """
    if isinstance(path, str):
        # Input is a file path: the extension decides the format.
        if path.lower().endswith(".gff"):
            return list(GFF.parse(path))[0]
        return SeqIO.read(path, "genbank")

    # Input is a file-like object: there is no extension to look at, so try
    # Genbank first and fall back to GFF.
    try:
        return SeqIO.read(path, "genbank")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not silently turned into a
        # second parsing attempt.
        path.seek(0)
        return list(GFF.parse(path))[0]
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 113 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 114 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def annotate_biopython_record(
    seqrecord, location="full", feature_type="misc_feature", margin=0, **qualifiers
):
    """Append a new feature to a Biopython SeqRecord (modified in place).

    Parameters
    ----------

    seqrecord
      The biopython seqrecord to be annotated.

    location
      Either (start, end) or (start, end, strand) (strand defaults to +1),
      or the string "full" to span the whole record (see ``margin``).

    feature_type
      The type associated with the feature.

    margin
      Only used when ``location`` is "full": the feature then goes from
      ``margin`` to ``len(seqrecord) - margin``.

    qualifiers
      Keyword arguments collected into the dictionary that becomes the
      Biopython feature's ``qualifiers`` attribute.
    """
    if location == "full":
        location = (margin, len(seqrecord) - margin)

    strand = 1
    if len(location) == 3:
        strand = location[2]

    span = FeatureLocation(location[0], location[1], strand)
    new_feature = SeqFeature(span, qualifiers=qualifiers, type=feature_type)
    seqrecord.features.append(new_feature)
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 149 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 150 | 
| 
e923c686ead9
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
def find_narrowest_text_wrap(text, max_line_length):
    """Wrap the text into a multi-line text minimizing the longest line length.

    The text is first wrapped using ``max_line_length``. Smaller and smaller
    line lengths are then tried, keeping the narrowest wrap found, for as
    long as the number of lines does not exceed that of the current best
    wrap.

    Parameters
    ----------
    text
      The string to wrap.

    max_line_length
      Maximum number of characters allowed on a line.

    Returns
    -------
    str
      The wrapped lines joined with newlines. An empty string is returned
      when the text is empty or only contains whitespace.
    """
    narrowest_wrap = textwrap.wrap(text, max_line_length)
    if not narrowest_wrap:
        # textwrap.wrap returns [] for empty/whitespace-only text, and max()
        # of an empty sequence would raise ValueError.
        return ""
    narrowest_width = max(len(line) for line in narrowest_wrap)

    for line_length in range(max_line_length - 1, 0, -1):
        wrap = textwrap.wrap(text, line_length)
        if len(wrap) > len(narrowest_wrap):
            # Narrowing further now costs an extra line: stop here.
            break
        width = max(len(line) for line in wrap)
        if width < narrowest_width:
            narrowest_wrap = wrap
            narrowest_width = width
    return "\n".join(narrowest_wrap)
