Previous changeset 48:611cac5e3066 (2019-05-20) |
Commit message:
Uploaded |
added:
CodonSwitchTool/S_aureus_JE2.gbf CodonSwitchTool/cachingseq.py CodonSwitchTool/cachingseq.pyc CodonSwitchTool/codon_switch.py CodonSwitchTool/codon_switch.xml CodonSwitchTool/comparison_syngenic_plasmids.pdf CodonSwitchTool/fastdivmod.py CodonSwitchTool/fastdivmod.pyc CodonSwitchTool/functions.py CodonSwitchTool/functions.pyc CodonSwitchTool/further_information.pdf CodonSwitchTool/pEPSA5_annotated.gb CodonSwitchTool/patterns.txt CodonSwitchTool/run_codon_switch.sh CodonSwitchTool/sre_yield.py CodonSwitchTool/sre_yield.pyc CodonSwitchTool/syngenic.py CodonSwitchTool/syngenic.pyc |
removed:
project_rm/codon_usage.py project_rm/codon_usage.xml |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/S_aureus_JE2.gbf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/S_aureus_JE2.gbf Mon May 20 18:10:52 2019 -0400 |
b |
b'@@ -0,0 +1,92850 @@\n+LOCUS S. 2903350 bp DNA linear 13-NOV-2018\n+DEFINITION Streptococcus aureus strain JE2.\n+ACCESSION \n+VERSION\n+KEYWORDS .\n+SOURCE Streptococcus aureus\n+ ORGANISM Streptococcus aureus\n+ Unclassified.\n+COMMENT Annotated using prokka 1.12 from\n+ https://github.com/tseemann/prokka.\n+FEATURES Location/Qualifiers\n+ source 1..2903350\n+ /organism="Streptococcus aureus"\n+ /mol_type="genomic DNA"\n+ /strain="JE2"\n+ gene complement(40..321)\n+ /locus_tag="SaJE2__00001"\n+ CDS complement(40..321)\n+ /locus_tag="SaJE2__00001"\n+ /inference="ab initio prediction:Prodigal:2.6"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="hypothetical protein"\n+ /translation="MAERISSKIRRLEKSEEQIKLESLNEVTEAIAANKDSILKAIKL\n+ IKTLDDAKLLDALNGAIRGRQVIINNLQLNLIKIFIQGYYLIWLQWYFY"\n+ gene complement(798..1187)\n+ /locus_tag="SaJE2__00002"\n+ CDS complement(798..1187)\n+ /locus_tag="SaJE2__00002"\n+ /EC_number="1.2.1.2"\n+ /inference="ab initio prediction:Prodigal:2.6"\n+ /inference="similar to AA sequence:UniProtKB:Q99RW4"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="Putative formate dehydrogenase"\n+ /translation="MPASPSLERRYFYKYRRRIQRLYQALEPLGDSKPDWKIFQAIAN\n+ RLGFDWNYKHPSEIMDEVARLTPLYAGVSYDRLEGFNSLQWPVQPDGTDEPILYLEGF\n+ NFDNGKANYSHYHLIITLSKMKFMIFM"\n+ gene complement(1165..2010)\n+ /locus_tag="SaJE2__00003"\n+ CDS complement(1165..2010)\n+ /locus_tag="SaJE2__00003"\n+ /EC_number="1.2.1.2"\n+ /inference="ab initio prediction:Prodigal:2.6"\n+ /inference="similar to AA sequence:UniProtKB:Q99RW4"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="Putative formate dehydrogenase"\n+ /translation="MSVLIGTNTAEAHPVIASRMKRAQKLFGQKIHVFDIRKHEMAER\n+ ADRFYQPKPGTDLAWLSAVTKYIIDHDLHDKAFIDEWVDDFDEYYKSLETFTMAFAEE\n+ ATGIPESELIKFAEECAKAESVVICWAMGITQQDIGSDSSTAISNLLLVTGNYRRPGT\n+ GAYPLRGHNNVQGCSDMGSMPDKITGYQSIEADDIRAKFEKEYGVKLNPKAGKDNHEM\n+ VEGIHDGEVHSLYLYGEDTGIVDSNINFVQAAFEKLDFMVVQDEFFNIHSNIRRCCIA\n+ SKSFT"\n+ gene complement(1988..2218)\n+ /locus_tag="SaJE2__00004"\n+ CDS 
complement(1988..2218)\n+ /locus_tag="SaJE2__00004"\n+ /EC_number="1.2.1.2"\n+ /inference="ab initio prediction:Prodigal:2.6"\n+ /inference="similar to AA sequence:UniProtKB:Q99RW4"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="Putative formate dehydrogenase"\n+ /translation="MAQMHYHSFSSKATNEESYLMQKLARQVIGTNNVDNCSRYCQAP\n+ ATKGLFRTVGHGGDSGSIEDLEKSGNVCIDRY"\n+ gene complement(2184..2399)\n+ /locus_tag="SaJE2__00005"\n+ CDS complement(2184..2399)\n+ /locus_tag="SaJE2__00005"\n+ /EC_number="1.2.1.2"\n+ /inference="ab initio prediction:Prodigal:2.6"\n+ /inference="similar to AA sequence:UniProtKB:Q99RW4"\n+ /codon_start=1\n+ /transl_table=11\n+ /product="Putative formate dehydrogenase"\n+ /translation="MILQQIKIA'..b'c cttctccacg ttctttcgcc tcttctgcta attttaatgc\n+ 2900281 ttcatctaaa tcagctgttt taacatcaca gtatttcgta tcaattcgct tatcaacacg\n+ 2900341 tgtttcatca acatccacgc aaattgctac cccatgattc atagtaattg ctaacggttg\n+ 2900401 cgcaccaccc ataccaccta aacctgctgt cagtgtaaca gtgcctgcta aatctccatt\n+ 2900461 aaagtgttga ttacctagct cggcaaatgt ctcataagta ccttgcacaa taccttgaga\n+ 2900521 accaatatat atccaactac cggctgtcat ctgtccatac atgattaaac cttttttatc\n+ 2900581 taattcatta aaatgatccc agtttgccca ttcaggcact aatactgaat ttgaaattaa\n+ 2900641 tacacgtggc gcttcttcat gtgttttaaa tacagcaact ggctttcctg attgtactaa\n+ 2900701 cattgtctca tctgattcta attctcgtaa cgttttctct attgcttcaa aagcttccca\n+ 2900761 attacgtgct gcttttccaa taccaccata aacaactaaa tcttctggtc tttcagcaac\n+ 2900821 ttctgggtct aaattgttgt ataacattct aagtactgct tcttgttccc aacctttaca\n+ 2900881 ctcaatactc aaaccttttt ttgcttgaat ttttctcata aaattcgctc ctgttctttt\n+ 2900941 aagaagttaa ttccactaaa tttaaaacgc ttacattatt atcttcaata ttcattatag\n+ 2901001 tatgttaaaa tatagccaac aaatataaat aaactaatta tccatagctt gaatctataa\n+ 2901061 ataaaaggag caaaacacat gaaaattatt cagttagaat acttcttggc tatcgtgaaa\n+ 2901121 tataatagtt ttactaaagc tgcacaattt ttacatatta gccagccatc tttaactgct\n+ 2901181 acgattaaaa aaaatggaag cagatttagg ttatgactta tttacacgtt 
caacaaaaga\n+ 2901241 catcaagatt accgaaaaag gaatacagtt ttatcgttat gcgagcgaat tagttcaaca\n+ 2901301 atatcgatcc acgatggaaa aaatgtatga tttaagcgtt acatcagaac caaggataaa\n+ 2901361 aattgggact cttgaatcta cgaatcaatg gattgcgaat ttaattcgaa agcaccattc\n+ 2901421 cgactaccct gaacagcaat atcgtttata tgaaatacat gataaacatc aatctataga\n+ 2901481 gcaattactg aattttaata ttcatttagc tataacaaat gaaaaataac ccacgaagat\n+ 2901541 ataagatcca ttcctttata tgaggaatct tacattttat tagcacccaa ggaaacattt\n+ 2901601 aaaaatcaaa attgggtaga tgttgaaaat ttgccactca tattaccaaa caaaaattct\n+ 2901661 caagtgcgca aacacttaga tgactatttt aatagaagaa atattcgtcc aaatgtcgtt\n+ 2901721 gtagaaacag atcgattcga atcagcagtt ggatttgttc atctcggctt aggttacgct\n+ 2901781 atcattccga gattttatta ccaatcattt cacacgtcta atttagaata taaaaaaaat\n+ 2901841 tcgtccaaac ttaggccgaa aaatttatat caattaccat aaaaaacgca aacactccga\n+ 2901901 acagtacata cattcgtaca acaatgccaa gattatttat atggactttt agaggctctt\n+ 2901961 taacttaagt tattagagcc tcttatgcag ttgcgcagat catcgtataa aaattaatga\n+ 2902021 cgtcatttca aaaatcgata caaaaataat ttattataaa aattctaaga aagtgaagca\n+ 2902081 gatgttaaaa tctattaatc atatatgctt ttcagtcaga aatttaaacg attcaataca\n+ 2902141 tttttataga gatattttac ttgggaaatt gctattgact ggtaaaaaaa ctgcttattt\n+ 2902201 tgagcttgca ggcctatgga ttgctttaaa tgaagaaaaa gatataccac gtaatgaaat\n+ 2902261 tcacttttca tatacacata tagctttcac tatagatgac agcgaattta aatattggca\n+ 2902321 tcagaggtta aaagataata acgtgaatat tttagaagga agagttagag atattagaga\n+ 2902381 tagacaatca atttacttta ccgaccctga tggtcataag ctagaattac atactggcac\n+ 2902441 acttgagaac agattaaatt attataaaga ggctaaacca catatgacat tttacaaata\n+ 2902501 aggtgtcatt ataaaaaggc ctcttgaact ccgttaaaat tttaattaat tattatataa\n+ 2902561 taagagaact tttcaaacaa tacagttgtt atttttgcta tttcaacaaa cataaataag\n+ 2902621 cagtaagatg actacaactt aagagtcttc ttactgcaat tatttttcaa atatatcaac\n+ 2902681 gttaatataa cttctattaa gaaatactca cattctgccc tgcaatgcaa atctcgtcac\n+ 2902741 atataaatat ttttaattat tttaaaaaat gatgcactaa 
attagcaacg agcttagcag\n+ 2902801 ttctattgtc agcgtcatat gttggattca tctcagcaat actaactgaa gacaccttat\n+ 2902861 cacttggaat aatacgtttt gctaattcaa gaacagtatg tggatacaaa cctaacactg\n+ 2902921 ccggcgcact taccccaggc gcaaacgcac tatcaatgac atccatacaa atcgtaaaca\n+ 2902981 taatgacatc atgttcatgt acaaaacgtt caatcatatc tttaattgtt ggtgatacgt\n+ 2903041 gactcaataa ttcatctgca aagacataat caatcttttt ctctttagca taatcaaata\n+ 2903101 aaactttgcg tattaccacc ttgagcaata ccaagcacta aataatctgt gttttcatct\n+ 2903161 tcttctaaaa tttgtctaaa gctcgttcca gatgtagatt gttgttcagc acgtgtatca\n+ 2903221 aaatgcgcat caatatttat cacaccaata gattgtgttg gatagacttt acgtgttgct\n+ 2903281 aaatattgag catacgcaat atcatgtcca ccacctaata aaaatgtttg tctatgatta\n+ 2903341 gcaattgact\n+//\n' |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/cachingseq.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/cachingseq.py Mon May 20 18:10:52 2019 -0400 |
[ |
#!/usr/bin/env python2
#
# Copyright 2011-2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# vim: sw=2 sts=2 et

class CachingFuncSequence(object):
  """Read-only, fixed-length sequence whose items are computed lazily.

  Each item is produced on first access by calling ``func(index)`` (or
  ``inc_func(index, previous_value)`` when the previous item is already
  cached) and is then memoized, so every index is computed at most once.
  """

  def __init__(self, func, length, inc_func=None):
    """
    length: Length of this sequence.
    func: function(index)
    inc_func: function(index, value_of_previous)
    """

    self.func = func
    self.inc_func = inc_func
    self.length = length
    self._cache = {}

  def __getitem__(self, i):
    """Return item ``i``; negative indices count from the end.

    Raises:
      IndexError: if ``i`` falls outside ``[-length, length)``.
    """
    if i < 0:
      i += self.length
    if i < 0 or i >= self.length:
      raise IndexError()

    # Look the key up directly instead of testing the cached value against
    # None: a legitimately computed None must also count as a cache hit,
    # otherwise func/inc_func would be re-run on every access.
    try:
      return self._cache[i]
    except KeyError:
      pass

    if self.inc_func and i-1 in self._cache:
      # Cheaper incremental path: derive the item from its predecessor.
      v = self.inc_func(i, self._cache[i-1])
    else:
      v = self.func(i)

    self._cache[i] = v
    return v

  def __len__(self):
    """Return the fixed length given at construction time."""
    return self.length

  def __iter__(self):
    """Yield every item in index order, computing and caching as needed."""
    for i in range(self.length):
      yield self[i]
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/cachingseq.pyc |
b |
Binary file CodonSwitchTool/cachingseq.pyc has changed |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/codon_switch.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/codon_switch.py Mon May 20 18:10:52 2019 -0400 |
[ |
#!/usr/bin/env python
"""Command-line driver for the Codon Switch Tool.

Reads a plasmid and a list of target patterns, generates candidate plasmids
via ``generalization``, evaluates them against the codon usage computed from
an annotated genome, ranks them, writes the selected candidates as FASTA
files and produces the coloured text/PDF comparison reports.
"""

__author__= "Gianmarco Piccinno"
__version__ = "1.0.0"

from syngenic import *
from functions import *
from Bio import *
# ``from Bio import *`` only exposes submodules that happen to be imported
# already; import SeqIO explicitly so this script does not depend on that.
from Bio import SeqIO
import argparse as ap

if __name__ == '__main__':

    parser = ap.ArgumentParser(description="", formatter_class=ap.RawTextHelpFormatter)

    parser.add_argument(
        '-i', '--input_plasmid', help='Input plasmid', required=True)
    parser.add_argument(
        '-l', '--plasmid_format', help='Format of the plasmid: {fasta, genbank}', required=True)
    parser.add_argument(
        '-p', '--input_patterns', help='Input patterns separated by new_line', required=True)
    parser.add_argument(
        '-g', '--input_genome', help='Input annotated genome', required=True)
    parser.add_argument(
        '-q', '--genome_format', help='Format of the annotated genome: {fasta, gbk}', required=True)
    parser.add_argument(
        '-c', '--codon_table', help='Codon table to be used {Bacterial}', required=True)
    parser.add_argument(
        '-m', '--max_row', help='Max row length when print', required=False)
    parser.add_argument(
        '-d', '--demonstration', help='Use demonstration simplication', required=False)
    parser.add_argument(
        '-f', '--n_plasmids',
        help='Number of best-ranked plasmids to write out (default: all)',
        required=False)
    # NOTE(review): --output_folder is required but never read below; the
    # FASTA files are written to the current working directory. Confirm the
    # intended destination before wiring it in.
    parser.add_argument(
        '-o', '--output_folder', help='Folder for writing the output file', required=True)
    args = vars(parser.parse_args())

    # Example invocations:
    #
    #   python codon_switch_v2.py
    #       -i ./pEPSA5_annotated.gb
    #       -l genbank
    #       -p ./patterns.txt
    #       -g S_aureus_JE2.gbf
    #       -q gbk -c Bacterial
    #       -o ./output
    #
    #   python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -o ./output

    # Close the plasmid file deterministically once the record is parsed.
    with open(args['input_plasmid'], "r") as plasmid_handle:
        pl = SeqIO.read(plasmid_handle, args['plasmid_format'])

    # Demonstration mode works on the first 3000 positions only.
    if args['demonstration'] == "demonstration":
        pl = pl[0:3000]
    pats = read_patterns(args['input_patterns'])


    #############################################################
    #
    #############################################################

    #pl = fake_from_real(path = "./data/pEPSA5_annotated.gb", id_ = "Trial", name = "Fake_plasmid")
    print(type(pl))
    print(pl); print(pl.seq); print(pl.features)

    #for feat in pl.features:
    #    print(str(feat.extract(pl)))
    #    print(str(pl[feat.location.start:feat.location.end]))
    #    print("\n")


    n_pl = plasmid(pl)
    print(n_pl); print(len(n_pl))
    print(n_pl.features)


    patts, n_patts = all_patterns(input_ = pats)


    f_patts = n_pl.findpatterns(n_patts, patts)
    print(f_patts)
    print(pl.seq)
    print(len(pl.seq))


    n_poss = punctuate_targets(f_patts, n_pl)
    print(n_poss)

    print_seq(n_pl.seq)

    synonims_tables = synonims_(table_name=args['codon_table'])

    plasmids = generalization(n_poss, n_pl, synonims_tables)

    print(len(plasmids))

    #plasmids

    #if len(plasmids) > 5000000:
        #redo generalization without considering internal bases
        #in target sites that are not in CDS
        #this means considering only the outer bases of the target
    #    plasmids = generalization(n_poss, n_pl, synonims_tables,
    #                                reduced = True)

    #########################################################
    # Read genome and compute codon usage
    #########################################################

    genome = annotated_genome(read_annotated_genome(
        data=args['input_genome'], type_=args['genome_format']))

    out_genome = genome.codon_usage(args['codon_table'])
    print(out_genome.keys())
    print(out_genome["Table"])

    print(out_genome["Table"].loc["GCA"]["Proportion"])
    print(type(out_genome["Table"].loc["GCA"]["Proportion"]))


    #########################################################
    # Evaluate the plasmid
    #########################################################

    useful_plasmids = evaluate_plasmids(plasmids = plasmids,
                                        original_plasmid = n_pl,
                                        codon_usage_table = out_genome["Table"],
                                        n_patts = n_patts,
                                        f_patts = patts)

    dat_plasmids = rank_plasmids(original_useful_plasmids = useful_plasmids)

    # -f/--n_plasmids is optional: int(None) used to raise TypeError when it
    # was omitted. Without it, keep every ranked plasmid.
    if args['n_plasmids'] is not None:
        def_pls = dat_plasmids.index[:int(args['n_plasmids'])]
    else:
        def_pls = dat_plasmids.index

    for to_save in def_pls:
        #print(to_save)
        #print(useful_plasmids[to_save])
        with open(to_save+".fa", "w") as handle:
            handle.write(">"+to_save+"\n")
            handle.write(useful_plasmids[to_save]["sequence"])



    if args['max_row'] is not None:
        tmp_max_row = int(args['max_row'])
    else:
        tmp_max_row = 27

    print_color_seq(original = n_pl,
                    others = def_pls,
                    annotation_information = useful_plasmids,
                    tot = useful_plasmids,
                    ind_range = None,
                    patterns = n_poss,
                    f_patterns = f_patts,
                    patts = patts,
                    max_row = tmp_max_row)


    print_to_pdf(original = n_pl,
                 others = def_pls,
                 annotation_information = useful_plasmids,
                 tot = useful_plasmids,
                 ind_range = None,
                 patterns = n_poss,
                 f_patterns = f_patts,
                 patts = patts,
                 max_row = tmp_max_row)
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/codon_switch.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/codon_switch.xml Mon May 20 18:10:52 2019 -0400 |
[ |
<?xml version="1.0"?>
<!-- Galaxy wrapper for codon_switch.py. -->
<tool name="Codon Switch Tool" id="codon_switch" version="0.2.6">
  <description>for each sequence in a file</description>
  <requirements>
    <requirement type="package" version="2.7.15">python</requirement>
    <requirement type="package" version="1.72">biopython</requirement>
    <requirement type="package" version="0.23.4">pandas</requirement>
    <requirement type="package" version="1.15.3">numpy</requirement>
  </requirements>

  <!-- The genome format goes to -q (the script's genome_format option); it was
       previously passed as a second -g, which overwrote the genome path and
       left the required -q unset. Every value is single-quoted because the
       codon table names contain spaces. -->
  <command>
<![CDATA[
    python '$__tool_directory__/codon_switch.py' -i '$input' -l '$input_type' -p '$patterns' -g '$genome' -q '$genome_type' -c '$codon_table' -f '$num_plasmids' -m '$row_len' -d '$demonstration' -o '$output'
]]>
  </command>
  <inputs>
    <param name="input" format="genbank" type="data" label="Input plasmid"/>

    <param name="input_type" type="select" format="text">
      <label>Indicate the input file format.</label>
      <option value="genbank">genbank</option>
    </param>

    <param name="patterns" format="txt" type="data" label="Patterns file"/>

    <param name="genome" format="genbank" type="data" label="Input Genome"/>

    <param name="genome_type" type="select" format="text">
      <label>Indicate the input genome format.</label>
      <option value="fasta">fasta</option>
      <option value="genbank">genbank</option>
    </param>

    <param name="demonstration" type="select" format="text">
      <label>Demonstration</label>
      <option value="demonstration">yes</option>
      <option value="no_demonstration">no</option>
    </param>

    <!-- Attribute values must be quoted for the file to be well-formed XML. -->
    <param name="num_plasmids" type="select" format="text">
      <label>Indicate the number of best output plasmids.</label>
      <option value="3">3</option>
      <option value="7">7</option>
    </param>

    <param name="row_len" type="select" format="text">
      <label>Indicate the length of the row in report file.</label>
      <option value="9">9</option>
      <option value="27">27</option>
    </param>

    <param name="codon_table" type="select" format="text">
      <label>Choose the proper codon table for your organism.</label>
      <option value="Alternative Flatworm Mitochondrial">"Alternative Flatworm Mitochondrial"</option>
      <option value="Alternative Yeast Nuclear">"Alternative Yeast Nuclear"</option>
      <option value="Archaeal">"Archaeal"</option>
      <option value="Ascidian Mitochondrial">"Ascidian Mitochondrial"</option>
      <option value="Bacterial">"Bacterial"</option>
      <option value="Blastocrithidia Nuclear">"Blastocrithidia Nuclear"</option>
      <option value="Blepharisma Macronuclear">"Blepharisma Macronuclear"</option>
      <option value="Candidate Division SR1">"Candidate Division SR1"</option>
      <option value="Chlorophycean Mitochondrial">"Chlorophycean Mitochondrial"</option>
      <option value="Ciliate Nuclear">"Ciliate Nuclear"</option>
      <option value="Coelenterate Mitochondrial">"Coelenterate Mitochondrial"</option>
      <option value="Condylostoma Nuclear">"Condylostoma Nuclear"</option>
      <option value="Dasycladacean Nuclear">"Dasycladacean Nuclear"</option>
      <option value="Echinoderm Mitochondrial">"Echinoderm Mitochondrial"</option>
      <option value="Euplotid Nuclear">"Euplotid Nuclear"</option>
      <option value="Flatworm Mitochondrial">"Flatworm Mitochondrial"</option>
      <option value="Gracilibacteria">"Gracilibacteria"</option>
      <option value="Hexamita Nuclear">"Hexamita Nuclear"</option>
      <option value="Invertebrate Mitochondrial">"Invertebrate Mitochondrial"</option>
      <option value="Karyorelict Nuclear">"Karyorelict Nuclear"</option>
      <option value="Mesodinium Nuclear">"Mesodinium Nuclear"</option>
      <option value="Mold Mitochondrial">"Mold Mitochondrial"</option>
      <option value="Mycoplasma">"Mycoplasma"</option>
      <option value="Pachysolen tannophilus Nuclear">"Pachysolen tannophilus Nuclear"</option>
      <option value="Peritrich Nuclear">"Peritrich Nuclear"</option>
      <option value="Plant Plastid">"Plant Plastid"</option>
      <option value="Protozoan Mitochondrial">"Protozoan Mitochondrial"</option>
      <option value="Pterobranchia Mitochondrial">"Pterobranchia Mitochondrial"</option>
      <option value="SGC0">"SGC0"</option>
      <option value="SGC1">"SGC1"</option>
      <option value="SGC2">"SGC2"</option>
      <option value="SGC3">"SGC3"</option>
      <option value="SGC4">"SGC4"</option>
      <option value="SGC5">"SGC5"</option>
      <option value="SGC8">"SGC8"</option>
      <option value="SGC9">"SGC9"</option>
      <option value="Scenedesmus obliquus Mitochondrial">"Scenedesmus obliquus Mitochondrial"</option>
      <option value="Spiroplasma">"Spiroplasma"</option>
      <option value="Standard">"Standard"</option>
      <option value="Thraustochytrium Mitochondrial">"Thraustochytrium Mitochondrial"</option>
      <option value="Trematode Mitochondrial">"Trematode Mitochondrial"</option>
      <option value="Vertebrate Mitochondrial">"Vertebrate Mitochondrial"</option>
      <option value="Yeast Mitochondrial">"Yeast Mitochondrial"</option>
    </param>
  </inputs>

  <outputs>
    <data format="tabular" name="output" />
  </outputs>

  <help>
This tool permits codon switch and transversion in targeted regions.
  </help>
</tool>
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/comparison_syngenic_plasmids.pdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/comparison_syngenic_plasmids.pdf Mon May 20 18:10:52 2019 -0400 |
[ |
b'@@ -0,0 +1,583 @@\n+%PDF-1.4\n+%\x93\x8c\x8b\x9e ReportLab Generated PDF document http://www.reportlab.com\n+1 0 obj\n+<<\n+/F1 2 0 R\n+>>\n+endobj\n+2 0 obj\n+<<\n+/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font\n+>>\n+endobj\n+3 0 obj\n+<<\n+/Contents 34 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+4 0 obj\n+<<\n+/Contents 35 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+5 0 obj\n+<<\n+/Contents 36 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+6 0 obj\n+<<\n+/Contents 37 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+7 0 obj\n+<<\n+/Contents 38 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+8 0 obj\n+<<\n+/Contents 39 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+9 0 obj\n+<<\n+/Contents 40 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+10 0 obj\n+<<\n+/Contents 41 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+11 0 
obj\n+<<\n+/Contents 42 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+12 0 obj\n+<<\n+/Contents 43 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+13 0 obj\n+<<\n+/Contents 44 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+14 0 obj\n+<<\n+/Contents 45 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+15 0 obj\n+<<\n+/Contents 46 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+16 0 obj\n+<<\n+/Contents 47 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+17 0 obj\n+<<\n+/Contents 48 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+18 0 obj\n+<<\n+/Contents 49 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+19 0 obj\n+<<\n+/Contents 50 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+20 0 obj\n+<<\n+/Contents 51 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 
0 R /Resources <<\n+/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n+>> /Rotate 0 /Trans <<\n+\n+>> \n+ /Type /Page\n+>>\n+endobj\n+21 0 obj\n+<<\n+/Contents 52 0 R /MediaBox [ 0 0 612 792 ]'..b'<\'0C\'i8=g_,3j,m4r6_4bNK6,=1cJgEAk/Kq\'##nBBs#RZk/\'J.)"BF-s.9*bfhi)I(:TO8\'l_%N8i,m4?D#RZmU<4%=O?jT+&95l:_6;Xn>TO8)rQ*6K^qtSWYJ%bjWh<g+^D$kXp(PRBk-NpejJhsWaJ:]$L\'HFq4-pg1g+ZKQi#UoI65X>o#JfP!$a[%Vs%30s<";K\'&THFFK"YU-.BL+o"$64X3";I@KTM5`_JIr#!KHQrG"7c%j^BWH5<u&B\\]t8R/495![o`1X0o`1X,Qj<D(n,V>T*oOZH"+Zt@&+D@Y%r_^oi9,h"`(IiJHmO#fY7s)A((),@&3RaPK*MMn4MW<\\4MV1,&+JE5!PTL8^peq$piaIUS:Lq?Ol-8F;XaZnl/SCBDBFB%lK8\'2gZR/t\'*22f!^KunJ>`bKD@cj+%3j`B5m2_J#l\'g>#WMnun8gr"@\\k1K^(NiL<sg,_$O%,]#U83=5m.5r,2eXm,2e*+#l&9K!T+>1i7gO#I(LL&c9MO18.^>S.>WMX1ff$77;uW]!6e.;LmnAU2$)SD-K)MP"FN.q!"_\'i5>s6:B4(diDO<\\j\'MQ;DJdM3E_$"5@?ighS"FN.q5X<-gHud%+HuLb?h[Lpj]>4Vt4.(P:(JMU^R"-7b-4h[2\'C_:,of&*m!I[ZBR%"\'e2ZY%9!koUI^itD;phhbd+Dq70VI0JN^]aSj0S*=TR"2@C\'C_9=$,f0H"<<""phhd:J:\\+&jFR^5i*ZW#)be%kK*h<r0EL&`R"-tH!C%2j+BAgri%(o_A$upJGk_-TTCf[%_qOd.r5g\'j,C_V-fL2>A+uSuTK8;,W#m"/o]]t@"#SD-B(nVEbk*-m2:5DliT+iL#o4Nc53AZIe$P@Gl&>\'a_k0*g$%$>RB&>\'gak0+r32#us4)8t]mLcI8I:C[tXAq4NL83&J^:Ce%YAe:RV,3I)l5!_T\\R6c4TLmTt#4@MZ\\R6c4\\LmTuNp`omApKmgY=X%=Q1O+9>bPL8V1od+ts+^3=bVj3?qju^<ZhKUMi-cmMC:WM/AoBkr*khIG\\ueq;N\\!iR<N7[Ha&7&FWln7.WOb^PAQ2O,Y#9Cj@Yn!*Q)`#0U/YEpl:N=!Wif6BQ)a.*<L3rYWlp%#X"Fq*YIY]%^ZohTqM_GtK.^8K3T%9L<L<I9*E.*+OK#f2aAcFdA#_-l10\'\\8m1rWCH\'##:]^P9Y<aF"5Ct7W5VX!`Fb>qnR"9Q`K]!r#dW\\,oUN2heo+K+Ma,.L1o`C1ot02m"5&TGfj?!IL610#,,JUgGE77G"QA?.=8!WcT:lJa-%:j1C.9or:rE>)k^?/,QC<>RU17B.tX!g.r"Wf*?s6`0/.-4l9<]!r%LA?%7s!e)D!F`A\'#ke^=92h?LbX"J:AS>qL:+J\\5]-HN0t\\-XccF`A(NoZL3>g]STP<>RTFJ5"M49ctQWF\\rhA,MB)TRm94L;K_Y-acKJ-]p$eg<EJW71q)]7SUr2f\\pC9I8ZJrM.;4mEX*52FV!@69KZ7%uKC_\\cWO`:(_b&B93&=+*Y/-Ml;KWGo$uFHO3&=+*Y/-Ml;KWGo%$IW+E+Y22?%Xp8>BPqN%$IW+FCpV6?)\'3.V!8kh)\'i56kfghC?)\'3.V!8kh)\'i56kfb0K]#N2:gfJW@b@@Zcg9+C\\N\\!iR<N7[Ha&7&FWln7.WOb^PAQ2O,Y#9Cj@Yn!*Q)`#0U/YEpl:N=!Wif6BQ)a.*<L<;if!al7hQrh[FZI(eWlnTq
s/msb3j$!+Dmu+6bBp/c"5VMhr2G*ne`FK$56%dg>ufLJ]=PY!g7,+c>KBcLcB2qnjh5:kql[)-<S.\'7lLK]#?r\\!ZJ[j&W]B\\B"&1cl"eO;nL=tETpE=n`>XtD2US>qL*+Qoo1>ufLJ]=PKX`\'U?diZ\\pMH=c!a[B&RRhd\'Nj#Tg)4i[,3QH=^/`g]V)_r4BP=#t0\\_W\\/1@Zes;%9or:rE>.Ege[;ICg2g=32h>)P<*e8^i^0j8]=N/c?0Ai#b;I+g<F+A_i^0j8Wf*?3XH\\39(H$bWYtW]e9lTg<\\u5pUW\\/1@S@c@U<F\'t>lJa-%$^n#QRm:6BQ$l`cY,^f8GBN]l#MY#X=\'_S"cm2WCjLhijWijJ6Nifl>Vk-GZY/r,+P,`;<$C[dUR[f$XWOaE(Ts7VLj:jFBeMT?%;c7a4c4#l9CNa<O<*cgd"\']omc4#l9CNa<O<*cgdK;@e$SJ`\\QfBg_s<aE$fK;@e$SJ`\\QfBg^(WO`jX#"d@Q3&@d=fA]du[d%%d)\'i56iliOE\\sCe_gfJX+c"!leLAt3%oq0<"*r@n5+8`P253I8"hM_@1^].fi42oPAelI7jSA2:9cWaW"qHc:5oUHlOpUh=2XD]0>8\'e^YHI&(9cLl(1`OhYk^+Na)bA$A0il-0T@UBp)VlrDS%B^f#s-]pg^B)aE%Sd~>endstream\n+endobj\n+xref\n+0 62\n+0000000000 65535 f \n+0000000073 00000 n \n+0000000104 00000 n \n+0000000211 00000 n \n+0000000406 00000 n \n+0000000601 00000 n \n+0000000796 00000 n \n+0000000991 00000 n \n+0000001186 00000 n \n+0000001381 00000 n \n+0000001576 00000 n \n+0000001772 00000 n \n+0000001968 00000 n \n+0000002164 00000 n \n+0000002360 00000 n \n+0000002556 00000 n \n+0000002752 00000 n \n+0000002948 00000 n \n+0000003144 00000 n \n+0000003340 00000 n \n+0000003536 00000 n \n+0000003732 00000 n \n+0000003928 00000 n \n+0000004124 00000 n \n+0000004320 00000 n \n+0000004516 00000 n \n+0000004712 00000 n \n+0000004908 00000 n \n+0000005104 00000 n \n+0000005300 00000 n \n+0000005496 00000 n \n+0000005692 00000 n \n+0000005762 00000 n \n+0000006046 00000 n \n+0000006296 00000 n \n+0000010689 00000 n \n+0000014290 00000 n \n+0000017887 00000 n \n+0000021473 00000 n \n+0000025034 00000 n \n+0000028620 00000 n \n+0000032178 00000 n \n+0000035887 00000 n \n+0000039533 00000 n \n+0000044441 00000 n \n+0000048062 00000 n \n+0000051584 00000 n \n+0000055200 00000 n \n+0000058791 00000 n \n+0000062614 00000 n \n+0000066297 00000 n \n+0000069880 00000 n \n+0000073536 00000 n \n+0000077245 00000 n \n+0000080911 00000 n \n+0000084650 00000 n \n+0000088341 00000 n 
\n+0000091968 00000 n \n+0000095725 00000 n \n+0000099373 00000 n \n+0000103020 00000 n \n+0000106516 00000 n \n+trailer\n+<<\n+/ID \n+[<0f726644b6ac1201feda0e0957c03aa4><0f726644b6ac1201feda0e0957c03aa4>]\n+% ReportLab generated PDF document -- digest (http://www.reportlab.com)\n+\n+/Info 32 0 R\n+/Root 31 0 R\n+/Size 62\n+>>\n+startxref\n+109568\n+%%EOF\n' |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/fastdivmod.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/fastdivmod.py Mon May 20 18:10:52 2019 -0400 |
[ |
@@ -0,0 +1,93 @@ +#!/usr/bin/env python2 +# +# Copyright 2011-2016 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# vim: sw=2 sts=2 et + +from math import log, ceil +import sys + + +def find_largest_power(less_than, base): + power = int(log(less_than) / log(base)) + return base ** power + + +def divmod_iter(x, by, chunk=None): + if x < by: + return [x] + + if hasattr(x, 'bit_length'): + # crude log(2, x) + divisions = x.bit_length() // by.bit_length() + else: + divisions = log(x) / log(by) + + if divisions < 1024: + return divmod_iter_basic(x, by, chunk) + else: + return divmod_iter_chunking(x, by, chunk) + + +def divmod_iter_chunking(x, by, chunk=None): + """Generate successive (x % by); x /= by, but faster. 
+ + If provided, |chunk| must be a power of |by| (otherwise it is determined + automatically for 1024 per inner loop, based on analysis of bench_genmod.py) + """ + + if by == 1: + assert x == 0, x + yield 0 + return + + if chunk is None: + digits_per_chunk = 1024 + chunk = by ** digits_per_chunk + else: + digits_per_chunk = int(round(log(chunk) / log(by))) + if (by ** digits_per_chunk) != chunk: + raise ValueError("Chunk=%d must be a power of by=%d" % (chunk, by)) + + assert digits_per_chunk > 0 + + while x: + x, this_chunk = divmod(x, chunk) + #this_chunk = int(this_chunk) + for _ in range(digits_per_chunk): + this_chunk, m = divmod(this_chunk, by) + yield m + + if this_chunk == 0 and x == 0: + break + + +def divmod_iter_basic(x, by, chunk=None): + """Generate successive (x % by); x /= by, the obvious way. + + Chunk is ignored. + """ + while x: + x, m = divmod(x, by) + yield m + +def powersum(x, low, high): + # http://mikestoolbox.com/powersum.html + xm1 = x - 1 + if xm1 == 0: + return high - low + 1 + a = x ** (high + 1) + b = x ** low + return (a - b) // xm1 |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/fastdivmod.pyc |
b |
Binary file CodonSwitchTool/fastdivmod.pyc has changed |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/functions.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/functions.py Mon May 20 18:10:52 2019 -0400 |
[ |
b'@@ -0,0 +1,960 @@\n+import string\r\n+from syngenic import *\r\n+from Bio.Seq import Seq\r\n+from Bio.SeqFeature import SeqFeature, FeatureLocation\r\n+from pprint import pprint\r\n+\r\n+from itertools import izip\r\n+\r\n+import numpy as np\r\n+import pandas as pd\r\n+\r\n+def all_patterns(input_ = []):\r\n+\r\n+ patts = []\r\n+ n_patts = []\r\n+\r\n+ for patt in input_:\r\n+ tmp_patt = patt#Seq(patt.rstrip(), IUPAC.ambiguous_dna)\r\n+ tmp_revc = tmp_patt.reverse_complement()\r\n+\r\n+ patts.append(str(tmp_patt))\r\n+ patts.append(str(tmp_revc))\r\n+\r\n+ n_patts.append(pattern(tmp_patt).plan_ambiguity())\r\n+ n_patts.append(pattern(tmp_revc).plan_ambiguity())\r\n+\r\n+\r\n+ return patts, n_patts\r\n+\r\n+def fake_from_real(path = None, id_ = None, name = None):\r\n+\r\n+ plasmid_seq = SeqIO.read(open(path, "r"), "genbank")\r\n+\r\n+ f_p = plasmid_seq.seq[:10]\r\n+ f_CDS = []\r\n+ for f in plasmid_seq.features:\r\n+ if f.type == "CDS":\r\n+ tmp_start = len(f_p)\r\n+ tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]\r\n+ tmp_end = tmp_start + len(tmp_cds)\r\n+ f_p += tmp_cds\r\n+ f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end), type="gene", strand=f.location.strand))\r\n+ #f_p += plasmid_seq.seq[tmp_end:tmp_end+5]\r\n+ f_p += plasmid_seq.seq[-10:]\r\n+\r\n+ for feat in f_CDS:\r\n+ f_p.features.append(feat)\r\n+ f_p.id = id_\r\n+ f_p.name = name\r\n+\r\n+ #feature_seq_0 = f_CDS[0].extract(f_p)\r\n+\r\n+ return f_p\r\n+\r\n+def punctuate_targets(f_patts, n_pl):\r\n+\r\n+ n_poss = {}\r\n+ max_len = len(n_pl)\r\n+ for key in f_patts.keys():\r\n+ for el in f_patts[key]:\r\n+ if not el[2] < el[1]:\r\n+ tmp = range(el[1], el[2])\r\n+ for i in range(len(tmp)):\r\n+ if not tmp[i] in n_poss.keys():\r\n+ n_poss[tmp[i]] = [key[i]]\r\n+ else:\r\n+ n_poss[tmp[i]].append(key[i])\r\n+ else:\r\n+ tmp = range(el[1], max_len) + range(0, el[2])\r\n+ for i in range(len(tmp)):\r\n+ if not tmp[i] in n_poss.keys():\r\n+ 
n_poss[tmp[i]] = [key[i]]\r\n+ else:\r\n+ n_poss[tmp[i]].append(key[i])\r\n+\r\n+ for key in n_poss.keys():\r\n+ n_poss[key] = set(n_poss[key])\r\n+\r\n+ #print(n_poss)\r\n+\r\n+ return n_poss\r\n+\r\n+\r\n+def print_seq(n_pl, ind_range = None):\r\n+\r\n+ if ind_range == None:\r\n+\r\n+ data = filter(None, re.split(r\'(\\w{1})\', n_pl))\r\n+ index = range(len(n_pl))\r\n+\r\n+ seq = []\r\n+ ind = []\r\n+\r\n+ j = 0\r\n+\r\n+ seq.append("")\r\n+ ind.append("")\r\n+\r\n+ for i in range(len(data)):\r\n+\r\n+ if (i % 9 == 0) & (i > 0):\r\n+ j += 1\r\n+ seq.append("")\r\n+ ind.append("")\r\n+ print("\\n")\r\n+ print(seq[j-1])\r\n+ print(ind[j-1])\r\n+\r\n+\r\n+ seq[j] += " "\r\n+ ind[j] += " "\r\n+ for n in range(len(str(index[i]))-1):\r\n+ seq[j] += " "\r\n+ seq[j] += data[i]\r\n+ ind[j] += str(index[i])\r\n+ print("\\n")\r\n+ print(seq[j])\r\n+ print(ind[j])\r\n+ else:\r\n+ data = filter(None, re.split(r\'(\\w{1})\', n_pl[ind_range[0]:ind_range[1]]))\r\n+ index = range(ind_range[0], ind_range[1])\r\n+\r\n+ seq = []\r\n+ ind = []\r\n+\r\n+ j = 0\r\n+\r\n+ seq.append("")\r\n+ ind.append("")\r\n+\r\n+ for i in range(len(data)):\r\n+\r\n+ if (i % 9 == 0) & (i > 0):\r\n+ j += 1\r\n+ seq.append("")\r\n+ ind.append("")\r\n+ print("\\n")\r\n+ print(seq[j-1])\r\n+ print(ind[j-1])\r\n+\r\n+\r\n+ seq[j] += " "\r\n+ ind[j] += " "\r\n+ for n in range(len(str(index[i]))-1):\r\n+ seq[j] += '..b'arget_positions,\r\n+ 2:annot,\r\n+ 3:direction,\r\n+ 4:["Original"] + sequences["original"],\r\n+ 5:new_plasmids,\r\n+ 6:index}\r\n+\r\n+ doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,\r\n+ rightMargin=30,leftMargin=30,\r\n+ topMargin=30,bottomMargin=30)\r\n+\r\n+ elements = []\r\n+ #max_row = 18\r\n+ blocks = {}\r\n+\r\n+ if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:\r\n+ n_blocks = len(range(max_row, len(original.seq)+1, max_row))\r\n+ else:\r\n+ n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1\r\n+\r\n+ j = 0\r\n+\r\n+ for 
i in range(n_blocks):\r\n+ blocks[i] = []\r\n+ for l in range(7):\r\n+ if l in [0, 5]:\r\n+ for el in data[l]:\r\n+ if len(el[j:]) > max_row:\r\n+ if i >= 1:\r\n+ blocks[i].append([el[0]] + el[j:j+max_row])\r\n+ else:\r\n+ blocks[i].append(el[j:j+max_row])\r\n+ else:\r\n+ blocks[i].append([el[0]] + el[j:])\r\n+ else:\r\n+ if len(data[l][j:]) > max_row:\r\n+ if i >= 1:\r\n+ blocks[i].append([data[l][0]] + data[l][j:j+max_row])\r\n+ else:\r\n+ blocks[i].append(data[l][j:j+max_row])\r\n+ else:\r\n+ blocks[i].append([data[l][0]] + data[l][j:])\r\n+ j += max_row\r\n+ #print("\\n")\r\n+ #print(blocks[i])\r\n+\r\n+ elements.append(Table(blocks[i], hAlign=\'LEFT\'))#,\r\n+ #style=[(\'BACKGROUND\',(0,0),(0,0),colors.palegreen),\r\n+ # (\'BACKGROUND\',(1,1),(1,1),colors.palegreen),\r\n+ # (\'TEXTCOLOR\',(2,2),(3,2),colors.palegreen),\r\n+ # (\'BOX\',(0,0),(0,0),2,colors.red)]))\r\n+ elements.append(Table([["", "", "", "", ""]]))\r\n+\r\n+ doc.build(elements)\r\n+\r\n+\r\n+ #new_doc = SimpleDocTemplate("further_information.pdf",pagesize=letter,\r\n+ # rightMargin=30,leftMargin=30,\r\n+ # topMargin=30,bottomMargin=30)\r\n+ #new_elements = []\r\n+\r\n+ #new_elements.append([f for f in original.features if f.type.lower() in ["gene", "cds"]])\r\n+ #new_elements.append(f_patterns)\r\n+\r\n+ #doc.build(new_elements)\r\n+\r\n+ c = canvas.Canvas("./further_information.pdf")\r\n+ c.drawString(100,750,"CDS regions:")\r\n+ upper_bound = 750\r\n+ for feat in original.features:\r\n+ if feat.type.lower() in ["gene", "cds"]:\r\n+ upper_bound -= 15\r\n+ if feat.location.strand == -1:\r\n+ sign = "-"\r\n+ else:\r\n+ sign = "+"\r\n+ c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")\r\n+ upper_bound -= 30\r\n+ c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")\r\n+ for f_pattern in f_patterns.keys():\r\n+ upper_bound -= 15\r\n+ c.drawString(115,upper_bound,f_pattern + ":")\r\n+ 
for val in f_patterns[f_pattern]:\r\n+ upper_bound -= 15\r\n+ c.drawString(130,upper_bound,str(val))\r\n+ upper_bound -= 5\r\n+\r\n+ upper_bound -= 30\r\n+ c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")\r\n+ for target in targets.keys():\r\n+ upper_bound -= 15\r\n+ c.drawString(115,upper_bound,target + ": " + targets[target])\r\n+\r\n+ c.save()\r\n+\r\n+\r\n+ return\r\n+\r\n+\r\n+def produce_random_targets(sequence):\r\n+\r\n+ # Produce a target on two continous CDS\r\n+ # Produce a target in a non-coding region\r\n+ # Produce a target in coding region\r\n+ # Produce a target on a overlapping left\r\n+ # Produce a target on a overlapping right\r\n+\r\n+\r\n+\r\n+ return\r\n' |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/functions.pyc |
b |
Binary file CodonSwitchTool/functions.pyc has changed |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/further_information.pdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/further_information.pdf Mon May 20 18:10:52 2019 -0400 |
[ |
@@ -0,0 +1,68 @@ +%PDF-1.3 +%���� ReportLab Generated PDF document http://www.reportlab.com +1 0 obj +<< +/F1 2 0 R +>> +endobj +2 0 obj +<< +/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font +>> +endobj +3 0 obj +<< +/Contents 7 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 6 0 R /Resources << +/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] +>> /Rotate 0 /Trans << + +>> + /Type /Page +>> +endobj +4 0 obj +<< +/PageMode /UseNone /Pages 6 0 R /Type /Catalog +>> +endobj +5 0 obj +<< +/Author (anonymous) /CreationDate (D:20190520215146-01'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20190520215146-01'00') /Producer (ReportLab PDF Library - www.reportlab.com) + /Subject (unspecified) /Title (untitled) /Trapped /False +>> +endobj +6 0 obj +<< +/Count 1 /Kids [ 3 0 R ] /Type /Pages +>> +endobj +7 0 obj +<< +/Filter [ /ASCII85Decode /FlateDecode ] /Length 412 +>> +stream +Gat%];+ne\'SYH=/+-V'RG)1GX*r0c#!n!fb@#-MPVo!DQd3&:s(4iV+Y7"&pS%Q2c+W9O8F.Y*DYF,eK\IjUKEB^id[Ls?"WhYo^QS[E8HLnF!;@%_LGrns:,f%#`dj\3=+=r$+J1DZY6iBC5q/MrS+q:c%Oe:E^S]JE'f8_8TYaHgh0[+Vg%VL(*L*FQr^4N1Bj9+.)H1@VK>)/GWpcT8mcWQ0m.S#B:444LfrDh#=mhX7K$W7UTTgUkH%%J8/qapF1gbt_K25`Ea%DAYkc*B_n7WJ*<ksjXWC@O?)e5e7AJ9m,0h/QJaQtJJNB$bXKt-oZV:jZ9^#1EU'$;>hH%%Xg+GH'$Q`Eal3t.n@LZ_9t?IrAg2R(m=p7#FF*,WFdh^L>?Va8pi(3&F/R0lDJ#Dp=`~>endstream +endobj +xref +0 8 +0000000000 65535 f +0000000073 00000 n +0000000104 00000 n +0000000211 00000 n +0000000414 00000 n +0000000482 00000 n +0000000778 00000 n +0000000837 00000 n +trailer +<< +/ID +[<a048fbc90e461707589cdb3f47c562fc><a048fbc90e461707589cdb3f47c562fc>] +% ReportLab generated PDF document -- digest (http://www.reportlab.com) + +/Info 5 0 R +/Root 4 0 R +/Size 8 +>> +startxref +1339 +%%EOF |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/pEPSA5_annotated.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/pEPSA5_annotated.gb Mon May 20 18:10:52 2019 -0400 |
b |
b'@@ -0,0 +1,192 @@\n+LOCUS Exported 6850 bp ds-DNA circular SYN 13-NOV-2018\r\n+DEFINITION synthetic circular DNA\r\n+ACCESSION .\r\n+VERSION .\r\n+KEYWORDS .\r\n+SOURCE synthetic DNA construct\r\n+ ORGANISM synthetic DNA construct\r\n+REFERENCE 1 (bases 1 to 6850)\r\n+ AUTHORS .\r\n+ TITLE Direct Submission\r\n+ JOURNAL Exported Nov 13, 2018 from SnapGene 4.2.6\r\n+ http://www.snapgene.com\r\n+COMMENT LOCUS dna 6850 bp\r\n+FEATURES Location/Qualifiers\r\n+ source 1..6850\r\n+ /organism="synthetic DNA construct"\r\n+ /mol_type="other DNA"\r\n+ misc_feature 1..819\r\n+ /label=p15A origin of replication region\r\n+ /note="p15A origin of replication region"\r\n+ misc_feature 1032..3625\r\n+ /label=Staphylococcus aureus plasmid pC194\r\n+ /note="Staphylococcus aureus plasmid pC194 region\r\n+ (NC_002013.1)"\r\n+ misc_feature 1126..1324\r\n+ /label=Repeat sequence 1 of 2\r\n+ /note="Repeat sequence 1 of 2"\r\n+ CDS complement(1562..2212)\r\n+ /label=Chloramphenicol Resistance\r\n+ CDS complement(2346..2744)\r\n+ /label=Potential Copy number associated protein\r\n+ /note="Potential Copy number associated protein"\r\n+ misc_feature 2463..2517\r\n+ /label=pC194 replication origin\r\n+ /note="pC194 replication origin (detailed in PMC401278)"\r\n+ misc_feature 2670..3605\r\n+ /label=Replication initiation protein Rep RC\r\n+ misc_feature 3626..3824\r\n+ /label=Repeat sequence 2 of 2\r\n+ /note="Repeat sequence 2 of 2"\r\n+ misc_feature 3825..5278\r\n+ /label=Xylose Inducible Promoter Region\r\n+ /note="Xylose Inducible Promoter Region, Xyl promoter, Xyl\r\n+ repressor protein, Xyl terminator sequence and pTX5\r\n+ promoter facing MCS:\r\n+ http://parts.igem.org/Part:BBa_K1323014"\r\n+ CDS 4000..5151\r\n+ /label=XylR repressor protein\r\n+ misc_feature 5191..5241\r\n+ /label=Bacteriophage T5 PN25 promoter\r\n+ /note="Bacteriophage T5 PN25 promoter (Deuschle et al,\r\n+ PMC1167251)"\r\n+ misc_feature 5206..5211\r\n+ /label=-30 region\r\n+ /note="-30 region"\r\n+ misc_feature 
5229..5234\r\n+ /label=-10 region\r\n+ /note="-10 region"\r\n+ misc_feature 5241..5279\r\n+ /label=Xyl operator containing palindromic\r\n+ /note="interfering with transcription initiation"\r\n+ misc_feature 5279..5317\r\n+ /label=MCS\r\n+ /note="MCS"\r\n+ terminator 5393..5550\r\n+ /label=rrnB term\r\n+ terminator 5516..5559\r\n+ /label=rrnB_T1 term\r\n+ terminator 5691..5718\r\n+ /label=rrnB_T2 term\r\n+ misc_feature complement(5860..6720)\r\n+ /label=Ampicillin Resistance\r\n+ /note="gene of the plasmid pLEX5BA"\r\n+ promoter complement(6762..6790)\r\n+ /label=amp prom\r\n+ORIGIN\r\n+ 1 ggcggccgca ctggcttact atgttggcac tgatgagggt gtcagtgaag tgcttcatgt\r\n+ 61 ggcaggagaa aaaaggctgc accggtgcgt cagcagaata tgtgatacag gatatattcc\r\n+ 121 gcttcctcgc tcactgactc gctacgctcg gtcgttcgac tgcggcgagc ggaaatggct\r\n+ 181 tacgaacggg gcggagattt cctggaagat gccaggaaga tacttaacag ggaagtgaga\r\n+ 241 gggccgcggc aaagccgttt ttccataggc tccgcccccc tgacaagcat cacgaaatct\r\n+ 301 gacgctcaaa tcagtggtgg cgaaacccga caggactata aagataccag gcgtttcccc\r\n+ 361 ctggcggct'..b'781 ataaaaccac tcctttttaa caaactttat cacaagaaat attttggcat tctacgacta\r\n+ 3841 taacttaaat ttatattttt tactttataa tatataattg attatagaat aatgttgctc\r\n+ 3901 atatcgtttg ccaacatcta gtactcaaat tacactatgt tacacttggt aatattaacc\r\n+ 3961 gaacttcccc tgtccaaatt agataagagg taataataaa tggaaaataa ttttatagta\r\n+ 4021 aatgaaaatg agaagcgtgt attaaaacaa attttcaata acagcaatat ttcacgaaca\r\n+ 4081 caaatatcga agaatttaga acttaataaa gctactattt ctaacattct gaacaactta\r\n+ 4141 aaacacaaga gtttagttaa tgaagtagga gaaggtaata gtactaaaag tggtggacga\r\n+ 4201 aagcctattt tactcgaaat taaccaaaaa tatggctact atatttctat ggatttaaca\r\n+ 4261 tatgattccg ttgaattaat gtacaactac tttgatgcta ctatattaaa gcaagattcc\r\n+ 4321 tacgaattaa atgataaaaa tgtaagcagt atattacaaa ttttaaaatc taatataaac\r\n+ 4381 gtctcagaaa aatatgatac gttatatggg ttacttggta tatctatatc catacacggt\r\n+ 4441 atcgttgacg atgagcaaaa cataatcaat cttccttttc ataaaaatga gaaacgcaca\r\n+ 4501 tttaccgatg aattaaagtc 
attcacaaat gttcctgtcg ttatagaaaa tgaagcaaat\r\n+ 4561 ttatcagcgc tatatgaaaa aagtttatat attaattcaa acataaataa tttgattact\r\n+ 4621 ttaagtattc acaagggtat aggcgctggc atcctaataa ataaaaaact ttatcgtggc\r\n+ 4681 tcaaatggag aggctggaga gataggtaag acattggttt tggaatctat aaataacaat\r\n+ 4741 gacaacaaat attataaaat cgaagatata tgctcccaag acgctttaat acagaaaata\r\n+ 4801 aataataggt tgggcgtcac attgacgttt acagaactaa tccaatatta caacgaagga\r\n+ 4861 aattcaattg ttgctcatga aattaaacaa tttattaata aaatgacagt tctgattcat\r\n+ 4921 aatttgaata cacaatttaa cccagacgct atttatatta actgtccttt aattaatgaa\r\n+ 4981 ttaccaaata ttttaaatga aattaaagag caattctcct gtttttctca aggcagtcca\r\n+ 5041 gttcaattac atttaactac taatgtaaaa caagctactt tattgggtgg cactttagca\r\n+ 5101 ataatgcaaa aaacattaaa tataaataac attcaaatga atattaaata attacagcag\r\n+ 5161 tctgagttat aaaatagata tctcggaccg tcataaaaaa tttatttgct ttcaggaaaa\r\n+ 5221 tttttctgta taatagattc aagttagttt gtttattaaa ttaaccaact aaaatgtaga\r\n+ 5281 attcgagctc ggtacccggg gatcctctag agtcgacctg cagccaagct tgggcttttc\r\n+ 5341 agcctgatac agattaaatc agaacgcaga agcggtctga taaaacagaa tttgcctggc\r\n+ 5401 ggcagtagcg cggtggtccc acctgacccc atgccgaact cagaagtgaa acgccgtagc\r\n+ 5461 gccgatggta gtgtggggtc tccccatgcg agagtaggga actgccaggc atcaaataaa\r\n+ 5521 acgaaaggct cagtcgaaag actgggcctt tcgttttatc tgttgtttgt cggtgaacgc\r\n+ 5581 tctcctgagt aggacaaatc cgccgggagc ggatttgaac gttgcgaagc aacggcccgg\r\n+ 5641 agggtggcgg gcaggacgcc cgccataaac tgccaggcat caaattaagc agaaggccat\r\n+ 5701 cctgacggat ggcctttttg cgtttctaca aactcttttg tttatttttc taaatacatt\r\n+ 5761 caaatatgta tccgctcatc cccatcctat cgatgataag ctgtcaaaca tgagaattaa\r\n+ 5821 atcaatctaa agtatatatg agtaaacttg gtctgacagt taccaatgct taatcagtga\r\n+ 5881 ggcacctatc tcagcgatct gtctatttcg ttcatccata gttgcctgac tccccgtcgt\r\n+ 5941 gtagataact acgatacggg agggcttacc atctggcccc agtgctgcaa tgataccgcg\r\n+ 6001 agacccacgc tcaccggctc cagatttatc agcaataaac cagccagccg gaagggccga\r\n+ 6061 gcgcagaagt ggtcctgcaa ctttatccgc ctccatccag 
tctattaatt gttgccggga\r\n+ 6121 agctagagta agtagttcgc cagttaatag tttgcgcaac gttgttgcca ttgctacagg\r\n+ 6181 catcgtggtg tcacgctcgt cgtttggtat ggcttcattc agctccggtt cccaacgatc\r\n+ 6241 aaggcgagtt acatgatccc ccatgttgtg caaaaaagcg gttagctcct tcggtcctcc\r\n+ 6301 gatcgttgtc agaagtaagt tggccgcagt gttatcactc atggttatgg cagcactgca\r\n+ 6361 taattctctt actgtcatgc catccgtaag atgcttttct gtgactggtg agtactcaac\r\n+ 6421 caagtcattc tgagaatagt gtatgcggcg accgagttgc tcttgcccgg cgtcaacacg\r\n+ 6481 ggataatacc gcgccacata gcagaacttt aaaagtgctc atcattggaa aacgctcttc\r\n+ 6541 ggggcgaaaa ctctcaagga tcttaccgct gttgagatcc agttcgatgt aacccactcg\r\n+ 6601 tgcacccaac tgatcttcag catcttttac tttcaccagc gtttctgggt gagcaaaaac\r\n+ 6661 aggaaggcaa aatgccgcaa aaaagggaat aagggcgaca cggaaatgtt gaatactcat\r\n+ 6721 actcttcctt tttcaatatt attgaagcat ttatcagggt tattgtctca tgagcggata\r\n+ 6781 catatttgaa tgtatttaga aaaataaaca aataggggtt ccgcgcacat ttccccgaaa\r\n+ 6841 agtgccacct\r\n+//\r\n' |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/patterns.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/patterns.txt Mon May 20 18:10:52 2019 -0400 |
b |
@@ -0,0 +1,3 @@ +ACANNNNNNRTGG +ATCNNNNNCCT +BNNNNNNNGCGGTAVY |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/run_codon_switch.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/run_codon_switch.sh Mon May 20 18:10:52 2019 -0400 |
b |
@@ -0,0 +1,2 @@ +#python setup.py build_ext --inplace +python codon_switch.py -i pEPSA5_annotated.gb -l genbank -p patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -f 3 -m 27 -d demonstration -o ./output # -d demonstration |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/sre_yield.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/sre_yield.py Mon May 20 18:10:52 2019 -0400 |
[ |
b'@@ -0,0 +1,611 @@\n+#!/usr/bin/env python2\n+#\n+# Copyright 2011-2016 Google Inc.\n+#\n+# Licensed under the Apache License, Version 2.0 (the "License");\n+# you may not use this file except in compliance with the License.\n+# You may obtain a copy of the License at\n+#\n+# http://www.apache.org/licenses/LICENSE-2.0\n+#\n+# Unless required by applicable law or agreed to in writing, software\n+# distributed under the License is distributed on an "AS IS" BASIS,\n+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n+# See the License for the specific language governing permissions and\n+# limitations under the License.\n+#\n+# vim: sw=2 sts=2 et\n+\n+"""This module can generate all strings that match a regular expression.\n+\n+The regex is parsed using the SRE module that is standard in python,\n+then the data structure is executed to form a bunch of iterators.\n+"""\n+\n+__author__ = \'alexperry@google.com (Alex Perry)\'\n+__all__ = [\'Values\', \'AllStrings\', \'AllMatches\', \'ParseError\']\n+\n+\n+import bisect\n+import math\n+import re\n+import sre_constants\n+import sre_parse\n+import string\n+import sys\n+import types\n+\n+import cachingseq\n+import fastdivmod\n+\n+try:\n+ xrange = xrange\n+except NameError:\n+ xrange = range\n+\n+_RE_METACHARS = r\'$^{}*+\\\\\'\n+_ESCAPED_METACHAR = r\'\\\\[\' + _RE_METACHARS + r\']\'\n+ESCAPED_METACHAR_RE = re.compile(_ESCAPED_METACHAR)\n+# ASCII by default, see https://github.com/google/sre_yield/issues/3\n+CHARSET = [chr(c) for c in range(256)]\n+\n+WORD = string.ascii_letters + string.digits + \'_\'\n+\n+try:\n+ DEFAULT_RE_FLAGS = re.ASCII\n+except AttributeError:\n+ DEFAULT_RE_FLAGS = 0\n+\n+STATE_START, STATE_MIDDLE, STATE_END = list(range(3))\n+\n+def Not(chars):\n+ return \'\'.join(sorted(set(CHARSET) - set(chars)))\n+\n+\n+CATEGORIES = {\n+ sre_constants.CATEGORY_WORD: WORD,\n+ sre_constants.CATEGORY_NOT_WORD: Not(WORD),\n+ sre_constants.CATEGORY_DIGIT: string.digits,\n+ 
sre_constants.CATEGORY_NOT_DIGIT: Not(string.digits),\n+ sre_constants.CATEGORY_SPACE: string.whitespace,\n+ sre_constants.CATEGORY_NOT_SPACE: Not(string.whitespace),\n+}\n+\n+# This constant varies between builds of Python; this is the lower value.\n+MAX_REPEAT_COUNT = 65535\n+\n+\n+class ParseError(Exception):\n+ pass\n+\n+\n+def slice_indices(slice_obj, size):\n+ """slice_obj.indices() except this one supports longs."""\n+ # start stop step\n+ start = slice_obj.start\n+ stop = slice_obj.stop\n+ step = slice_obj.step\n+\n+ # We don\'t always update a value for negative indices (if we wrote it here\n+ # due to None).\n+ if step is None:\n+ step = 1\n+ if start is None:\n+ if step > 0:\n+ start = 0\n+ else:\n+ start = size - 1\n+ else:\n+ start = _adjust_index(start, size)\n+\n+ if stop is None:\n+ if step > 0:\n+ stop = size\n+ else:\n+ stop = -1\n+ else:\n+ stop = _adjust_index(stop, size)\n+\n+ return (start, stop, step)\n+\n+\n+def _adjust_index(n, size):\n+ if n < 0:\n+ n += size\n+\n+ if n < 0:\n+ raise IndexError("Out of range")\n+ if n > size:\n+ n = size\n+ return n\n+\n+\n+def _xrange(*args):\n+ """Because xrange doesn\'t support longs :("""\n+ # prefer real xrange if it works\n+ try:\n+ return xrange(*args)\n+ except OverflowError:\n+ return _bigrange(*args)\n+\n+\n+def _bigrange(*args):\n+ if len(args) == 1:\n+ start = 0; stop = args[0]; step = 1\n+ elif len(args) == 2:\n+ start, stop = args\n+ step = 1\n+ elif len(args) == 3:\n+ start, stop, step = args\n+ else:\n+ raise ValueError("Too many args for _bigrange")\n+\n+ i = start\n+ while True:\n+ yield i\n+ i += step\n+ if step < 0 and i <= stop:\n+ break\n+ if step > 0 and i >= stop:\n+ break\n+\n+\n+class WrappedSequence(object):\n+ """This wraps a sequence, purely as a base clase for the other uses."""\n+\n+ def __init__(self, raw):\n+ # Derived classes will li'..b'm/google/sre_yield/issues/3\n+ if flags & re.IGNORECASE:\n+ raise ParseError(\'Flag "i" not supported. 
https://github.com/google/sre_yield/issues/4\')\n+ elif flags & re.UNICODE:\n+ raise ParseError(\'Flag "u" not supported. https://github.com/google/sre_yield/issues/3\')\n+ elif flags & re.LOCALE:\n+ raise ParseError(\'Flag "l" not supported. https://github.com/google/sre_yield/issues/5\')\n+\n+ if max_count is None:\n+ self.max_count = MAX_REPEAT_COUNT\n+ else:\n+ self.max_count = max_count\n+\n+ self.has_groupref = False\n+\n+ # Configure the parser backends\n+ self.backends = {\n+ sre_constants.LITERAL: lambda y: [chr(y)],\n+ sre_constants.RANGE: lambda l, h: [chr(c) for c in range(l, h+1)],\n+ sre_constants.SUBPATTERN: self.maybe_save,\n+ sre_constants.BRANCH: self.branch_values,\n+ sre_constants.MIN_REPEAT: self.max_repeat_values,\n+ sre_constants.MAX_REPEAT: self.max_repeat_values,\n+ sre_constants.AT: self.nothing_added,\n+ sre_constants.ASSERT: self.empty_list,\n+ sre_constants.ASSERT_NOT: self.empty_list,\n+ sre_constants.ANY:\n+ lambda _: self.in_values(((sre_constants.NEGATE,),)),\n+ sre_constants.IN: self.in_values,\n+ sre_constants.NOT_LITERAL: self.not_literal,\n+ sre_constants.CATEGORY: self.category,\n+ sre_constants.GROUPREF: self.groupref,\n+ }\n+ self.state = STATE_START\n+ # Now build a generator that knows all possible patterns\n+ self.raw = self.sub_values(sre_parse.parse(pattern, flags))\n+ # Configure this class instance to know about that result\n+ self.length = self.raw.__len__()\n+\n+ def __contains__(self, item):\n+ # Since we have a regex, we can search the list really cheaply\n+ return self.matcher.match(item) is not None\n+\n+\n+class RegexMembershipSequenceMatches(RegexMembershipSequence):\n+ def __getitem__(self, i):\n+ if isinstance(i, slice):\n+ result = SlicedSequence(self, slicer=i)\n+ if result.__len__() < 16:\n+ # Short lists are unpacked\n+ result = [item for item in result]\n+ return result\n+\n+ d = {}\n+ s = super(RegexMembershipSequenceMatches, self).get_item(i, d)\n+ return Match(s, d, 
self.named_group_lookup)\n+\n+\n+def AllStrings(regex, flags=0, charset=CHARSET, max_count=None):\n+ """Constructs an object that will generate all matching strings."""\n+ return RegexMembershipSequence(regex, flags, charset, max_count=max_count)\n+\n+Values = AllStrings\n+\n+\n+class Match(object):\n+ def __init__(self, string, groups, named_groups):\n+ # TODO keep group(0) only, and spans for the rest.\n+ self._string = string\n+ self._groups = groups\n+ self._named_groups = named_groups\n+ self.lastindex = len(groups) + 1\n+\n+ def group(self, n=0):\n+ if n == 0:\n+ return self._string\n+ if not isinstance(n, int):\n+ n = self._named_groups[n]\n+ return self._groups[n]\n+\n+ def groups(self):\n+ return tuple(self._groups[i] for i in range(1, self.lastindex))\n+\n+ def groupdict(self):\n+ d = {}\n+ for k, v in self._named_groups.items():\n+ d[k] = self._groups[v]\n+ return d\n+\n+ def span(self, n=0):\n+ raise NotImplementedError()\n+\n+\n+def AllMatches(regex, flags=0, charset=CHARSET, max_count=None):\n+ """Constructs an object that will generate all matching strings."""\n+ return RegexMembershipSequenceMatches(regex, flags, charset, max_count=max_count)\n+\n+\n+def main(argv=None):\n+ """This module can be executed on the command line for testing."""\n+ if argv is None:\n+ argv = sys.argv\n+ for arg in argv[1:]:\n+ for i in AllStrings(arg):\n+ print(i)\n+\n+\n+if __name__ == \'__main__\':\n+ main()\n' |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/sre_yield.pyc |
b |
Binary file CodonSwitchTool/sre_yield.pyc has changed |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/syngenic.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/CodonSwitchTool/syngenic.py Mon May 20 18:10:52 2019 -0400 |
[ |
b'@@ -0,0 +1,362 @@\n+#!/usr/bin/env python\r\n+\r\n+__author__= "Gianmarco Piccinno"\r\n+__version__ = "1.0.0"\r\n+\r\n+import Bio\r\n+from Bio import SeqIO\r\n+from Bio.Seq import Seq\r\n+from Bio.Alphabet import IUPAC\r\n+from Bio.Data import IUPACData\r\n+from Bio.Data import CodonTable\r\n+import re\r\n+import sre_yield\r\n+\r\n+import re\r\n+import itertools\r\n+from functools import reduce\r\n+\r\n+import Bio\r\n+from Bio import Data\r\n+from Bio.Data import IUPACData\r\n+from Bio.Data import CodonTable\r\n+\r\n+from pprint import pprint\r\n+\r\n+import pandas as pd\r\n+\r\n+def _check_bases(seq_string):\r\n+ """\r\n+ Check characters in a string (PRIVATE).\r\n+ Remove digits and white space present in string. Allows any valid ambiguous\r\n+ IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, upper case are converted).\r\n+\r\n+ Other characters (e.g. symbols) trigger a TypeError.\r\n+\r\n+ Returns the string WITH A LEADING SPACE (!). This is for backwards\r\n+ compatibility, and may in part be explained by the fact that\r\n+ Bio.Restriction doesn\'t use zero based counting.\r\n+ """\r\n+ # Remove white space and make upper case:\r\n+ seq_string = "".join(seq_string.split()).upper()\r\n+ # Remove digits\r\n+ for c in "0123456789":\r\n+ seq_string = seq_string.replace(c, "")\r\n+ # Check only allowed IUPAC letters\r\n+ if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):\r\n+ raise TypeError("Invalid character found in %s" % repr(seq_string))\r\n+ return " " + seq_string\r\n+\r\n+ matching = {\'A\': \'ARWMHVDN\', \'C\': \'CYSMHBVN\', \'G\': \'GRSKBVDN\',\r\n+ \'T\': \'TYWKHBDN\', \'R\': \'ABDGHKMNSRWV\', \'Y\': \'CBDHKMNSTWVY\',\r\n+ \'W\': \'ABDHKMNRTWVY\', \'S\': \'CBDGHKMNSRVY\', \'M\': \'ACBDHMNSRWVY\',\r\n+ \'K\': \'BDGHKNSRTWVY\', \'H\': \'ACBDHKMNSRTWVY\',\r\n+ \'B\': \'CBDGHKMNSRTWVY\', \'V\': \'ACBDGHKMNSRWVY\',\r\n+ \'D\': \'ABDGHKMNSRTWVY\', \'N\': \'ACBDGHKMNSRTWVY\'}\r\n+\r\n+class pattern(object):\r\n+\r\n+\r\n+ def __init__(self, 
pattern_input):\r\n+ s = str(pattern_input)\r\n+ self.upper = s.isupper()\r\n+ self.data = _check_bases(s)\r\n+ self.pattern = s\r\n+\r\n+ def plan_ambiguity(self):\r\n+ val = Bio.Data.IUPACData.ambiguous_dna_values\r\n+ re_pattern = ""\r\n+ for el in self.pattern:\r\n+ re_pattern = re_pattern + "[" + val[el] + "]"\r\n+ return re_pattern\r\n+\r\n+class annotated_genome(object):\r\n+\r\n+ def __init__(self, seq):\r\n+ s = str(seq)\r\n+ self.upper = s.isupper()\r\n+ self.data = _check_bases(s)\r\n+ self.seq = s\r\n+\r\n+ def codon_usage(self, codonTable):\r\n+\r\n+ codon_usage = {}\r\n+ tmp = [x for x in re.split(r\'(\\w{3})\', self.seq) if x != ""]\r\n+\r\n+ b_cod_table = CodonTable.unambiguous_dna_by_name["Bacterial"].forward_table\r\n+ aas = set(b_cod_table.values())\r\n+\r\n+ for aa in aas:\r\n+ codon_usage[aa] = {}\r\n+ for codon in b_cod_table.keys():\r\n+ if b_cod_table[codon] == aa:\r\n+ codon_usage[aa][codon] = tmp.count(codon)\r\n+\r\n+ tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()}\r\n+\r\n+ codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])\r\n+ codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])\r\n+ codon_usage_[\'Proportion\'] = codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2))\r\n+\r\n+ codon_usage_.reset_index(inplace=True)\r\n+ codon_usage_.index = codon_usage_["Codon"]\r\n+\r\n+ return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_}\r\n+\r\n+class plasmid(object):\r\n+ """\r\n+ This class represents a circular plasmid\r\n+ """\r\n+\r\n+ def __init__(self, seq = "", circular=True, features = None):\r\n+\r\n+ if type(seq) in [Bio.SeqRecord.SeqRecord, plasmid, Seq]:\r\n+ s = str(seq.seq)\r\n+ self.features = seq.features\r\n+ else:\r\n+ s = str(seq)\r\n+ i'..b'codons:\r\n+ b_cod_table[cod] = "_Stop"\r\n+\r\n+ for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons:\r\n+ 
#print(cod)\r\n+ b_cod_table[cod] = b_cod_table[cod]\r\n+\r\n+ aas = set(b_cod_table.values())\r\n+\r\n+ for aa in aas:\r\n+ #print(aa)\r\n+ #codon_usage[aa] = {}\r\n+ for codon in b_cod_table.keys():\r\n+ if b_cod_table[codon] == aa:\r\n+ codon_usage[codon] = tmp.count(codon.split(" ")[0])\r\n+\r\n+ return codon_usage\r\n+\r\n+\r\n+def read_annotated_genome(data="example.fna", type_="fasta"):\r\n+ """\r\n+ Accepted formats:\r\n+ - fasta (multifasta)\r\n+ - gbk\r\n+\r\n+ """\r\n+\r\n+ seqs = ""\r\n+\r\n+ if type_ == "fasta":\r\n+ with open(data, "rU") as handle:\r\n+ for record in SeqIO.parse(handle, type_):\r\n+ seqs = seqs + str(record.seq)\r\n+\r\n+ elif type_ == "genbank":\r\n+ with open(data, "rU") as input_handle:\r\n+ types = []\r\n+ for record in SeqIO.parse(input_handle, "genbank"):\r\n+ for feature in record.features:\r\n+ types.append(feature.type)\r\n+ if feature.type == "CDS":\r\n+ if feature.location.strand == +1:\r\n+ seq = record.seq[feature.location.start:feature.location.end]\r\n+ seqs = seqs + str(seq)\r\n+ elif feature.location.strand == -1:\r\n+ seq = record.seq[feature.location.start:\r\n+ feature.location.end].reverse_complement()\r\n+ seqs = seqs + str(seq)\r\n+ return seqs\r\n+\r\n+\r\n+def synonims_(table_name="Bacterial"):\r\n+\r\n+ b_cod_table = CodonTable.unambiguous_dna_by_name[table_name].forward_table\r\n+\r\n+ print(b_cod_table)\r\n+\r\n+ for cod in CodonTable.unambiguous_dna_by_name[table_name].stop_codons:\r\n+ b_cod_table[cod] = "_Stop"\r\n+\r\n+ for cod in CodonTable.unambiguous_dna_by_name[table_name].start_codons:\r\n+ b_cod_table[cod] = "_Start"\r\n+\r\n+ #pprint(b_cod_table)\r\n+ codons = {}\r\n+\r\n+ aas = set(b_cod_table.values())\r\n+\r\n+ for aa in aas:\r\n+ codons[aa] = []\r\n+ for codon in b_cod_table.keys():\r\n+ if b_cod_table[codon] == aa:\r\n+ codons[aa].append(codon)\r\n+\r\n+ #break\r\n+\r\n+ synonims = {}\r\n+\r\n+ for el1 in codons.keys():\r\n+ print(el1)\r\n+ for el2 in codons[el1]:\r\n+ print(el2)\r\n+ 
synonims[el2] = codons[el1]\r\n+ #synonims[el2] = []\r\n+ #for el3 in codons[el1]#set.difference(set(codons[el1]), {el2}):\r\n+ # print(el3)\r\n+ # synonims[el2].append(el3)\r\n+ #break\r\n+ #break\r\n+ #break\r\n+\r\n+\r\n+ anti_codons = {}\r\n+\r\n+ for codon in synonims.keys():\r\n+ tmp_codon = Bio.Seq.Seq(codon, IUPAC.unambiguous_dna)\r\n+ tmp_anticodon = str(tmp_codon.reverse_complement())\r\n+\r\n+ anti_codons[tmp_anticodon] = []\r\n+\r\n+ for synonim in synonims[codon]:\r\n+ tmp_synonim = Bio.Seq.Seq(synonim, IUPAC.unambiguous_dna)\r\n+ tmp_antisynonim = str(tmp_synonim.reverse_complement())\r\n+ anti_codons[tmp_anticodon].append(tmp_antisynonim)\r\n+\r\n+ check = Bio.Seq.Seq("CTT")\r\n+ anti_check = check.reverse_complement()\r\n+ print("\\nCheck:\\n" + str(check))\r\n+ print("\\nCodons:\\n")\r\n+\r\n+ for key in codons.keys():\r\n+ if str(check) in codons[key]:\r\n+ print(codons[key])\r\n+\r\n+ #pprint(codons)\r\n+ print("\\nSynonims:\\n")\r\n+ pprint(synonims[str(check)])\r\n+ print("\\nAnti_Codons:\\n")\r\n+ pprint(anti_codons[str(anti_check)])\r\n+\r\n+ #i = synonims.keys()\r\n+ #right = True\r\n+ #while len(i) > 0:\r\n+ # tmp = i.pop()\r\n+ # check = Bio.Seq.Seq(tmp)\r\n+ # anti_check = check.reverse_complement()\r\n+\r\n+\r\n+ return {"synonims":synonims, "anti_synonims":anti_codons}\r\n' |
b |
diff -r 611cac5e3066 -r 640db7b6847b CodonSwitchTool/syngenic.pyc |
b |
Binary file CodonSwitchTool/syngenic.pyc has changed |
b |
diff -r 611cac5e3066 -r 640db7b6847b project_rm/codon_usage.py --- a/project_rm/codon_usage.py Mon May 20 18:05:38 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,148 +0,0 @@ -#!/usr/bin/env python - -import Bio as Bio -from Bio import SeqIO -from Bio.Data import CodonTable -import re -from pprint import pprint -import argparse as ap -import sys -import os -import pandas as pd - - -def read_input(data = "example.fna", type_ = "fasta"): - - """ - Accepted formats: - - fasta (multifasta) - - gff - - gbk - - """ - - seqs = "" - - if type_ == "fasta": - with open(data, "rU") as handle: - for record in SeqIO.parse(handle, type_): - seqs = seqs + str(record.seq) - - elif type_ == "gbk": - with open(data, "rU") as input_handle: - types = [] - for record in SeqIO.parse(input_handle, "genbank"): - for feature in record.features: - types.append(feature.type) - if feature.type == "CDS": - if feature.location.strand == +1: - seq = record.seq[feature.location.start:feature.location.end] - seqs = seqs + str(seq) - elif feature.location.strand == -1: - seq = record.seq[feature.location.start:feature.location.end].reverse_complement - seqs = seqs + str(seq) - return seqs - -def codon_usage(seqs, codonTable): - - codon_usage = {} - tmp = [x for x in re.split(r'(\w{3})', seqs) if x != ""] - - b_cod_table = CodonTable.unambiguous_dna_by_name[codonTable].forward_table - - - for cod in CodonTable.unambiguous_dna_by_name[codonTable].stop_codons: - b_cod_table[cod] = "_Stop" - - for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons: - b_cod_table[cod + " Start"] = b_cod_table[cod] - b_cod_table.pop(cod) - - aas = set(b_cod_table.values()) - - - for aa in aas: - codon_usage[aa] = {} - for codon in b_cod_table.keys(): - if b_cod_table[codon] == aa: - codon_usage[aa][codon] = tmp.count(codon.split(" ")[0]) - - - tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()} - - codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"]) - codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"]) - codon_usage_['Proportion'] = 
codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2)) - - return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_} - -if __name__ == '__main__': - - parser = ap.ArgumentParser(description= - 'This script takes as input gff, gbk and single or multifasta files and \n' - 'compute the codon usage for a specified codon table.\n' - 'Usage:\n' - 'python codon_usage.py -i example.gbk -t genebank -o gbk_example -c Bacterial\n' - 'python codon_usage.py -i example.ffn -t fasta -o fasta_example -c Bacterial\n' - 'python codon_usage.py -i example.gff -t gff -o gff_example -c Bacterial\n', - formatter_class=ap.RawTextHelpFormatter) - - parser.add_argument('-i','--input', help='The path to the input file',required=True) - parser.add_argument('-t','--type', help= - 'The format of the file [genebank, fasta, gff ...]', required=True) - parser.add_argument('-c','--codonTable', help= - 'The codon table to be used [Standard, Bacterial, Archaeal ...]\n' - 'Alternative Flatworm Mitochondrial,\\n' - 'Alternative Yeast Nuclear,\n' - 'Archaeal,\n' - 'Ascidian Mitochondrial,\n' - 'Bacterial,\n' - 'Blastocrithidia Nuclear,\n' - 'Blepharisma Macronuclear,\n' - 'Candidate Division SR1,\n' - 'Chlorophycean Mitochondrial,\n' - 'Ciliate Nuclear,\n' - 'Coelenterate Mitochondrial,\n' - 'Condylostoma Nuclear,\n' - 'Dasycladacean Nuclear,\n' - 'Echinoderm Mitochondrial,\n' - 'Euplotid Nuclear,\n' - 'Flatworm Mitochondrial,\n' - 'Gracilibacteria,\n' - 'Hexamita Nuclear,\n' - 'Invertebrate Mitochondrial,\n' - 'Karyorelict Nuclear,\n' - 'Mesodinium Nuclear,\n' - 'Mold Mitochondrial,\n' - 'Mycoplasma,\n' - 'Pachysolen tannophilus Nuclear,\n' - 'Peritrich Nuclear,\n' - 'Plant Plastid,\n' - 'Protozoan Mitochondrial,\n' - 'Pterobranchia Mitochondrial,\n' - 'SGC0,\n' - 'SGC1,\n' - 'SGC2,\n' - 'SGC3,\n' - 'SGC4,\n' - 'SGC5,\n' - 'SGC8,\n' - 'SGC9,\n' - 'Scenedesmus obliquus Mitochondrial,\n' - 'Spiroplasma,\n' - 'Standard,\n' - 'Thraustochytrium Mitochondrial,\n' - 
'Trematode Mitochondrial,\n' - 'Vertebrate Mitochondrial,\n' - 'Yeast Mitochondrial\n', required=True) - - parser.add_argument('-o','--output', help='Description for bar argument', required=True) - args = vars(parser.parse_args()) - - seqs = read_input(data=args['input'], type_=args['type']) - out = codon_usage(seqs, args['codonTable']) - - with open(args['output'], "w") as outf: - out["Table"].to_csv(outf, sep="\t", index_label=["AA", "Codon"]) - - |
b |
diff -r 611cac5e3066 -r 640db7b6847b project_rm/codon_usage.xml --- a/project_rm/codon_usage.xml Mon May 20 18:05:38 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,80 +0,0 @@ -<?xml version="1.0"?> -<tool name="Codon Usage" id="codon_usage" version="0.2.6"> - <description>for each sequence in a file</description> - <requirements> - <requirement type="package" version="2.7.15">python</requirement> - <requirement type="package" version="1.72">biopython</requirement> - <requirement type="package" version="0.23.4">pandas</requirement> - <requirement type="package" version="1.15.3">numpy</requirement> - </requirements> - - <command> -<![CDATA[ - python '$__tool_directory__/codon_usage.py' -i $input -t $input_type -o $output -c $codon_table -]]> - </command> - <inputs> - <param name="input" format="fasta" type="data" label="Source file"/> - - <param name="input_type" type="select" format="text"> - <label>Indicate the input file format</label> - <option value="fasta">Fasta</option> - <option value="gbk">gbk</option> - </param> - - <param name="codon_table" type="select" format="text"> - <label>Choose the proper codon table for your organism)</label> - <option value="Alternative Flatworm Mitochondrial">"Alternative Flatworm Mitochondrial"</option> - <option value="Alternative Yeast Nuclear">"Alternative Yeast Nuclear"</option> - <option value="Archaeal">"Archaeal"</option> - <option value="Ascidian Mitochondrial">"Ascidian Mitochondrial"</option> - <option value="Bacterial">"Bacterial"</option> - <option value="Blastocrithidia Nuclear">"Blastocrithidia Nuclear"</option> - <option value="Blepharisma Macronuclear">"Blepharisma Macronuclear"</option> - <option value="Candidate Division SR1">"Candidate Division SR1"</option> - <option value="Chlorophycean Mitochondrial">"Chlorophycean Mitochondrial"</option> - <option value="Ciliate Nuclear">"Ciliate Nuclear"</option> - <option value="Coelenterate Mitochondrial">"Coelenterate Mitochondrial"</option> - <option value="Condylostoma Nuclear">"Condylostoma Nuclear"</option> - <option value="Dasycladacean Nuclear">"Dasycladacean Nuclear"</option> - <option value="Echinoderm 
Mitochondrial">"Echinoderm Mitochondrial"</option> - <option value="Euplotid Nuclear">"Euplotid Nuclear"</option> - <option value="Flatworm Mitochondrial">"Flatworm Mitochondrial"</option> - <option value="Gracilibacteria">"Gracilibacteria"</option> - <option value="Hexamita Nuclear">"Hexamita Nuclear"</option> - <option value="Invertebrate Mitochondrial">"Invertebrate Mitochondrial"</option> - <option value="Karyorelict Nuclear">"Karyorelict Nuclear"</option> - <option value="Mesodinium Nuclear">"Mesodinium Nuclear"</option> - <option value="Mold Mitochondrial">"Mold Mitochondrial"</option> - <option value="Mycoplasma">"Mycoplasma"</option> - <option value="Pachysolen tannophilus Nuclear">"Pachysolen tannophilus Nuclear"</option> - <option value="Peritrich Nuclear">"Peritrich Nuclear"</option> - <option value="Plant Plastid">"Plant Plastid"</option> - <option value="Protozoan Mitochondrial">"Protozoan Mitochondrial"</option> - <option value="Pterobranchia Mitochondrial">"Pterobranchia Mitochondrial"</option> - <option value="SGC0">"SGC0"</option> - <option value="SGC1">"SGC1"</option> - <option value="SGC2">"SGC2"</option> - <option value="SGC3">"SGC3"</option> - <option value="SGC4">"SGC4"</option> - <option value="SGC5">"SGC5"</option> - <option value="SGC8">"SGC8"</option> - <option value="SGC9">"SGC9"</option> - <option value="Scenedesmus obliquus Mitochondrial">"Scenedesmus obliquus Mitochondrial"</option> - <option value="Spiroplasma">"Spiroplasma"</option> - <option value="Standard">"Standard"</option> - <option value="Thraustochytrium Mitochondrial">"Thraustochytrium Mitochondrial"</option> - <option value="Trematode Mitochondrial">"Trematode Mitochondrial"</option> - <option value="Vertebrate Mitochondrial">"Vertebrate Mitochondrial"</option> - <option value="Yeast Mitochondrial">"Yeast Mitochondrial"</option> - </param> - </inputs> - - <outputs> - <data format="tabular" name="output" /> - </outputs> - - <help> -This tool computes codon usage of an 
annotated genome [preferably Prokaryotes]. - </help> -</tool> |