Repository 'cs_tool_project_rm'
hg clone https://toolshed.g2.bx.psu.edu/repos/gianmarco_piccinno/cs_tool_project_rm

Changeset 1:1c31d6d25429 (2019-05-21)
Previous changeset 0:5397da1ef896 (2019-05-21) Next changeset 2:aad5e435e4dc (2019-05-21)
Commit message:
Deleted selected files
removed:
S_aureus_JE2.gbf
cachingseq.py
cachingseq.pyc
codon_switch.py
codon_switch.xml
comparison_syngenic_plasmids.pdf
fastdivmod.py
fastdivmod.pyc
functions.py
functions.pyc
further_information.pdf
pEPSA5_annotated.gb
patterns.txt
run_codon_switch.sh
sre_yield.py
sre_yield.pyc
syngenic.py
syngenic.pyc
b
diff -r 5397da1ef896 -r 1c31d6d25429 S_aureus_JE2.gbf
--- a/S_aureus_JE2.gbf Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,92850 +0,0 @@\n-LOCUS       S.                   2903350 bp    DNA     linear       13-NOV-2018\n-DEFINITION  Streptococcus aureus strain JE2.\n-ACCESSION   \n-VERSION\n-KEYWORDS    .\n-SOURCE      Streptococcus aureus\n-  ORGANISM  Streptococcus aureus\n-            Unclassified.\n-COMMENT     Annotated using prokka 1.12 from\n-            https://github.com/tseemann/prokka.\n-FEATURES             Location/Qualifiers\n-     source          1..2903350\n-                     /organism="Streptococcus aureus"\n-                     /mol_type="genomic DNA"\n-                     /strain="JE2"\n-     gene            complement(40..321)\n-                     /locus_tag="SaJE2__00001"\n-     CDS             complement(40..321)\n-                     /locus_tag="SaJE2__00001"\n-                     /inference="ab initio prediction:Prodigal:2.6"\n-                     /codon_start=1\n-                     /transl_table=11\n-                     /product="hypothetical protein"\n-                     /translation="MAERISSKIRRLEKSEEQIKLESLNEVTEAIAANKDSILKAIKL\n-                     IKTLDDAKLLDALNGAIRGRQVIINNLQLNLIKIFIQGYYLIWLQWYFY"\n-     gene            complement(798..1187)\n-                     /locus_tag="SaJE2__00002"\n-     CDS             complement(798..1187)\n-                     /locus_tag="SaJE2__00002"\n-                     /EC_number="1.2.1.2"\n-                     /inference="ab initio prediction:Prodigal:2.6"\n-                     /inference="similar to AA sequence:UniProtKB:Q99RW4"\n-                     /codon_start=1\n-                     /transl_table=11\n-                     /product="Putative formate dehydrogenase"\n-                     /translation="MPASPSLERRYFYKYRRRIQRLYQALEPLGDSKPDWKIFQAIAN\n-                     RLGFDWNYKHPSEIMDEVARLTPLYAGVSYDRLEGFNSLQWPVQPDGTDEPILYLEGF\n-                     NFDNGKANYSHYHLIITLSKMKFMIFM"\n-     gene            complement(1165..2010)\n-                     /locus_tag="SaJE2__00003"\n-     CDS             complement(1165..2010)\n-                     /locus_tag="SaJE2__00003"\n-                     /EC_number="1.2.1.2"\n-                     /inference="ab initio prediction:Prodigal:2.6"\n-                     /inference="similar to AA sequence:UniProtKB:Q99RW4"\n-                     /codon_start=1\n-                     /transl_table=11\n-                     /product="Putative formate dehydrogenase"\n-                     /translation="MSVLIGTNTAEAHPVIASRMKRAQKLFGQKIHVFDIRKHEMAER\n-                     ADRFYQPKPGTDLAWLSAVTKYIIDHDLHDKAFIDEWVDDFDEYYKSLETFTMAFAEE\n-                     ATGIPESELIKFAEECAKAESVVICWAMGITQQDIGSDSSTAISNLLLVTGNYRRPGT\n-                     GAYPLRGHNNVQGCSDMGSMPDKITGYQSIEADDIRAKFEKEYGVKLNPKAGKDNHEM\n-                     VEGIHDGEVHSLYLYGEDTGIVDSNINFVQAAFEKLDFMVVQDEFFNIHSNIRRCCIA\n-                     SKSFT"\n-     gene            complement(1988..2218)\n-                     /locus_tag="SaJE2__00004"\n-     CDS             complement(1988..2218)\n-                     /locus_tag="SaJE2__00004"\n-                     /EC_number="1.2.1.2"\n-                     /inference="ab initio prediction:Prodigal:2.6"\n-                     /inference="similar to AA sequence:UniProtKB:Q99RW4"\n-                     /codon_start=1\n-                     /transl_table=11\n-                     /product="Putative formate dehydrogenase"\n-                     /translation="MAQMHYHSFSSKATNEESYLMQKLARQVIGTNNVDNCSRYCQAP\n-                     ATKGLFRTVGHGGDSGSIEDLEKSGNVCIDRY"\n-     gene            complement(2184..2399)\n-                     /locus_tag="SaJE2__00005"\n-     CDS             complement(2184..2399)\n-                     /locus_tag="SaJE2__00005"\n-                     /EC_number="1.2.1.2"\n-                     /inference="ab initio prediction:Prodigal:2.6"\n-                     /inference="similar to AA sequence:UniProtKB:Q99RW4"\n-                     /codon_start=1\n-                     /transl_table=11\n-                     /product="Putative formate dehydrogenase"\n-                     /translation="MILQQIKIA'..b'c cttctccacg ttctttcgcc tcttctgcta attttaatgc\n-  2900281 ttcatctaaa tcagctgttt taacatcaca gtatttcgta tcaattcgct tatcaacacg\n-  2900341 tgtttcatca acatccacgc aaattgctac cccatgattc atagtaattg ctaacggttg\n-  2900401 cgcaccaccc ataccaccta aacctgctgt cagtgtaaca gtgcctgcta aatctccatt\n-  2900461 aaagtgttga ttacctagct cggcaaatgt ctcataagta ccttgcacaa taccttgaga\n-  2900521 accaatatat atccaactac cggctgtcat ctgtccatac atgattaaac cttttttatc\n-  2900581 taattcatta aaatgatccc agtttgccca ttcaggcact aatactgaat ttgaaattaa\n-  2900641 tacacgtggc gcttcttcat gtgttttaaa tacagcaact ggctttcctg attgtactaa\n-  2900701 cattgtctca tctgattcta attctcgtaa cgttttctct attgcttcaa aagcttccca\n-  2900761 attacgtgct gcttttccaa taccaccata aacaactaaa tcttctggtc tttcagcaac\n-  2900821 ttctgggtct aaattgttgt ataacattct aagtactgct tcttgttccc aacctttaca\n-  2900881 ctcaatactc aaaccttttt ttgcttgaat ttttctcata aaattcgctc ctgttctttt\n-  2900941 aagaagttaa ttccactaaa tttaaaacgc ttacattatt atcttcaata ttcattatag\n-  2901001 tatgttaaaa tatagccaac aaatataaat aaactaatta tccatagctt gaatctataa\n-  2901061 ataaaaggag caaaacacat gaaaattatt cagttagaat acttcttggc tatcgtgaaa\n-  2901121 tataatagtt ttactaaagc tgcacaattt ttacatatta gccagccatc tttaactgct\n-  2901181 acgattaaaa aaaatggaag cagatttagg ttatgactta tttacacgtt caacaaaaga\n-  2901241 catcaagatt accgaaaaag gaatacagtt ttatcgttat gcgagcgaat tagttcaaca\n-  2901301 atatcgatcc acgatggaaa aaatgtatga tttaagcgtt acatcagaac caaggataaa\n-  2901361 aattgggact cttgaatcta cgaatcaatg gattgcgaat ttaattcgaa agcaccattc\n-  2901421 cgactaccct gaacagcaat atcgtttata tgaaatacat gataaacatc aatctataga\n-  2901481 gcaattactg aattttaata ttcatttagc tataacaaat gaaaaataac ccacgaagat\n-  2901541 ataagatcca ttcctttata tgaggaatct tacattttat tagcacccaa ggaaacattt\n-  2901601 aaaaatcaaa attgggtaga tgttgaaaat ttgccactca tattaccaaa caaaaattct\n-  2901661 caagtgcgca aacacttaga tgactatttt aatagaagaa atattcgtcc aaatgtcgtt\n-  2901721 gtagaaacag atcgattcga atcagcagtt ggatttgttc atctcggctt aggttacgct\n-  2901781 atcattccga gattttatta ccaatcattt cacacgtcta atttagaata taaaaaaaat\n-  2901841 tcgtccaaac ttaggccgaa aaatttatat caattaccat aaaaaacgca aacactccga\n-  2901901 acagtacata cattcgtaca acaatgccaa gattatttat atggactttt agaggctctt\n-  2901961 taacttaagt tattagagcc tcttatgcag ttgcgcagat catcgtataa aaattaatga\n-  2902021 cgtcatttca aaaatcgata caaaaataat ttattataaa aattctaaga aagtgaagca\n-  2902081 gatgttaaaa tctattaatc atatatgctt ttcagtcaga aatttaaacg attcaataca\n-  2902141 tttttataga gatattttac ttgggaaatt gctattgact ggtaaaaaaa ctgcttattt\n-  2902201 tgagcttgca ggcctatgga ttgctttaaa tgaagaaaaa gatataccac gtaatgaaat\n-  2902261 tcacttttca tatacacata tagctttcac tatagatgac agcgaattta aatattggca\n-  2902321 tcagaggtta aaagataata acgtgaatat tttagaagga agagttagag atattagaga\n-  2902381 tagacaatca atttacttta ccgaccctga tggtcataag ctagaattac atactggcac\n-  2902441 acttgagaac agattaaatt attataaaga ggctaaacca catatgacat tttacaaata\n-  2902501 aggtgtcatt ataaaaaggc ctcttgaact ccgttaaaat tttaattaat tattatataa\n-  2902561 taagagaact tttcaaacaa tacagttgtt atttttgcta tttcaacaaa cataaataag\n-  2902621 cagtaagatg actacaactt aagagtcttc ttactgcaat tatttttcaa atatatcaac\n-  2902681 gttaatataa cttctattaa gaaatactca cattctgccc tgcaatgcaa atctcgtcac\n-  2902741 atataaatat ttttaattat tttaaaaaat gatgcactaa attagcaacg agcttagcag\n-  2902801 ttctattgtc agcgtcatat gttggattca tctcagcaat actaactgaa gacaccttat\n-  2902861 cacttggaat aatacgtttt gctaattcaa gaacagtatg tggatacaaa cctaacactg\n-  2902921 ccggcgcact taccccaggc gcaaacgcac tatcaatgac atccatacaa atcgtaaaca\n-  2902981 taatgacatc atgttcatgt acaaaacgtt caatcatatc tttaattgtt ggtgatacgt\n-  2903041 gactcaataa ttcatctgca aagacataat caatcttttt ctctttagca taatcaaata\n-  2903101 aaactttgcg tattaccacc ttgagcaata ccaagcacta aataatctgt gttttcatct\n-  2903161 tcttctaaaa tttgtctaaa gctcgttcca gatgtagatt gttgttcagc acgtgtatca\n-  2903221 aaatgcgcat caatatttat cacaccaata gattgtgttg gatagacttt acgtgttgct\n-  2903281 aaatattgag catacgcaat atcatgtcca ccacctaata aaaatgtttg tctatgatta\n-  2903341 gcaattgact\n-//\n'
b
diff -r 5397da1ef896 -r 1c31d6d25429 cachingseq.py
--- a/cachingseq.py Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,55 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright 2011-2016 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# vim: sw=2 sts=2 et
-
-class CachingFuncSequence(object):
-    def __init__(self, func, length, inc_func=None):
-        """
-        length: Length of this sequence.
-        func: function(index)
-        inc_func: function(index, value_of_previous)
-        """
-
-        self.func = func
-        self.inc_func = inc_func
-        self.length = length
-        self._cache = {}
-
-    def __getitem__(self, i):
-        if i < 0:
-            i += self.length
-        if i < 0 or i >= self.length:
-            raise IndexError()
-
-        v = self._cache.get(i)
-        if v is not None:
-            return v
-
-        if self.inc_func and i-1 in self._cache:
-            v = self.inc_func(i, self._cache[i-1])
-        else:
-            v = self.func(i)
-
-        self._cache[i] = v
-        return v
-
-    def __len__(self):
-        return self.length
-
-    def __iter__(self):
-        for i in range(self.length):
-            yield self[i]
b
diff -r 5397da1ef896 -r 1c31d6d25429 cachingseq.pyc
b
Binary file cachingseq.pyc has changed
b
diff -r 5397da1ef896 -r 1c31d6d25429 codon_switch.py
--- a/codon_switch.py Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,172 +0,0 @@
-#!/usr/bin/env python
-
-__author__= "Gianmarco Piccinno"
-__version__ = "1.0.0"
-
-from syngenic import *
-from functions import *
-from Bio import *
-import argparse as ap
-
-if __name__ == '__main__':
-
-    parser = ap.ArgumentParser(description="", formatter_class=ap.RawTextHelpFormatter)
-
-    parser.add_argument(
-        '-i', '--input_plasmid', help='Input plasmid', required=True)
-    parser.add_argument(
-        '-l', '--plasmid_format', help='Format of the plasmid: {fasta, genbank}', required=True)
-    parser.add_argument(
-        '-p', '--input_patterns', help='Input patterns separated by new_line', required=True)
-    parser.add_argument(
-        '-g', '--input_genome', help='Input annotated genome', required=True)
-    parser.add_argument(
-        '-q', '--genome_format', help='Format of the annotated genome: {fasta, gbk}', required=True)
-    parser.add_argument(
-        '-c', '--codon_table', help='Codon table to be used {Bacterial}', required=True)
-    parser.add_argument(
-        '-m', '--max_row', help='Max row length when print', required=False)
-    parser.add_argument(
-        '-d', '--demonstration', help='Use demonstration simplication', required=False)
-    parser.add_argument(
-        '-f', '--n_plasmids', help='Use demonstration simplication', required=False)
-    parser.add_argument(
-        '-o', '--output_folder', help='Folder for writing the output file', required=True)
-    args = vars(parser.parse_args())
-
-    """
-
-    python codon_switch_v2.py
-        -i ./pEPSA5_annotated.gb
-        -l genbank
-        -p ./patterns.txt
-        -g S_aureus_JE2.gbf
-        -q gbk -c Bacterial
-        -o ./output
-
-        python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -o ./output
-
-    """
-
-
-    pl = SeqIO.read(
-        open(args['input_plasmid'], "r"), args['plasmid_format'])
-
-    if args['demonstration'] == "demonstration":
-        pl = pl[0:3000]
-    pats = read_patterns(args['input_patterns'])
-
-
-    #############################################################
-    #
-    #############################################################
-
-    #pl = fake_from_real(path = "./data/pEPSA5_annotated.gb", id_ = "Trial", name = "Fake_plasmid")
-    print(type(pl))
-    print(pl); print(pl.seq); print(pl.features)
-
-    #for feat in pl.features:
-    #    print(str(feat.extract(pl)))
-    #    print(str(pl[feat.location.start:feat.location.end]))
-    #    print("\n")
-
-
-    n_pl = plasmid(pl)
-    print(n_pl); print(len(n_pl))
-    print(n_pl.features)
-
-
-    patts, n_patts = all_patterns(input_ = pats)
-
-
-    f_patts = n_pl.findpatterns(n_patts, patts)
-    print(f_patts)
-    print(pl.seq)
-    print(len(pl.seq))
-
-
-    n_poss = punctuate_targets(f_patts, n_pl)
-    print(n_poss)
-
-    print_seq(n_pl.seq)
-
-    synonims_tables = synonims_(table_name=args['codon_table'])
-
-    synonims_tables
-
-    plasmids = generalization(n_poss, n_pl, synonims_tables)
-
-    print(len(plasmids))
-
-    #plasmids
-
-    #if len(plasmids) > 5000000:
-        #redo generalization without considering internal bases
-        #in target sites that are not in CDS
-        #this means considering only the outer bases of the target
-    #    plasmids = generalization(n_poss, n_pl, synonims_tables,
-    #                              reduced = True)
-
-    #########################################################
-    # Read plasmid and compute codon usage
-    #########################################################
-
-    genome = annotated_genome(read_annotated_genome(
-            data=args['input_genome'], type_=args['genome_format']))
-
-    out_genome = genome.codon_usage(args['codon_table'])
-    print(out_genome.keys())
-    print(out_genome["Table"])
-
-    print(out_genome["Table"].loc["GCA"]["Proportion"])
-    print(type(out_genome["Table"].loc["GCA"]["Proportion"]))
-
-
-    #########################################################
-    # Evaluate the plasmid
-    #########################################################
-
-    useful_plasmids = evaluate_plasmids(plasmids = plasmids,
-                                        original_plasmid = n_pl,
-                                        codon_usage_table = out_genome["Table"],
-                                        n_patts = n_patts,
-                                        f_patts = patts)
-
-    dat_plasmids = rank_plasmids(original_useful_plasmids = useful_plasmids)
-
-    def_pls = dat_plasmids.index[:int(args['n_plasmids'])]
-
-    for to_save in def_pls:
-        #print(to_save)
-        #print(useful_plasmids[to_save])
-        with open(to_save+".fa", "w") as handle:
-            handle.write(">"+to_save+"\n")
-            handle.write(useful_plasmids[to_save]["sequence"])
-
-
-
-    if args['max_row'] != None:
-        tmp_max_row = int(args['max_row'])
-    else:
-        tmp_max_row = 27
-
-    print_color_seq(original = n_pl,
-                    others = def_pls,
-                    annotation_information = useful_plasmids,
-                    tot = useful_plasmids,
-                    ind_range = None,
-                    patterns = n_poss,
-                    f_patterns = f_patts,
-                    patts = patts,
-                    max_row = tmp_max_row)
-
-
-    print_to_pdf(original = n_pl,
-                 others = def_pls,
-                 annotation_information = useful_plasmids,
-                 tot = useful_plasmids,
-                 ind_range = None,
-                 patterns = n_poss,
-                 f_patterns = f_patts,
-                 patts = patts,
-                 max_row = tmp_max_row)
b
diff -r 5397da1ef896 -r 1c31d6d25429 codon_switch.xml
--- a/codon_switch.xml Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,107 +0,0 @@
-<?xml version="1.0"?>
-<tool name="Codon Switch Tool" id="codon_switch" version="0.2.6">
-  <description>for each sequence in a file</description>
-  <requirements>
-    <requirement type="package" version="2.7.15">python</requirement>
-    <requirement type="package" version="1.72">biopython</requirement>
-    <requirement type="package" version="0.23.4">pandas</requirement>
-    <requirement type="package" version="1.15.3">numpy</requirement>
-  </requirements>
-
-  <command>
-<![CDATA[
-  python '$__tool_directory__/codon_switch.py' -i $input -l $input_type -p $patterns -g $genome -g $genome_type -c $codon_table -f $num_plasmids -m $row_len -d $demonstration -o $output
-]]>
-  </command>
-  <inputs>
-    <param name="input" format="genbank" type="data" label="Input plasmid"/>
-
-    <param name="input_type" type="select" format="text">
- <label>Indicate the input file format.</label>
- <option value="genbank">genbank</option>
- </param>
-
-    <param name="patterns" format="txt" type="data" label="Patterns file"/>
-
-    <param name="genome" format="genbank" type="data" label="Input Genome"/>
-
-    <param name="genome_type" type="select" format="text">
- <label>Indicate the input genome format.</label>
-      <option value="fasta">fasta</option>
-      <option value="genbank">genbank</option>
- </param>
-
-    <param name="demonstration" type="select" format="text">
- <label>Demonstration</label>
-      <option value="demonstration">yes</option>
-      <option value="no_demonstration">no</option>
- </param>
-
-    <param name="num_plasmids" type="select" format="text">
- <label>Indicate the number of best output plamids.</label>
-      <option value=3>3</option>
-      <option value=7>7</option>
- </param>
-
-    <param name="row_len" type="select" format="text">
- <label>Indicate the length of the row in report file.</label>
-      <option value=9>9</option>
-      <option value=27>27</option>
- </param>
-
-    <param name="codon_table" type="select" format="text">
- <label>Choose the proper codon table for your organism.</label>
-        <option value="Alternative Flatworm Mitochondrial">"Alternative Flatworm Mitochondrial"</option>
-        <option value="Alternative Yeast Nuclear">"Alternative Yeast Nuclear"</option>
-        <option value="Archaeal">"Archaeal"</option>
-        <option value="Ascidian Mitochondrial">"Ascidian Mitochondrial"</option>
-        <option value="Bacterial">"Bacterial"</option>
-        <option value="Blastocrithidia Nuclear">"Blastocrithidia Nuclear"</option>
-        <option value="Blepharisma Macronuclear">"Blepharisma Macronuclear"</option>
-        <option value="Candidate Division SR1">"Candidate Division SR1"</option>
-        <option value="Chlorophycean Mitochondrial">"Chlorophycean Mitochondrial"</option>
-        <option value="Ciliate Nuclear">"Ciliate Nuclear"</option>
-        <option value="Coelenterate Mitochondrial">"Coelenterate Mitochondrial"</option>
-        <option value="Condylostoma Nuclear">"Condylostoma Nuclear"</option>
-        <option value="Dasycladacean Nuclear">"Dasycladacean Nuclear"</option>
-        <option value="Echinoderm Mitochondrial">"Echinoderm Mitochondrial"</option>
-        <option value="Euplotid Nuclear">"Euplotid Nuclear"</option>
-        <option value="Flatworm Mitochondrial">"Flatworm Mitochondrial"</option>
-        <option value="Gracilibacteria">"Gracilibacteria"</option>
-        <option value="Hexamita Nuclear">"Hexamita Nuclear"</option>
-        <option value="Invertebrate Mitochondrial">"Invertebrate Mitochondrial"</option>
-        <option value="Karyorelict Nuclear">"Karyorelict Nuclear"</option>
-        <option value="Mesodinium Nuclear">"Mesodinium Nuclear"</option>
-        <option value="Mold Mitochondrial">"Mold Mitochondrial"</option>
-        <option value="Mycoplasma">"Mycoplasma"</option>
-        <option value="Pachysolen tannophilus Nuclear">"Pachysolen tannophilus Nuclear"</option>
-        <option value="Peritrich Nuclear">"Peritrich Nuclear"</option>
-        <option value="Plant Plastid">"Plant Plastid"</option>
-        <option value="Protozoan Mitochondrial">"Protozoan Mitochondrial"</option>
-        <option value="Pterobranchia Mitochondrial">"Pterobranchia Mitochondrial"</option>
-        <option value="SGC0">"SGC0"</option>
-        <option value="SGC1">"SGC1"</option>
-        <option value="SGC2">"SGC2"</option>
-        <option value="SGC3">"SGC3"</option>
-        <option value="SGC4">"SGC4"</option>
-        <option value="SGC5">"SGC5"</option>
-        <option value="SGC8">"SGC8"</option>
-        <option value="SGC9">"SGC9"</option>
-        <option value="Scenedesmus obliquus Mitochondrial">"Scenedesmus obliquus Mitochondrial"</option>
-        <option value="Spiroplasma">"Spiroplasma"</option>
-        <option value="Standard">"Standard"</option>
-        <option value="Thraustochytrium Mitochondrial">"Thraustochytrium Mitochondrial"</option>
-        <option value="Trematode Mitochondrial">"Trematode Mitochondrial"</option>
-        <option value="Vertebrate Mitochondrial">"Vertebrate Mitochondrial"</option>
-        <option value="Yeast Mitochondrial">"Yeast Mitochondrial"</option>
- </param>
-  </inputs>
-
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <help>
-This tool permits codon switch and transversion in targeted regions.
-  </help>
-</tool>
b
diff -r 5397da1ef896 -r 1c31d6d25429 comparison_syngenic_plasmids.pdf
--- a/comparison_syngenic_plasmids.pdf Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,583 +0,0 @@\n-%PDF-1.4\n-%\x93\x8c\x8b\x9e ReportLab Generated PDF document http://www.reportlab.com\n-1 0 obj\n-<<\n-/F1 2 0 R\n->>\n-endobj\n-2 0 obj\n-<<\n-/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font\n->>\n-endobj\n-3 0 obj\n-<<\n-/Contents 34 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-4 0 obj\n-<<\n-/Contents 35 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-5 0 obj\n-<<\n-/Contents 36 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-6 0 obj\n-<<\n-/Contents 37 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-7 0 obj\n-<<\n-/Contents 38 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-8 0 obj\n-<<\n-/Contents 39 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-9 0 obj\n-<<\n-/Contents 40 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-10 0 obj\n-<<\n-/Contents 41 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-11 0 obj\n-<<\n-/Contents 42 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-12 0 obj\n-<<\n-/Contents 43 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-13 0 obj\n-<<\n-/Contents 44 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-14 0 obj\n-<<\n-/Contents 45 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-15 0 obj\n-<<\n-/Contents 46 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-16 0 obj\n-<<\n-/Contents 47 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-17 0 obj\n-<<\n-/Contents 48 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-18 0 obj\n-<<\n-/Contents 49 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-19 0 obj\n-<<\n-/Contents 50 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-20 0 obj\n-<<\n-/Contents 51 0 R /MediaBox [ 0 0 612 792 ] /Parent 33 0 R /Resources <<\n-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]\n->> /Rotate 0 /Trans <<\n-\n->> \n-  /Type /Page\n->>\n-endobj\n-21 0 obj\n-<<\n-/Contents 52 0 R /MediaBox [ 0 0 612 792 ]'..b'<\'0C\'i8=g_,3j,m4r6_4bNK6,=1cJgEAk/Kq\'##nBBs#RZk/\'J.)"BF-s.9*bfhi)I(:TO8\'l_%N8i,m4?D#RZmU<4%=O?jT+&95l:_6;Xn>TO8)rQ*6K^qtSWYJ%bjWh<g+^D$kXp(PRBk-NpejJhsWaJ:]$L\'HFq4-pg1g+ZKQi#UoI65X>o#JfP!$a[%Vs%30s<";K\'&THFFK"YU-.BL+o"$64X3";I@KTM5`_JIr#!KHQrG"7c%j^BWH5<u&B\\]t8R/495![o`1X0o`1X,Qj<D(n,V>T*oOZH"+Zt@&+D@Y%r_^oi9,h"`(IiJHmO#fY7s)A((),@&3RaPK*MMn4MW<\\4MV1,&+JE5!PTL8^peq$piaIUS:Lq?Ol-8F;XaZnl/SCBDBFB%lK8\'2gZR/t\'*22f!^KunJ>`bKD@cj+%3j`B5m2_J#l\'g>#WMnun8gr"@\\k1K^(NiL<sg,_$O%,]#U83=5m.5r,2eXm,2e*+#l&9K!T+>1i7gO#I(LL&c9MO18.^>S.>WMX1ff$77;uW]!6e.;LmnAU2$)SD-K)MP"FN.q!"_\'i5>s6:B4(diDO<\\j\'MQ;DJdM3E_$"5@?ighS"FN.q5X<-gHud%+HuLb?h[Lpj]>4Vt4.(P:(JMU^R"-7b-4h[2\'C_:,of&*m!I[ZBR%"\'e2ZY%9!koUI^itD;phhbd+Dq70VI0JN^]aSj0S*=TR"2@C\'C_9=$,f0H"<<""phhd:J:\\+&jFR^5i*ZW#)be%kK*h<r0EL&`R"-tH!C%2j+BAgri%(o_A$upJGk_-TTCf[%_qOd.r5g\'j,C_V-fL2>A+uSuTK8;,W#m"/o]]t@"#SD-B(nVEbk*-m2:5DliT+iL#o4Nc53AZIe$P@Gl&>\'a_k0*g$%$>RB&>\'gak0+r32#us4)8t]mLcI8I:C[tXAq4NL83&J^:Ce%YAe:RV,3I)l5!_T\\R6c4TLmTt#4@MZ\\R6c4\\LmTuNp`omApKmgY=X%=Q1O+9>bPL8V1od+ts+^3=bVj3?qju^<ZhKUMi-cmMC:WM/AoBkr*khIG\\ueq;N\\!iR<N7[Ha&7&FWln7.WOb^PAQ2O,Y#9Cj@Yn!*Q)`#0U/YEpl:N=!Wif6BQ)a.*<L3rYWlp%#X"Fq*YIY]%^ZohTqM_GtK.^8K3T%9L<L<I9*E.*+OK#f2aAcFdA#_-l10\'\\8m1rWCH\'##:]^P9Y<aF"5Ct7W5VX!`Fb>qnR"9Q`K]!r#dW\\,oUN2heo+K+Ma,.L1o`C1ot02m"5&TGfj?!IL610#,,JUgGE77G"QA?.=8!WcT:lJa-%:j1C.9or:rE>)k^?/,QC<>RU17B.tX!g.r"Wf*?s6`0/.-4l9<]!r%LA?%7s!e)D!F`A\'#ke^=92h?LbX"J:AS>qL:+J\\5]-HN0t\\-XccF`A(NoZL3>g]STP<>RTFJ5"M49ctQWF\\rhA,MB)TRm94L;K_Y-acKJ-]p$eg<EJW71q)]7SUr2f\\pC9I8ZJrM.;4mEX*52FV!@69KZ7%uKC_\\cWO`:(_b&B93&=+*Y/-Ml;KWGo$uFHO3&=+*Y/-Ml;KWGo%$IW+E+Y22?%Xp8>BPqN%$IW+FCpV6?)\'3.V!8kh)\'i56kfghC?)\'3.V!8kh)\'i56kfb0K]#N2:gfJW@b@@Zcg9+C\\N\\!iR<N7[Ha&7&FWln7.WOb^PAQ2O,Y#9Cj@Yn!*Q)`#0U/YEpl:N=!Wif6BQ)a.*<L<;if!al7hQrh[FZI(eWlnTqs/msb3j$!+Dmu+6bBp/c"5VMhr2G*ne`FK$56%dg>ufLJ]=PY!g7,+c>KBcLcB2qnjh5:kql[)-<S.\'7lLK]#?r\\!ZJ[j&W]B\\B"&1cl"eO;nL=tETpE=n`>XtD2US>qL*+Qoo1>ufLJ]=PKX`\'U?diZ\\pMH=c!a[B&RRhd\'Nj#Tg)4i[,3QH=^/`g]V)_r4BP=#t0\\_W\\/1@Zes;%9or:rE>.Ege[;ICg2g=32h>)P<*e8^i^0j8]=N/c?0Ai#b;I+g<F+A_i^0j8Wf*?3XH\\39(H$bWYtW]e9lTg<\\u5pUW\\/1@S@c@U<F\'t>lJa-%$^n#QRm:6BQ$l`cY,^f8GBN]l#MY#X=\'_S"cm2WCjLhijWijJ6Nifl>Vk-GZY/r,+P,`;<$C[dUR[f$XWOaE(Ts7VLj:jFBeMT?%;c7a4c4#l9CNa<O<*cgd"\']omc4#l9CNa<O<*cgdK;@e$SJ`\\QfBg_s<aE$fK;@e$SJ`\\QfBg^(WO`jX#"d@Q3&@d=fA]du[d%%d)\'i56iliOE\\sCe_gfJX+c"!leLAt3%oq0<"*r@n5+8`P253I8"hM_@1^].fi42oPAelI7jSA2:9cWaW"qHc:5oUHlOpUh=2XD]0>8\'e^YHI&(9cLl(1`OhYk^+Na)bA$A0il-0T@UBp)VlrDS%B^f#s-]pg^B)aE%Sd~>endstream\n-endobj\n-xref\n-0 62\n-0000000000 65535 f \n-0000000073 00000 n \n-0000000104 00000 n \n-0000000211 00000 n \n-0000000406 00000 n \n-0000000601 00000 n \n-0000000796 00000 n \n-0000000991 00000 n \n-0000001186 00000 n \n-0000001381 00000 n \n-0000001576 00000 n \n-0000001772 00000 n \n-0000001968 00000 n \n-0000002164 00000 n \n-0000002360 00000 n \n-0000002556 00000 n \n-0000002752 00000 n \n-0000002948 00000 n \n-0000003144 00000 n \n-0000003340 00000 n \n-0000003536 00000 n \n-0000003732 00000 n \n-0000003928 00000 n \n-0000004124 00000 n \n-0000004320 00000 n \n-0000004516 00000 n \n-0000004712 00000 n \n-0000004908 00000 n \n-0000005104 00000 n \n-0000005300 00000 n \n-0000005496 00000 n \n-0000005692 00000 n \n-0000005762 00000 n \n-0000006046 00000 n \n-0000006296 00000 n \n-0000010689 00000 n \n-0000014290 00000 n \n-0000017887 00000 n \n-0000021473 00000 n \n-0000025034 00000 n \n-0000028620 00000 n \n-0000032178 00000 n \n-0000035887 00000 n \n-0000039533 00000 n \n-0000044441 00000 n \n-0000048062 00000 n \n-0000051584 00000 n \n-0000055200 00000 n \n-0000058791 00000 n \n-0000062614 00000 n \n-0000066297 00000 n \n-0000069880 00000 n \n-0000073536 00000 n \n-0000077245 00000 n \n-0000080911 00000 n \n-0000084650 00000 n \n-0000088341 00000 n \n-0000091968 00000 n \n-0000095725 00000 n \n-0000099373 00000 n \n-0000103020 00000 n \n-0000106516 00000 n \n-trailer\n-<<\n-/ID \n-[<0f726644b6ac1201feda0e0957c03aa4><0f726644b6ac1201feda0e0957c03aa4>]\n-% ReportLab generated PDF document -- digest (http://www.reportlab.com)\n-\n-/Info 32 0 R\n-/Root 31 0 R\n-/Size 62\n->>\n-startxref\n-109568\n-%%EOF\n'
b
diff -r 5397da1ef896 -r 1c31d6d25429 fastdivmod.py
--- a/fastdivmod.py Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,93 +0,0 @@
-#!/usr/bin/env python2
-#
-# Copyright 2011-2016 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# vim: sw=2 sts=2 et
-
-from math import log, ceil
-import sys
-
-
-def find_largest_power(less_than, base):
-    power = int(log(less_than) / log(base))
-    return base ** power
-
-
-def divmod_iter(x, by, chunk=None):
-    if x < by:
-        return [x]
-
-    if hasattr(x, 'bit_length'):
-        # crude log(2, x)
-        divisions = x.bit_length() // by.bit_length()
-    else:
-        divisions = log(x) / log(by)
-
-    if divisions < 1024:
-        return divmod_iter_basic(x, by, chunk)
-    else:
-        return divmod_iter_chunking(x, by, chunk)
-
-
-def divmod_iter_chunking(x, by, chunk=None):
-    """Generate successive (x % by); x /= by, but faster.
-
-    If provided, |chunk| must be a power of |by| (otherwise it is determined
-    automatically for 1024 per inner loop, based on analysis of bench_genmod.py)
-    """
-
-    if by == 1:
-        assert x == 0, x
-        yield 0
-        return
-
-    if chunk is None:
-        digits_per_chunk = 1024
-        chunk = by ** digits_per_chunk
-    else:
-        digits_per_chunk = int(round(log(chunk) / log(by)))
-        if (by ** digits_per_chunk) != chunk:
-            raise ValueError("Chunk=%d must be a power of by=%d" % (chunk, by))
-
-    assert digits_per_chunk > 0
-
-    while x:
-        x, this_chunk = divmod(x, chunk)
-        #this_chunk = int(this_chunk)
-        for _ in range(digits_per_chunk):
-            this_chunk, m = divmod(this_chunk, by)
-            yield m
-
-            if this_chunk == 0 and x == 0:
-                break
-
-
-def divmod_iter_basic(x, by, chunk=None):
-    """Generate successive (x % by); x /= by, the obvious way.
-
-    Chunk is ignored.
-    """
-    while x:
-        x, m = divmod(x, by)
-        yield m
-
-def powersum(x, low, high):
-    # http://mikestoolbox.com/powersum.html
-    xm1 = x - 1
-    if xm1 == 0:
-        return high - low + 1
-    a = x ** (high + 1)
-    b = x ** low
-    return (a - b) // xm1
b
diff -r 5397da1ef896 -r 1c31d6d25429 fastdivmod.pyc
b
Binary file fastdivmod.pyc has changed
b
diff -r 5397da1ef896 -r 1c31d6d25429 functions.py
--- a/functions.py Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,960 +0,0 @@\n-import string\r\n-from syngenic import *\r\n-from Bio.Seq import Seq\r\n-from Bio.SeqFeature import SeqFeature, FeatureLocation\r\n-from pprint import pprint\r\n-\r\n-from itertools import izip\r\n-\r\n-import numpy as np\r\n-import pandas as pd\r\n-\r\n-def all_patterns(input_ = []):\r\n-\r\n-    patts = []\r\n-    n_patts = []\r\n-\r\n-    for patt in input_:\r\n-        tmp_patt = patt#Seq(patt.rstrip(), IUPAC.ambiguous_dna)\r\n-        tmp_revc = tmp_patt.reverse_complement()\r\n-\r\n-        patts.append(str(tmp_patt))\r\n-        patts.append(str(tmp_revc))\r\n-\r\n-        n_patts.append(pattern(tmp_patt).plan_ambiguity())\r\n-        n_patts.append(pattern(tmp_revc).plan_ambiguity())\r\n-\r\n-\r\n-    return patts, n_patts\r\n-\r\n-def fake_from_real(path = None, id_ = None, name = None):\r\n-\r\n-    plasmid_seq = SeqIO.read(open(path, "r"), "genbank")\r\n-\r\n-    f_p = plasmid_seq.seq[:10]\r\n-    f_CDS = []\r\n-    for f in plasmid_seq.features:\r\n-        if f.type == "CDS":\r\n-            tmp_start = len(f_p)\r\n-            tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]\r\n-            tmp_end = tmp_start + len(tmp_cds)\r\n-            f_p += tmp_cds\r\n-            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end), type="gene", strand=f.location.strand))\r\n-            #f_p += plasmid_seq.seq[tmp_end:tmp_end+5]\r\n-    f_p += plasmid_seq.seq[-10:]\r\n-\r\n-    for feat in f_CDS:\r\n-        f_p.features.append(feat)\r\n-    f_p.id = id_\r\n-    f_p.name = name\r\n-\r\n-    #feature_seq_0 = f_CDS[0].extract(f_p)\r\n-\r\n-    return f_p\r\n-\r\n-def punctuate_targets(f_patts, n_pl):\r\n-\r\n-    n_poss = {}\r\n-    max_len = len(n_pl)\r\n-    for key in f_patts.keys():\r\n-        for el in f_patts[key]:\r\n-            if not el[2] < el[1]:\r\n-                tmp = range(el[1], el[2])\r\n-                for i in range(len(tmp)):\r\n-                    if not tmp[i] in n_poss.keys():\r\n-                        n_poss[tmp[i]] = [key[i]]\r\n-                    else:\r\n-                        n_poss[tmp[i]].append(key[i])\r\n-            else:\r\n-                tmp = range(el[1], max_len) + range(0, el[2])\r\n-                for i in range(len(tmp)):\r\n-                    if not tmp[i] in n_poss.keys():\r\n-                        n_poss[tmp[i]] = [key[i]]\r\n-                    else:\r\n-                        n_poss[tmp[i]].append(key[i])\r\n-\r\n-    for key in n_poss.keys():\r\n-        n_poss[key] = set(n_poss[key])\r\n-\r\n-    #print(n_poss)\r\n-\r\n-    return n_poss\r\n-\r\n-\r\n-def print_seq(n_pl, ind_range = None):\r\n-\r\n-    if ind_range == None:\r\n-\r\n-        data = filter(None, re.split(r\'(\\w{1})\', n_pl))\r\n-        index = range(len(n_pl))\r\n-\r\n-        seq = []\r\n-        ind = []\r\n-\r\n-        j = 0\r\n-\r\n-        seq.append("")\r\n-        ind.append("")\r\n-\r\n-        for i in range(len(data)):\r\n-\r\n-            if (i % 9 == 0) & (i > 0):\r\n-                j += 1\r\n-                seq.append("")\r\n-                ind.append("")\r\n-                print("\\n")\r\n-                print(seq[j-1])\r\n-                print(ind[j-1])\r\n-\r\n-\r\n-            seq[j] += " "\r\n-            ind[j] += " "\r\n-            for n in range(len(str(index[i]))-1):\r\n-                seq[j] += " "\r\n-            seq[j] += data[i]\r\n-            ind[j] += str(index[i])\r\n-        print("\\n")\r\n-        print(seq[j])\r\n-        print(ind[j])\r\n-    else:\r\n-        data = filter(None, re.split(r\'(\\w{1})\', n_pl[ind_range[0]:ind_range[1]]))\r\n-        index = range(ind_range[0], ind_range[1])\r\n-\r\n-        seq = []\r\n-        ind = []\r\n-\r\n-        j = 0\r\n-\r\n-        seq.append("")\r\n-        ind.append("")\r\n-\r\n-        for i in range(len(data)):\r\n-\r\n-            if (i % 9 == 0) & (i > 0):\r\n-                j += 1\r\n-                seq.append("")\r\n-                ind.append("")\r\n-                print("\\n")\r\n-                print(seq[j-1])\r\n-                print(ind[j-1])\r\n-\r\n-\r\n-            seq[j] += " "\r\n-            ind[j] += " "\r\n-            for n in range(len(str(index[i]))-1):\r\n-                seq[j] += '..b'arget_positions,\r\n-            2:annot,\r\n-            3:direction,\r\n-            4:["Original"] + sequences["original"],\r\n-            5:new_plasmids,\r\n-            6:index}\r\n-\r\n-    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,\r\n-                        rightMargin=30,leftMargin=30,\r\n-                        topMargin=30,bottomMargin=30)\r\n-\r\n-    elements = []\r\n-    #max_row = 18\r\n-    blocks = {}\r\n-\r\n-    if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:\r\n-        n_blocks = len(range(max_row, len(original.seq)+1, max_row))\r\n-    else:\r\n-        n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1\r\n-\r\n-    j = 0\r\n-\r\n-    for i in range(n_blocks):\r\n-        blocks[i] = []\r\n-        for l in range(7):\r\n-            if l in [0, 5]:\r\n-                for el in data[l]:\r\n-                    if len(el[j:]) > max_row:\r\n-                        if i >= 1:\r\n-                            blocks[i].append([el[0]] + el[j:j+max_row])\r\n-                        else:\r\n-                            blocks[i].append(el[j:j+max_row])\r\n-                    else:\r\n-                        blocks[i].append([el[0]] + el[j:])\r\n-            else:\r\n-                if len(data[l][j:]) > max_row:\r\n-                    if i >= 1:\r\n-                        blocks[i].append([data[l][0]] + data[l][j:j+max_row])\r\n-                    else:\r\n-                        blocks[i].append(data[l][j:j+max_row])\r\n-                else:\r\n-                    blocks[i].append([data[l][0]] + data[l][j:])\r\n-        j += max_row\r\n-        #print("\\n")\r\n-        #print(blocks[i])\r\n-\r\n-        elements.append(Table(blocks[i], hAlign=\'LEFT\'))#,\r\n-                        #style=[(\'BACKGROUND\',(0,0),(0,0),colors.palegreen),\r\n-                        #       (\'BACKGROUND\',(1,1),(1,1),colors.palegreen),\r\n-                        #       (\'TEXTCOLOR\',(2,2),(3,2),colors.palegreen),\r\n-                        #       (\'BOX\',(0,0),(0,0),2,colors.red)]))\r\n-        elements.append(Table([["", "", "", "", ""]]))\r\n-\r\n-    doc.build(elements)\r\n-\r\n-\r\n-    #new_doc = SimpleDocTemplate("further_information.pdf",pagesize=letter,\r\n-    #                            rightMargin=30,leftMargin=30,\r\n-    #                            topMargin=30,bottomMargin=30)\r\n-    #new_elements = []\r\n-\r\n-    #new_elements.append([f for f in original.features if f.type.lower() in ["gene", "cds"]])\r\n-    #new_elements.append(f_patterns)\r\n-\r\n-    #doc.build(new_elements)\r\n-\r\n-    c = canvas.Canvas("./further_information.pdf")\r\n-    c.drawString(100,750,"CDS regions:")\r\n-    upper_bound = 750\r\n-    for feat in original.features:\r\n-        if feat.type.lower() in ["gene", "cds"]:\r\n-            upper_bound -= 15\r\n-            if feat.location.strand == -1:\r\n-                sign = "-"\r\n-            else:\r\n-                sign = "+"\r\n-            c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")\r\n-    upper_bound -= 30\r\n-    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")\r\n-    for f_pattern in f_patterns.keys():\r\n-        upper_bound -= 15\r\n-        c.drawString(115,upper_bound,f_pattern + ":")\r\n-        for val in f_patterns[f_pattern]:\r\n-            upper_bound -= 15\r\n-            c.drawString(130,upper_bound,str(val))\r\n-        upper_bound -= 5\r\n-\r\n-    upper_bound -= 30\r\n-    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")\r\n-    for target in targets.keys():\r\n-        upper_bound -= 15\r\n-        c.drawString(115,upper_bound,target + ": " + targets[target])\r\n-\r\n-    c.save()\r\n-\r\n-\r\n-    return\r\n-\r\n-\r\n-def produce_random_targets(sequence):\r\n-\r\n-    # Produce a target on two continous CDS\r\n-    # Produce a target in a non-coding region\r\n-    # Produce a target in coding region\r\n-    # Produce a target on a overlapping left\r\n-    # Produce a target on a overlapping right\r\n-\r\n-\r\n-\r\n-    return\r\n'
b
diff -r 5397da1ef896 -r 1c31d6d25429 functions.pyc
b
Binary file functions.pyc has changed
b
diff -r 5397da1ef896 -r 1c31d6d25429 further_information.pdf
--- a/further_information.pdf Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,68 +0,0 @@
-%PDF-1.3
-%���� ReportLab Generated PDF document http://www.reportlab.com
-1 0 obj
-<<
-/F1 2 0 R
->>
-endobj
-2 0 obj
-<<
-/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
->>
-endobj
-3 0 obj
-<<
-/Contents 7 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 6 0 R /Resources <<
-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
->> /Rotate 0 /Trans <<
-
->> 
-  /Type /Page
->>
-endobj
-4 0 obj
-<<
-/PageMode /UseNone /Pages 6 0 R /Type /Catalog
->>
-endobj
-5 0 obj
-<<
-/Author (anonymous) /CreationDate (D:20190520215146-01'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20190520215146-01'00') /Producer (ReportLab PDF Library - www.reportlab.com) 
-  /Subject (unspecified) /Title (untitled) /Trapped /False
->>
-endobj
-6 0 obj
-<<
-/Count 1 /Kids [ 3 0 R ] /Type /Pages
->>
-endobj
-7 0 obj
-<<
-/Filter [ /ASCII85Decode /FlateDecode ] /Length 412
->>
-stream
-Gat%];+ne\'SYH=/+-V'RG)1GX*r0c#!n!fb@#-MPVo!DQd3&:s(4iV+Y7"&pS%Q2c+W9O8F.Y*DYF,eK\IjUKEB^id[Ls?"WhYo^QS[E8HLnF!;@%_LGrns:,f%#`dj\3=+=r$+J1DZY6iBC5q/MrS+q:c%Oe:E^S]JE'f8_8TYaHgh0[+Vg%VL(*L*FQr^4N1Bj9+.)H1@VK>)/GWpcT8mcWQ0m.S#B:444LfrDh#=mhX7K$W7UTTgUkH%%J8/qapF1gbt_K25`Ea%DAYkc*B_n7WJ*<ksjXWC@O?)e5e7AJ9m,0h/QJaQtJJNB$bXKt-oZV:jZ9^#1EU'$;>hH%%Xg+GH'$Q`Eal3t.n@LZ_9t?IrAg2R(m=p7#FF*,WFdh^L>?Va8pi(3&F/R0lDJ#Dp=`~>endstream
-endobj
-xref
-0 8
-0000000000 65535 f 
-0000000073 00000 n 
-0000000104 00000 n 
-0000000211 00000 n 
-0000000414 00000 n 
-0000000482 00000 n 
-0000000778 00000 n 
-0000000837 00000 n 
-trailer
-<<
-/ID 
-[<a048fbc90e461707589cdb3f47c562fc><a048fbc90e461707589cdb3f47c562fc>]
-% ReportLab generated PDF document -- digest (http://www.reportlab.com)
-
-/Info 5 0 R
-/Root 4 0 R
-/Size 8
->>
-startxref
-1339
-%%EOF
b
diff -r 5397da1ef896 -r 1c31d6d25429 pEPSA5_annotated.gb
--- a/pEPSA5_annotated.gb Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,192 +0,0 @@\n-LOCUS       Exported                6850 bp ds-DNA     circular SYN 13-NOV-2018\r\n-DEFINITION  synthetic circular DNA\r\n-ACCESSION   .\r\n-VERSION     .\r\n-KEYWORDS    .\r\n-SOURCE      synthetic DNA construct\r\n-  ORGANISM  synthetic DNA construct\r\n-REFERENCE   1  (bases 1 to 6850)\r\n-  AUTHORS   .\r\n-  TITLE     Direct Submission\r\n-  JOURNAL   Exported Nov 13, 2018 from SnapGene 4.2.6\r\n-            http://www.snapgene.com\r\n-COMMENT     LOCUS dna 6850 bp\r\n-FEATURES             Location/Qualifiers\r\n-     source          1..6850\r\n-                     /organism="synthetic DNA construct"\r\n-                     /mol_type="other DNA"\r\n-     misc_feature    1..819\r\n-                     /label=p15A origin of replication region\r\n-                     /note="p15A origin of replication region"\r\n-     misc_feature    1032..3625\r\n-                     /label=Staphylococcus aureus plasmid pC194\r\n-                     /note="Staphylococcus aureus plasmid pC194 region\r\n-                     (NC_002013.1)"\r\n-     misc_feature    1126..1324\r\n-                     /label=Repeat sequence 1 of 2\r\n-                     /note="Repeat  sequence 1 of 2"\r\n-     CDS             complement(1562..2212)\r\n-                     /label=Chloramphenicol Resistance\r\n-     CDS             complement(2346..2744)\r\n-                     /label=Potential Copy number associated protein\r\n-                     /note="Potential Copy number associated protein"\r\n-     misc_feature    2463..2517\r\n-                     /label=pC194 replication origin\r\n-                     /note="pC194 replication origin (detailed in PMC401278)"\r\n-     misc_feature    2670..3605\r\n-                     /label=Replication initiation protein Rep RC\r\n-     misc_feature    3626..3824\r\n-                     /label=Repeat sequence 2 of 2\r\n-                     /note="Repeat sequence 2 of 2"\r\n-     misc_feature    3825..5278\r\n-                     /label=Xylose Inducible Promoter Region\r\n-                     /note="Xylose Inducible Promoter Region, Xyl promoter, Xyl\r\n-                     repressor protein, Xyl terminator sequence and pTX5\r\n-                     promoter facing MCS:\r\n-                     http://parts.igem.org/Part:BBa_K1323014"\r\n-     CDS             4000..5151\r\n-                     /label=XylR repressor protein\r\n-     misc_feature    5191..5241\r\n-                     /label=Bacteriophage T5 PN25 promoter\r\n-                     /note="Bacteriophage T5 PN25 promoter (Deuschle et al,\r\n-                     PMC1167251)"\r\n-     misc_feature    5206..5211\r\n-                     /label=-30 region\r\n-                     /note="-30 region"\r\n-     misc_feature    5229..5234\r\n-                     /label=-10 region\r\n-                     /note="-10 region"\r\n-     misc_feature    5241..5279\r\n-                     /label=Xyl operator containing palindromic\r\n-                     /note="interfering with transcription initiation"\r\n-     misc_feature    5279..5317\r\n-                     /label=MCS\r\n-                     /note="MCS"\r\n-     terminator      5393..5550\r\n-                     /label=rrnB term\r\n-     terminator      5516..5559\r\n-                     /label=rrnB_T1 term\r\n-     terminator      5691..5718\r\n-                     /label=rrnB_T2 term\r\n-     misc_feature    complement(5860..6720)\r\n-                     /label=Ampicillin Resistance\r\n-                     /note="gene of the plasmid pLEX5BA"\r\n-     promoter        complement(6762..6790)\r\n-                     /label=amp prom\r\n-ORIGIN\r\n-        1 ggcggccgca ctggcttact atgttggcac tgatgagggt gtcagtgaag tgcttcatgt\r\n-       61 ggcaggagaa aaaaggctgc accggtgcgt cagcagaata tgtgatacag gatatattcc\r\n-      121 gcttcctcgc tcactgactc gctacgctcg gtcgttcgac tgcggcgagc ggaaatggct\r\n-      181 tacgaacggg gcggagattt cctggaagat gccaggaaga tacttaacag ggaagtgaga\r\n-      241 gggccgcggc aaagccgttt ttccataggc tccgcccccc tgacaagcat cacgaaatct\r\n-      301 gacgctcaaa tcagtggtgg cgaaacccga caggactata aagataccag gcgtttcccc\r\n-      361 ctggcggct'..b'781 ataaaaccac tcctttttaa caaactttat cacaagaaat attttggcat tctacgacta\r\n-     3841 taacttaaat ttatattttt tactttataa tatataattg attatagaat aatgttgctc\r\n-     3901 atatcgtttg ccaacatcta gtactcaaat tacactatgt tacacttggt aatattaacc\r\n-     3961 gaacttcccc tgtccaaatt agataagagg taataataaa tggaaaataa ttttatagta\r\n-     4021 aatgaaaatg agaagcgtgt attaaaacaa attttcaata acagcaatat ttcacgaaca\r\n-     4081 caaatatcga agaatttaga acttaataaa gctactattt ctaacattct gaacaactta\r\n-     4141 aaacacaaga gtttagttaa tgaagtagga gaaggtaata gtactaaaag tggtggacga\r\n-     4201 aagcctattt tactcgaaat taaccaaaaa tatggctact atatttctat ggatttaaca\r\n-     4261 tatgattccg ttgaattaat gtacaactac tttgatgcta ctatattaaa gcaagattcc\r\n-     4321 tacgaattaa atgataaaaa tgtaagcagt atattacaaa ttttaaaatc taatataaac\r\n-     4381 gtctcagaaa aatatgatac gttatatggg ttacttggta tatctatatc catacacggt\r\n-     4441 atcgttgacg atgagcaaaa cataatcaat cttccttttc ataaaaatga gaaacgcaca\r\n-     4501 tttaccgatg aattaaagtc attcacaaat gttcctgtcg ttatagaaaa tgaagcaaat\r\n-     4561 ttatcagcgc tatatgaaaa aagtttatat attaattcaa acataaataa tttgattact\r\n-     4621 ttaagtattc acaagggtat aggcgctggc atcctaataa ataaaaaact ttatcgtggc\r\n-     4681 tcaaatggag aggctggaga gataggtaag acattggttt tggaatctat aaataacaat\r\n-     4741 gacaacaaat attataaaat cgaagatata tgctcccaag acgctttaat acagaaaata\r\n-     4801 aataataggt tgggcgtcac attgacgttt acagaactaa tccaatatta caacgaagga\r\n-     4861 aattcaattg ttgctcatga aattaaacaa tttattaata aaatgacagt tctgattcat\r\n-     4921 aatttgaata cacaatttaa cccagacgct atttatatta actgtccttt aattaatgaa\r\n-     4981 ttaccaaata ttttaaatga aattaaagag caattctcct gtttttctca aggcagtcca\r\n-     5041 gttcaattac atttaactac taatgtaaaa caagctactt tattgggtgg cactttagca\r\n-     5101 ataatgcaaa aaacattaaa tataaataac attcaaatga atattaaata attacagcag\r\n-     5161 tctgagttat aaaatagata tctcggaccg tcataaaaaa tttatttgct ttcaggaaaa\r\n-     5221 tttttctgta taatagattc aagttagttt gtttattaaa ttaaccaact aaaatgtaga\r\n-     5281 attcgagctc ggtacccggg gatcctctag agtcgacctg cagccaagct tgggcttttc\r\n-     5341 agcctgatac agattaaatc agaacgcaga agcggtctga taaaacagaa tttgcctggc\r\n-     5401 ggcagtagcg cggtggtccc acctgacccc atgccgaact cagaagtgaa acgccgtagc\r\n-     5461 gccgatggta gtgtggggtc tccccatgcg agagtaggga actgccaggc atcaaataaa\r\n-     5521 acgaaaggct cagtcgaaag actgggcctt tcgttttatc tgttgtttgt cggtgaacgc\r\n-     5581 tctcctgagt aggacaaatc cgccgggagc ggatttgaac gttgcgaagc aacggcccgg\r\n-     5641 agggtggcgg gcaggacgcc cgccataaac tgccaggcat caaattaagc agaaggccat\r\n-     5701 cctgacggat ggcctttttg cgtttctaca aactcttttg tttatttttc taaatacatt\r\n-     5761 caaatatgta tccgctcatc cccatcctat cgatgataag ctgtcaaaca tgagaattaa\r\n-     5821 atcaatctaa agtatatatg agtaaacttg gtctgacagt taccaatgct taatcagtga\r\n-     5881 ggcacctatc tcagcgatct gtctatttcg ttcatccata gttgcctgac tccccgtcgt\r\n-     5941 gtagataact acgatacggg agggcttacc atctggcccc agtgctgcaa tgataccgcg\r\n-     6001 agacccacgc tcaccggctc cagatttatc agcaataaac cagccagccg gaagggccga\r\n-     6061 gcgcagaagt ggtcctgcaa ctttatccgc ctccatccag tctattaatt gttgccggga\r\n-     6121 agctagagta agtagttcgc cagttaatag tttgcgcaac gttgttgcca ttgctacagg\r\n-     6181 catcgtggtg tcacgctcgt cgtttggtat ggcttcattc agctccggtt cccaacgatc\r\n-     6241 aaggcgagtt acatgatccc ccatgttgtg caaaaaagcg gttagctcct tcggtcctcc\r\n-     6301 gatcgttgtc agaagtaagt tggccgcagt gttatcactc atggttatgg cagcactgca\r\n-     6361 taattctctt actgtcatgc catccgtaag atgcttttct gtgactggtg agtactcaac\r\n-     6421 caagtcattc tgagaatagt gtatgcggcg accgagttgc tcttgcccgg cgtcaacacg\r\n-     6481 ggataatacc gcgccacata gcagaacttt aaaagtgctc atcattggaa aacgctcttc\r\n-     6541 ggggcgaaaa ctctcaagga tcttaccgct gttgagatcc agttcgatgt aacccactcg\r\n-     6601 tgcacccaac tgatcttcag catcttttac tttcaccagc gtttctgggt gagcaaaaac\r\n-     6661 aggaaggcaa aatgccgcaa aaaagggaat aagggcgaca cggaaatgtt gaatactcat\r\n-     6721 actcttcctt tttcaatatt attgaagcat ttatcagggt tattgtctca tgagcggata\r\n-     6781 catatttgaa tgtatttaga aaaataaaca aataggggtt ccgcgcacat ttccccgaaa\r\n-     6841 agtgccacct\r\n-//\r\n'
b
diff -r 5397da1ef896 -r 1c31d6d25429 patterns.txt
--- a/patterns.txt Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
-ACANNNNNNRTGG
-ATCNNNNNCCT
-BNNNNNNNGCGGTAVY
b
diff -r 5397da1ef896 -r 1c31d6d25429 run_codon_switch.sh
--- a/run_codon_switch.sh Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
-#python setup.py build_ext --inplace
-python codon_switch.py -i pEPSA5_annotated.gb -l genbank -p patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -f 3 -m 27 -d demonstration -o ./output # -d demonstration 
b
diff -r 5397da1ef896 -r 1c31d6d25429 sre_yield.py
--- a/sre_yield.py Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,611 +0,0 @@\n-#!/usr/bin/env python2\n-#\n-# Copyright 2011-2016 Google Inc.\n-#\n-# Licensed under the Apache License, Version 2.0 (the "License");\n-# you may not use this file except in compliance with the License.\n-# You may obtain a copy of the License at\n-#\n-#      http://www.apache.org/licenses/LICENSE-2.0\n-#\n-# Unless required by applicable law or agreed to in writing, software\n-# distributed under the License is distributed on an "AS IS" BASIS,\n-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n-# See the License for the specific language governing permissions and\n-# limitations under the License.\n-#\n-# vim: sw=2 sts=2 et\n-\n-"""This module can generate all strings that match a regular expression.\n-\n-The regex is parsed using the SRE module that is standard in python,\n-then the data structure is executed to form a bunch of iterators.\n-"""\n-\n-__author__ = \'alexperry@google.com (Alex Perry)\'\n-__all__ = [\'Values\', \'AllStrings\', \'AllMatches\', \'ParseError\']\n-\n-\n-import bisect\n-import math\n-import re\n-import sre_constants\n-import sre_parse\n-import string\n-import sys\n-import types\n-\n-import cachingseq\n-import fastdivmod\n-\n-try:\n-    xrange = xrange\n-except NameError:\n-    xrange = range\n-\n-_RE_METACHARS = r\'$^{}*+\\\\\'\n-_ESCAPED_METACHAR = r\'\\\\[\' + _RE_METACHARS + r\']\'\n-ESCAPED_METACHAR_RE = re.compile(_ESCAPED_METACHAR)\n-# ASCII by default, see https://github.com/google/sre_yield/issues/3\n-CHARSET = [chr(c) for c in range(256)]\n-\n-WORD = string.ascii_letters + string.digits + \'_\'\n-\n-try:\n-    DEFAULT_RE_FLAGS = re.ASCII\n-except AttributeError:\n-    DEFAULT_RE_FLAGS = 0\n-\n-STATE_START, STATE_MIDDLE, STATE_END = list(range(3))\n-\n-def Not(chars):\n-    return \'\'.join(sorted(set(CHARSET) - set(chars)))\n-\n-\n-CATEGORIES = {\n-    sre_constants.CATEGORY_WORD: WORD,\n-    sre_constants.CATEGORY_NOT_WORD: Not(WORD),\n-    sre_constants.CATEGORY_DIGIT: string.digits,\n-    sre_constants.CATEGORY_NOT_DIGIT: Not(string.digits),\n-    sre_constants.CATEGORY_SPACE: string.whitespace,\n-    sre_constants.CATEGORY_NOT_SPACE: Not(string.whitespace),\n-}\n-\n-# This constant varies between builds of Python; this is the lower value.\n-MAX_REPEAT_COUNT = 65535\n-\n-\n-class ParseError(Exception):\n-    pass\n-\n-\n-def slice_indices(slice_obj, size):\n-    """slice_obj.indices() except this one supports longs."""\n-    # start stop step\n-    start = slice_obj.start\n-    stop = slice_obj.stop\n-    step = slice_obj.step\n-\n-    # We don\'t always update a value for negative indices (if we wrote it here\n-    # due to None).\n-    if step is None:\n-        step = 1\n-    if start is None:\n-        if step > 0:\n-            start = 0\n-        else:\n-            start = size - 1\n-    else:\n-        start = _adjust_index(start, size)\n-\n-    if stop is None:\n-        if step > 0:\n-            stop = size\n-        else:\n-            stop = -1\n-    else:\n-        stop = _adjust_index(stop, size)\n-\n-    return (start, stop, step)\n-\n-\n-def _adjust_index(n, size):\n-    if n < 0:\n-        n += size\n-\n-    if n < 0:\n-        raise IndexError("Out of range")\n-    if n > size:\n-        n = size\n-    return n\n-\n-\n-def _xrange(*args):\n-    """Because xrange doesn\'t support longs :("""\n-    # prefer real xrange if it works\n-    try:\n-        return xrange(*args)\n-    except OverflowError:\n-        return _bigrange(*args)\n-\n-\n-def _bigrange(*args):\n-    if len(args) == 1:\n-        start = 0; stop = args[0]; step = 1\n-    elif len(args) == 2:\n-        start, stop = args\n-        step = 1\n-    elif len(args) == 3:\n-        start, stop, step = args\n-    else:\n-        raise ValueError("Too many args for _bigrange")\n-\n-    i = start\n-    while True:\n-        yield i\n-        i += step\n-        if step < 0 and i <= stop:\n-            break\n-        if step > 0 and i >= stop:\n-            break\n-\n-\n-class WrappedSequence(object):\n-    """This wraps a sequence, purely as a base clase for the other uses."""\n-\n-    def __init__(self, raw):\n-        # Derived classes will li'..b'm/google/sre_yield/issues/3\n-        if flags & re.IGNORECASE:\n-            raise ParseError(\'Flag "i" not supported. https://github.com/google/sre_yield/issues/4\')\n-        elif flags & re.UNICODE:\n-            raise ParseError(\'Flag "u" not supported. https://github.com/google/sre_yield/issues/3\')\n-        elif flags & re.LOCALE:\n-            raise ParseError(\'Flag "l" not supported. https://github.com/google/sre_yield/issues/5\')\n-\n-        if max_count is None:\n-            self.max_count = MAX_REPEAT_COUNT\n-        else:\n-            self.max_count = max_count\n-\n-        self.has_groupref = False\n-\n-        # Configure the parser backends\n-        self.backends = {\n-            sre_constants.LITERAL: lambda y: [chr(y)],\n-            sre_constants.RANGE: lambda l, h: [chr(c) for c in range(l, h+1)],\n-            sre_constants.SUBPATTERN: self.maybe_save,\n-            sre_constants.BRANCH: self.branch_values,\n-            sre_constants.MIN_REPEAT: self.max_repeat_values,\n-            sre_constants.MAX_REPEAT: self.max_repeat_values,\n-            sre_constants.AT: self.nothing_added,\n-            sre_constants.ASSERT: self.empty_list,\n-            sre_constants.ASSERT_NOT: self.empty_list,\n-            sre_constants.ANY:\n-                lambda _: self.in_values(((sre_constants.NEGATE,),)),\n-            sre_constants.IN: self.in_values,\n-            sre_constants.NOT_LITERAL: self.not_literal,\n-            sre_constants.CATEGORY: self.category,\n-            sre_constants.GROUPREF: self.groupref,\n-        }\n-        self.state = STATE_START\n-        # Now build a generator that knows all possible patterns\n-        self.raw = self.sub_values(sre_parse.parse(pattern, flags))\n-        # Configure this class instance to know about that result\n-        self.length = self.raw.__len__()\n-\n-    def __contains__(self, item):\n-        # Since we have a regex, we can search the list really cheaply\n-        return self.matcher.match(item) is not None\n-\n-\n-class RegexMembershipSequenceMatches(RegexMembershipSequence):\n-    def __getitem__(self, i):\n-        if isinstance(i, slice):\n-            result = SlicedSequence(self, slicer=i)\n-            if result.__len__() < 16:\n-                # Short lists are unpacked\n-                result = [item for item in result]\n-            return result\n-\n-        d = {}\n-        s = super(RegexMembershipSequenceMatches, self).get_item(i, d)\n-        return Match(s, d, self.named_group_lookup)\n-\n-\n-def AllStrings(regex, flags=0, charset=CHARSET, max_count=None):\n-    """Constructs an object that will generate all matching strings."""\n-    return RegexMembershipSequence(regex, flags, charset, max_count=max_count)\n-\n-Values = AllStrings\n-\n-\n-class Match(object):\n-    def __init__(self, string, groups, named_groups):\n-        # TODO keep group(0) only, and spans for the rest.\n-        self._string = string\n-        self._groups = groups\n-        self._named_groups = named_groups\n-        self.lastindex = len(groups) + 1\n-\n-    def group(self, n=0):\n-        if n == 0:\n-            return self._string\n-        if not isinstance(n, int):\n-            n = self._named_groups[n]\n-        return self._groups[n]\n-\n-    def groups(self):\n-        return tuple(self._groups[i] for i in range(1, self.lastindex))\n-\n-    def groupdict(self):\n-        d = {}\n-        for k, v in self._named_groups.items():\n-            d[k] = self._groups[v]\n-        return d\n-\n-    def span(self, n=0):\n-        raise NotImplementedError()\n-\n-\n-def AllMatches(regex, flags=0, charset=CHARSET, max_count=None):\n-    """Constructs an object that will generate all matching strings."""\n-    return RegexMembershipSequenceMatches(regex, flags, charset, max_count=max_count)\n-\n-\n-def main(argv=None):\n-    """This module can be executed on the command line for testing."""\n-    if argv is None:\n-        argv = sys.argv\n-    for arg in argv[1:]:\n-        for i in AllStrings(arg):\n-            print(i)\n-\n-\n-if __name__ == \'__main__\':\n-    main()\n'
b
diff -r 5397da1ef896 -r 1c31d6d25429 sre_yield.pyc
b
Binary file sre_yield.pyc has changed
b
diff -r 5397da1ef896 -r 1c31d6d25429 syngenic.py
--- a/syngenic.py Tue May 21 05:05:15 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,362 +0,0 @@\n-#!/usr/bin/env python\r\n-\r\n-__author__= "Gianmarco Piccinno"\r\n-__version__ = "1.0.0"\r\n-\r\n-import Bio\r\n-from Bio import SeqIO\r\n-from Bio.Seq import Seq\r\n-from Bio.Alphabet import IUPAC\r\n-from Bio.Data import IUPACData\r\n-from Bio.Data import CodonTable\r\n-import re\r\n-import sre_yield\r\n-\r\n-import re\r\n-import itertools\r\n-from functools import reduce\r\n-\r\n-import Bio\r\n-from Bio import Data\r\n-from Bio.Data import IUPACData\r\n-from Bio.Data import CodonTable\r\n-\r\n-from pprint import pprint\r\n-\r\n-import pandas as pd\r\n-\r\n-def _check_bases(seq_string):\r\n-    """\r\n-    Check characters in a string (PRIVATE).\r\n-    Remove digits and white space present in string. Allows any valid ambiguous\r\n-    IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, upper case are converted).\r\n-\r\n-    Other characters (e.g. symbols) trigger a TypeError.\r\n-\r\n-    Returns the string WITH A LEADING SPACE (!). This is for backwards\r\n-    compatibility, and may in part be explained by the fact that\r\n-    Bio.Restriction doesn\'t use zero based counting.\r\n-    """\r\n-    # Remove white space and make upper case:\r\n-    seq_string = "".join(seq_string.split()).upper()\r\n-    # Remove digits\r\n-    for c in "0123456789":\r\n-        seq_string = seq_string.replace(c, "")\r\n-    # Check only allowed IUPAC letters\r\n-    if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")):\r\n-        raise TypeError("Invalid character found in %s" % repr(seq_string))\r\n-    return " " + seq_string\r\n-\r\n-    matching = {\'A\': \'ARWMHVDN\', \'C\': \'CYSMHBVN\', \'G\': \'GRSKBVDN\',\r\n-                \'T\': \'TYWKHBDN\', \'R\': \'ABDGHKMNSRWV\', \'Y\': \'CBDHKMNSTWVY\',\r\n-                \'W\': \'ABDHKMNRTWVY\', \'S\': \'CBDGHKMNSRVY\', \'M\': \'ACBDHMNSRWVY\',\r\n-                \'K\': \'BDGHKNSRTWVY\', \'H\': \'ACBDHKMNSRTWVY\',\r\n-                \'B\': \'CBDGHKMNSRTWVY\', \'V\': \'ACBDGHKMNSRWVY\',\r\n-                \'D\': \'ABDGHKMNSRTWVY\', \'N\': \'ACBDGHKMNSRTWVY\'}\r\n-\r\n-class pattern(object):\r\n-\r\n-\r\n-    def __init__(self, pattern_input):\r\n-        s = str(pattern_input)\r\n-        self.upper = s.isupper()\r\n-        self.data = _check_bases(s)\r\n-        self.pattern = s\r\n-\r\n-    def plan_ambiguity(self):\r\n-        val = Bio.Data.IUPACData.ambiguous_dna_values\r\n-        re_pattern = ""\r\n-        for el in self.pattern:\r\n-            re_pattern = re_pattern + "[" + val[el] + "]"\r\n-        return re_pattern\r\n-\r\n-class annotated_genome(object):\r\n-\r\n-    def __init__(self, seq):\r\n-        s = str(seq)\r\n-        self.upper = s.isupper()\r\n-        self.data = _check_bases(s)\r\n-        self.seq = s\r\n-\r\n-    def codon_usage(self, codonTable):\r\n-\r\n-        codon_usage = {}\r\n-        tmp = [x for x in re.split(r\'(\\w{3})\', self.seq) if x != ""]\r\n-\r\n-        b_cod_table = CodonTable.unambiguous_dna_by_name["Bacterial"].forward_table\r\n-        aas = set(b_cod_table.values())\r\n-\r\n-        for aa in aas:\r\n-            codon_usage[aa] = {}\r\n-            for codon in b_cod_table.keys():\r\n-                if b_cod_table[codon] == aa:\r\n-                    codon_usage[aa][codon] = tmp.count(codon)\r\n-\r\n-        tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()}\r\n-\r\n-        codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])\r\n-        codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])\r\n-        codon_usage_[\'Proportion\'] = codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2))\r\n-\r\n-        codon_usage_.reset_index(inplace=True)\r\n-        codon_usage_.index = codon_usage_["Codon"]\r\n-\r\n-        return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_}\r\n-\r\n-class plasmid(object):\r\n-    """\r\n-    This class represents a circular plasmid\r\n-    """\r\n-\r\n-    def __init__(self, seq = "", circular=True, features = None):\r\n-\r\n-        if type(seq) in [Bio.SeqRecord.SeqRecord, plasmid, Seq]:\r\n-            s = str(seq.seq)\r\n-            self.features = seq.features\r\n-        else:\r\n-            s = str(seq)\r\n-            i'..b'codons:\r\n-        b_cod_table[cod] = "_Stop"\r\n-\r\n-    for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons:\r\n-        #print(cod)\r\n-        b_cod_table[cod] = b_cod_table[cod]\r\n-\r\n-    aas = set(b_cod_table.values())\r\n-\r\n-    for aa in aas:\r\n-        #print(aa)\r\n-        #codon_usage[aa] = {}\r\n-        for codon in b_cod_table.keys():\r\n-            if b_cod_table[codon] == aa:\r\n-                codon_usage[codon] = tmp.count(codon.split(" ")[0])\r\n-\r\n-    return codon_usage\r\n-\r\n-\r\n-def read_annotated_genome(data="example.fna", type_="fasta"):\r\n-    """\r\n-    Accepted formats:\r\n-        - fasta (multifasta)\r\n-        - gbk\r\n-\r\n-    """\r\n-\r\n-    seqs = ""\r\n-\r\n-    if type_ == "fasta":\r\n-        with open(data, "rU") as handle:\r\n-            for record in SeqIO.parse(handle, type_):\r\n-                seqs = seqs + str(record.seq)\r\n-\r\n-    elif type_ == "genbank":\r\n-        with open(data, "rU") as input_handle:\r\n-            types = []\r\n-            for record in SeqIO.parse(input_handle, "genbank"):\r\n-                for feature in record.features:\r\n-                    types.append(feature.type)\r\n-                    if feature.type == "CDS":\r\n-                        if feature.location.strand == +1:\r\n-                            seq = record.seq[feature.location.start:feature.location.end]\r\n-                            seqs = seqs + str(seq)\r\n-                        elif feature.location.strand == -1:\r\n-                            seq = record.seq[feature.location.start:\r\n-                                             feature.location.end].reverse_complement()\r\n-                            seqs = seqs + str(seq)\r\n-    return seqs\r\n-\r\n-\r\n-def synonims_(table_name="Bacterial"):\r\n-\r\n-    b_cod_table = CodonTable.unambiguous_dna_by_name[table_name].forward_table\r\n-\r\n-    print(b_cod_table)\r\n-\r\n-    for cod in CodonTable.unambiguous_dna_by_name[table_name].stop_codons:\r\n-        b_cod_table[cod] = "_Stop"\r\n-\r\n-    for cod in CodonTable.unambiguous_dna_by_name[table_name].start_codons:\r\n-        b_cod_table[cod] = "_Start"\r\n-\r\n-    #pprint(b_cod_table)\r\n-    codons = {}\r\n-\r\n-    aas = set(b_cod_table.values())\r\n-\r\n-    for aa in aas:\r\n-        codons[aa] = []\r\n-        for codon in b_cod_table.keys():\r\n-            if b_cod_table[codon] == aa:\r\n-                codons[aa].append(codon)\r\n-\r\n-        #break\r\n-\r\n-    synonims = {}\r\n-\r\n-    for el1 in codons.keys():\r\n-        print(el1)\r\n-        for el2 in codons[el1]:\r\n-            print(el2)\r\n-            synonims[el2] = codons[el1]\r\n-            #synonims[el2] = []\r\n-            #for el3 in codons[el1]#set.difference(set(codons[el1]), {el2}):\r\n-            #    print(el3)\r\n-            #    synonims[el2].append(el3)\r\n-                #break\r\n-            #break\r\n-        #break\r\n-\r\n-\r\n-    anti_codons = {}\r\n-\r\n-    for codon in synonims.keys():\r\n-            tmp_codon = Bio.Seq.Seq(codon, IUPAC.unambiguous_dna)\r\n-            tmp_anticodon = str(tmp_codon.reverse_complement())\r\n-\r\n-            anti_codons[tmp_anticodon] = []\r\n-\r\n-            for synonim in synonims[codon]:\r\n-                    tmp_synonim = Bio.Seq.Seq(synonim, IUPAC.unambiguous_dna)\r\n-                    tmp_antisynonim = str(tmp_synonim.reverse_complement())\r\n-                    anti_codons[tmp_anticodon].append(tmp_antisynonim)\r\n-\r\n-    check = Bio.Seq.Seq("CTT")\r\n-    anti_check = check.reverse_complement()\r\n-    print("\\nCheck:\\n" + str(check))\r\n-    print("\\nCodons:\\n")\r\n-\r\n-    for key in codons.keys():\r\n-        if str(check) in codons[key]:\r\n-            print(codons[key])\r\n-\r\n-    #pprint(codons)\r\n-    print("\\nSynonims:\\n")\r\n-    pprint(synonims[str(check)])\r\n-    print("\\nAnti_Codons:\\n")\r\n-    pprint(anti_codons[str(anti_check)])\r\n-\r\n-    #i = synonims.keys()\r\n-    #right = True\r\n-    #while len(i) > 0:\r\n-    #    tmp = i.pop()\r\n-    #    check = Bio.Seq.Seq(tmp)\r\n-    #    anti_check = check.reverse_complement()\r\n-\r\n-\r\n-    return {"synonims":synonims, "anti_synonims":anti_codons}\r\n'
b
diff -r 5397da1ef896 -r 1c31d6d25429 syngenic.pyc
b
Binary file syngenic.pyc has changed