Repository 'translate_bed_sequences'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/translate_bed_sequences

Changeset 2:4221664a2bd0 (2020-04-07)
Previous changeset 1:6bbce76c78c1 (2016-12-15)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/translate_bed_sequences commit 2a470e2c775a7427aa530e058510e4dc7b6d8e80"
modified:
translate_bed_sequences.py
translate_bed_sequences.xml
b
diff -r 6bbce76c78c1 -r 4221664a2bd0 translate_bed_sequences.py
--- a/translate_bed_sequences.py Thu Dec 15 18:41:21 2016 -0500
+++ b/translate_bed_sequences.py Tue Apr 07 11:45:53 2020 -0400
[
b'@@ -10,366 +10,390 @@\n #  James E Johnson\n #\n #------------------------------------------------------------------------------\n-"""\n \n-"""\n Input:  BED file (12 column) + 13th sequence column appended by extract_genomic_dna\n Output: Fasta of 3-frame translations of the spliced sequence\n-  \n """\n \n-import sys,re,os.path\n+import optparse\n+import os.path\n+import sys\n import tempfile\n-import optparse\n-from optparse import OptionParser\n-from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate\n+\n+from Bio.Seq import (\n+    reverse_complement,\n+    translate\n+)\n+\n+\n+class BedEntry(object):\n+    def __init__(self, line):\n+        self.line = line\n+        try:\n+            fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+            (chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts) = fields[0:12]\n+            seq = fields[12] if len(fields) > 12 else None\n+            self.chrom = chrom\n+            self.chromStart = int(chromStart)\n+            self.chromEnd = int(chromEnd)\n+            self.name = name\n+            self.score = int(score)\n+            self.strand = strand\n+            self.thickStart = int(thickStart)\n+            self.thickEnd = int(thickEnd)\n+            self.itemRgb = itemRgb\n+            self.blockCount = int(blockCount)\n+            self.blockSizes = [int(x) for x in blockSizes.split(\',\')]\n+            self.blockStarts = [int(x) for x in blockStarts.split(\',\')]\n+            self.seq = seq\n+        except Exception as e:\n+            sys.stderr.write("Unable to read Bed entry %s\\n" % e)\n+            exit(1)\n+\n+    def __str__(self):\n+        return \'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s%s\' % (\n+               self.chrom, self.chromStart, self.chromEnd, self.name, self.score, self.strand, self.thickStart, self.thickEnd, self.itemRgb, self.blockCount,\n+               \',\'.join([str(x) for x in self.blockSizes]),\n+               \',\'.join([str(x) for x in self.blockStarts]),\n+               \'\\t%s\' % self.seq if self.seq else \'\')\n+\n+    def get_splice_junctions(self):\n+        splice_juncs = []\n+        for i in range(self.blockCount - 1):\n+            splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i + 1])\n+            splice_juncs.append(splice_junc)\n+        return splice_juncs\n+\n+    def get_exon_seqs(self):\n+        exons = []\n+        for i in range(self.blockCount):\n+            # splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i+1])\n+            exons.append(self.seq[self.blockStarts[i]:self.blockStarts[i] + self.blockSizes[i]])\n+        if self.strand == \'-\':  # reverse complement\n+            exons.reverse()\n+            for i, s in enumerate(exons):\n+                exons[i] = reverse_complement(s)\n+        return exons\n+\n+    def get_spliced_seq(self):\n+        seq = \'\'.join(self.get_exon_seqs())\n+        return seq\n+\n+    def get_translation(self, sequence=None):\n+        translation = None\n+        seq = sequence if sequence else self.get_spliced_seq()\n+        if seq:\n+            seqlen = int(len(seq) / 3) * 3\n+            if seqlen >= 3:\n+                translation = translate(seq[:seqlen])\n+        return translation\n+\n+    def get_translations(self):\n+        translations = []\n+        seq = self.get_spliced_seq()\n+        if seq:\n+            for i in range(3):\n+                translation = self.get_translation(sequence=seq[i:])\n+                if translation:\n+                    translations.append(translation)\n+        return translations\n+\n+    def get_subrange(self, tstart, tstop):\n+        """\n+        (start, end)\n+        """\n+        chromStart = self.chromStart\n+        chromEnd = self.chromEnd\n+        r = range(self.blockCount)\n+        if self.strand == \'-\':\n+            r = list(r)\n+            r.reverse()\n+        bStart = 0\n+        for x in r:\n+            bEn'..b's.debug)\n+                for i, tx in enumerate(translations):\n+                    if tx:\n+                        (chromStart, chromEnd, translation, blockCount, blockSizes, blockStarts) = tx\n+                        if options.min_length is not None and len(translation) < options.min_length:\n+                            continue\n+                        if options.max_stop_codons is not None and translation.count(\'*\') > options.max_stop_codons:\n+                            continue\n+                        frame_name = \'_%s\' % (i + 1)\n+                        pep_id = "%s%s%s" % (options.id_prefix, entry.name, frame_name)\n+                        if bed_fh:\n+                            bed_fh.write(\'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\t%s\\n\' % (str(entry.chrom), chromStart, chromEnd, pep_id, entry.score, entry.strand, chromStart, chromEnd, entry.itemRgb, blockCount, \',\'.join([str(x) for x in blockSizes]), \',\'.join([str(x) for x in blockStarts]), translation))\n+                        location = "chromosome:%s:%s:%s:%s:%s" % (options.reference, entry.chrom, chromStart, chromEnd, strand)\n+                        score = " %s:%s" % (options.score_name, entry.score) if options.score_name else \'\'\n+                        seq_description = "%s %s%s" % (options.seqtype, location, score)\n+                        seq_id = "%s " % pep_id\n+                        if options.fa_db:\n+                            seq_id = "%s%s%s%s" % (options.fa_db, options.fa_sep, pep_id, options.fa_sep)\n+                        fa_id = "%s%s" % (seq_id, seq_description)\n+                        fa_entry = ">%s\\n%s\\n" % (fa_id, translation)\n+                        outFile.write(fa_entry)\n+                        if gff_fh:\n+                            if gff_fa:\n+                                gff_fa.write(fa_entry)\n+                            gff_fh.write("##sequence-region %s %d %d\\n" % (entry.chrom, chromStart + 1, chromEnd - 1))\n+                            gff_fh.write("%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%d\\tID=%s\\n" % (entry.chrom, \'splice_junc\', \'gene\', chromStart + 1, chromEnd - 1, entry.score, entry.strand, 0, pep_id))\n+                            for x in range(blockCount):\n+                                start = chromStart + blockStarts[x] + 1\n+                                end = start + blockSizes[x] - 1\n+                                phase = (3 - sum(blockSizes[:x]) % 3) % 3\n+                                gff_fh.write("%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%d\\tParent=%s;ID=%s_%d\\n" % (entry.chrom, \'splice_junc\', \'CDS\', start, end, entry.score, entry.strand, phase, pep_id, pep_id, x))\n+                            # ##gff-version 3\n+                            # ##sequence-region 19 1 287484\n+                            # 19      MassSpec        peptide 282299  287484  10.0    -       0       ID=TEARLSFYSGHSSFGMYCMVFLALYVQ\n+                            # 19      MassSpec        CDS     287474  287484  .       -       0       Parent=TEARLSFYSGHSSFGMYCMVFLALYVQ;transcript_id=ENST00000269812\n+                            # 19      MassSpec        CDS     282752  282809  .       -       1       Parent=TEARLSFYSGHSSFGMYCMVFLALYVQ;transcript_id=ENST00000269812\n+                            # 19      MassSpec        CDS     282299  282310  .       -       0       Parent=TEARLSFYSGHSSFGMYCMVFLALYVQ;transcript_id=ENST00000269812\n+        if bed_fh:\n+            bed_fh.close()\n+        if gff_fh:\n+            if gff_fa:\n+                gff_fa.close()\n+            else:\n+                outFile.close()\n+            gff_fa = open(gff_fa_file, \'r\')\n+            gff_fh.write("##FASTA\\n")\n+            for i, line in enumerate(gff_fa):\n+                gff_fh.write(line)\n+            gff_fh.close()\n+    except Exception as e:\n+        sys.stderr.write("failed: Error reading %s - %s\\n" % (options.input if options.input else \'stdin\', e))\n+        raise\n+        exit(1)\n \n-if __name__ == "__main__" : __main__()\n \n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 6bbce76c78c1 -r 4221664a2bd0 translate_bed_sequences.xml
--- a/translate_bed_sequences.xml Thu Dec 15 18:41:21 2016 -0500
+++ b/translate_bed_sequences.xml Tue Apr 07 11:45:53 2020 -0400
b
@@ -1,7 +1,7 @@
-<tool id="translate_bed_sequences" name="Translate BED Sequences" version="0.1.1">
+<tool id="translate_bed_sequences" name="Translate BED Sequences" version="0.2.0">
     <description>3 frame translation of BED augmented with a sequence column</description>
     <requirements>
-        <requirement type="package" version="1.62">biopython</requirement>
+        <requirement type="package" version="1.76">biopython</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" level="fatal" description="Error" />