Repository 'map_peptides_to_bed'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/map_peptides_to_bed

Changeset 3:704ea6303c4c (2020-04-07)
Previous changeset 2:78b8213e122d (2016-12-15)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/map_peptides_to_bed commit 2a470e2c775a7427aa530e058510e4dc7b6d8e80"
modified:
map_peptides_to_bed.py
map_peptides_to_bed.xml
test-data/mapped_peptides.bed
b
diff -r 78b8213e122d -r 704ea6303c4c map_peptides_to_bed.py
--- a/map_peptides_to_bed.py Thu Dec 15 18:36:55 2016 -0500
+++ b/map_peptides_to_bed.py Tue Apr 07 11:41:15 2020 -0400
[
b'@@ -10,324 +10,342 @@\n #  James E Johnson\n #\n #------------------------------------------------------------------------------\n-"""\n-\n-"""\n Input: list of protein_accessions, peptide_sequence\n-       GFF3 with fasta \n+       GFF3 with fasta\n Output: GFF3 of peptides\n \n Filter: Must cross splice boundary\n-  \n """\n \n-import sys,re,os.path\n-import tempfile\n import optparse\n-from optparse import OptionParser\n-from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate\n+import os.path\n+import sys\n+\n+from Bio.Seq import (\n+    reverse_complement,\n+    translate\n+)\n+\n+\n+class BedEntry(object):\n+    def __init__(self, line):\n+        self.line = line\n+        try:\n+            fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+            (chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts) = fields[0:12]\n+            seq = fields[12] if len(fields) > 12 else None\n+            self.chrom = chrom\n+            self.chromStart = int(chromStart)\n+            self.chromEnd = int(chromEnd)\n+            self.name = name\n+            self.score = int(score)\n+            self.strand = strand\n+            self.thickStart = int(thickStart)\n+            self.thickEnd = int(thickEnd)\n+            self.itemRgb = itemRgb\n+            self.blockCount = int(blockCount)\n+            self.blockSizes = [int(x) for x in blockSizes.split(\',\')]\n+            self.blockStarts = [int(x) for x in blockStarts.split(\',\')]\n+            self.seq = seq\n+        except Exception as e:\n+            sys.stderr.write("Unable to read Bed entry %s \\n" % e)\n+            exit(1)\n+\n+    def __str__(self):\n+        return \'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s%s\' % (\n+               self.chrom, self.chromStart, self.chromEnd, self.name, self.score, self.strand, self.thickStart, self.thickEnd, self.itemRgb, self.blockCount,\n+               \',\'.join([str(x) for x in self.blockSizes]),\n+               \',\'.join([str(x) for x in self.blockStarts]),\n+               \'\\t%s\' % self.seq if self.seq else \'\')\n+\n+    def get_splice_junctions(self):\n+        splice_juncs = []\n+        for i in range(self.blockCount - 1):\n+            splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i + 1])\n+            splice_juncs.append(splice_junc)\n+        return splice_juncs\n+\n+    def get_exon_seqs(self):\n+        exons = []\n+        for i in range(self.blockCount):\n+            # splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i+1])\n+            exons.append(self.seq[self.blockStarts[i]:self.blockStarts[i] + self.blockSizes[i]])\n+        if self.strand == \'-\':  # reverse complement\n+            exons.reverse()\n+            for i, s in enumerate(exons):\n+                exons[i] = reverse_complement(s)\n+        return exons\n \n-class BedEntry( object ):\n-  def __init__(self, line):\n-    self.line = line\n-    try:\n-      fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n-      (chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts) = fields[0:12]\n-      seq = fields[12] if len(fields) > 12 else None\n-      self.chrom = chrom\n-      self.chromStart = int(chromStart)\n-      self.chromEnd = int(chromEnd)\n-      self.name = name\n-      self.score = int(score)\n-      self.strand = strand\n-      self.thickStart = int(thickStart)\n-      self.thickEnd = int(thickEnd)\n-      self.itemRgb = itemRgb\n-      self.blockCount = int(blockCount)\n-      self.blockSizes = [int(x) for x in blockSizes.split(\',\')]\n-      self.blockStarts = [int(x) for x in blockStarts.split(\',\')]\n-      self.seq = seq\n-    except Exception, e:\n-      print >> sys.stderr, "Unable to read Bed entry" % e\n-      exit(1)\n-  def __str__(self):\n-    return \'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s%s\' % (\n-      self.chrom, self.chromStart, self.chromEnd, self.name, self.score, self.strand,'..b'    if bed_fh:\n-              entry.thickStart = pepStart\n-              entry.thickEnd = pepEnd\n-              bedfields = str(entry).split(\'\\t\')\n-              if options.gffTags:\n-                bedfields[3] = "ID=%s;Name=%s" % (entry.name,peptide) \n-              bed_fh.write("%s\\t%s\\t%s\\n" % (\'\\t\'.join(bedfields[:12]),peptide,entry.seq))\n-  except Exception, e:\n-    print >> sys.stderr, "failed: Error reading %s - %s" % (options.input if options.input else \'stdin\',e)\n+        for i, line in enumerate(inputFile):\n+            # print >> sys.stderr, "%3d\\t%s\\n" % (i, line)\n+            if line.startswith(\'#\'):\n+                continue\n+            fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+            # print >> sys.stderr, "%3d\\t%s\\n" % (i, fields)\n+            if peptide_column < len(fields):\n+                peptide = fields[peptide_column]\n+                prot_name = fields[name_column] if name_column is not None and name_column < len(fields) else None\n+                if prot_name:\n+                    offset = fields[start_column] if start_column is not None and start_column < len(fields) else -1\n+                    if prot_name not in prot_peps:\n+                        prot_peps[prot_name] = dict()\n+                    prot_peps[prot_name][peptide] = offset\n+                else:\n+                    unassigned_peps.add(peptide)\n+        if options.debug:\n+            sys.stderr.write("prot_peps: %s\\n" % prot_peps)\n+            sys.stderr.write("unassigned_peps: %s\\n" % unassigned_peps)\n+    except Exception as e:\n+        sys.stderr.write("failed: Error reading %s - %s\\n" % (options.input if options.input else \'stdin\', e))\n+        exit(1)\n+    # Output files\n+    bed_fh = None\n+    if options.bed:\n+        bed_fh = open(options.bed, \'w\')\n+        bed_fh.write(\'track name="%s" type=bedDetail description="%s" \\n\' % (\'novel_junction_peptides\', \'test\'))\n+        if options.gffTags:\n+            bed_fh.write(\'#gffTags\\n\')\n+    # if options.gff:\n+    #   gff_fh = open(options.gff, \'w\')\n+    #   gff_fh.write("##gff-version 3.2.1\\n")\n+    #   if options.reference:\n+    #    gff_fh.write("##genome-build %s %s\\n" % (options.refsource if options.refsource else \'unknown\', options.reference))\n+    try:\n+        for i, line in enumerate(inputBed):\n+            # print >> sys.stderr, "%3d:\\t%s\\n" % (i, line)\n+            if line.startswith(\'track\'):\n+                continue\n+            entry = BedEntry(line)\n+            if entry.name in prot_peps:\n+                for (peptide, offset) in prot_peps[entry.name].items():\n+                    if offset < 0:\n+                        offset = entry.seq.find(peptide)\n+                        if options.debug:\n+                            sys.stderr.write("%s\\t%s\\t%d\\t%s\\n" % (entry.name, peptide, offset, entry.seq))\n+                    if offset >= 0:\n+                        tstart = offset * 3\n+                        tstop = tstart + len(peptide) * 3\n+                        if options.debug:\n+                            sys.stderr.write("%d\\t%d\\t%d\\n" % (offset, tstart, tstop))\n+                        (pepStart, pepEnd) = entry.get_subrange(tstart, tstop)\n+                        if options.debug:\n+                            sys.stderr.write("%d\\t%d\\t%d\\n" % (offset, pepStart, pepEnd))\n+                        if bed_fh:\n+                            entry.thickStart = pepStart\n+                            entry.thickEnd = pepEnd\n+                            bedfields = str(entry).split(\'\\t\')\n+                            if options.gffTags:\n+                                bedfields[3] = "ID=%s;Name=%s" % (entry.name, peptide)\n+                            bed_fh.write("%s\\t%s\\t%s\\n" % (\'\\t\'.join(bedfields[:12]), peptide, entry.seq))\n+    except Exception as e:\n+        sys.stderr.write("failed: Error reading %s - %s\\n" % (options.input if options.input else \'stdin\', e))\n+        raise\n \n-if __name__ == "__main__" : __main__()\n \n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 78b8213e122d -r 704ea6303c4c map_peptides_to_bed.xml
--- a/map_peptides_to_bed.xml Thu Dec 15 18:36:55 2016 -0500
+++ b/map_peptides_to_bed.xml Tue Apr 07 11:41:15 2020 -0400
b
@@ -1,7 +1,7 @@
-<tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.1.1">
+<tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.2">
     <description>for viewing in a genome browser</description>
     <requirements>
-        <requirement type="package" version="1.62">biopython</requirement>
+        <requirement type="package" version="1.76">biopython</requirement>
     </requirements>
     <stdio>
         <exit_code range="1:" />
b
diff -r 78b8213e122d -r 704ea6303c4c test-data/mapped_peptides.bed
--- a/test-data/mapped_peptides.bed Thu Dec 15 18:36:55 2016 -0500
+++ b/test-data/mapped_peptides.bed Tue Apr 07 11:41:15 2020 -0400
b
@@ -3,11 +3,11 @@
 15 40902460 40907575 ID=JUNC00019210_2;Name=RNGRNKKLEDNYCEIT 1 + 40902484 40907575 255,0,0 2 35,37 0,5078 RNGRNKKLEDNYCEIT SYENSEKVRNGRNKKLEDNYCEIT
 15 40902460 40907575 ID=JUNC00019210_2;Name=SYENSEKVR 1 + 40902460 40902487 255,0,0 2 35,37 0,5078 SYENSEKVR SYENSEKVRNGRNKKLEDNYCEIT
 15 40902461 40907549 ID=JUNC00019210_3;Name=KIVRKSEMEGI 1 + 40902467 40907543 255,0,0 2 34,11 0,5077 KIVRKSEMEGI HMKIVRKSEMEGIRN
-9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHV 1 - 17421 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHV APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE
 9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHVMLGAE 1 - 17406 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHVMLGAE APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE
 9 17406 18053 ID=JUNC00000003_1;Name=APWTSGPCRYKKYVF 1 - 17460 18053 255,0,0 2 73,26 0,621 APWTSGPCRYKKYVF APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE
+9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHV 1 - 17421 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHV APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE
+9 17404 18051 ID=JUNC00000003_3;Name=PLDERALQVQEVCLPG 1 - 17455 18051 255,0,0 2 75,24 0,623 PLDERALQVQEVCLPG PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD
 9 17404 18051 ID=JUNC00000003_3;Name=CLPGPPGWCCNKDPCDAGGRD 1 - 17404 17467 255,0,0 2 75,24 0,623 CLPGPPGWCCNKDPCDAGGRD PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD
-9 17404 18051 ID=JUNC00000003_3;Name=PLDERALQVQEVCLPG 1 - 17455 18051 255,0,0 2 75,24 0,623 PLDERALQVQEVCLPG PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD
 8 27369376 27370079 ID=JUNC00000874_2;Name=PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA 1 + 27369376 27370079 255,0,0 2 51,48 0,655 PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA
 8 27369376 27370079 ID=JUNC00000874_2;Name=DMSHGYVTVKGYHKA 1 + 27369397 27370046 255,0,0 2 51,48 0,655 DMSHGYVTVKGYHKA PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA
 7 148909514 148910831 ID=JUNC00002152_1;Name=DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS 1 + 148909514 148910831 255,0,0 2 60,39 0,1278 DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS
@@ -15,13 +15,13 @@
 7 148909515 148910811 ID=JUNC00002152_2;Name=IWQTEIFPRI 1 + 148909524 148909554 255,0,0 2 59,19 0,1277 IWQTEIFPRI ISRIWQTEIFPRIPIQVRTRVSHLST
 7 148909515 148910811 ID=JUNC00002152_2;Name=IFPRIPIQVRTRVSHL 1 + 148909539 148910805 255,0,0 2 59,19 0,1277 IFPRIPIQVRTRVSHL ISRIWQTEIFPRIPIQVRTRVSHLST
 6 41766614 41767580 ID=JUNC00002625_1;Name=LKDSGGLAVIIERRLGSMSSLT 1 - 41766614 41767580 255,0,0 2 53,13 0,953 LKDSGGLAVIIERRLGSMSSLT LKDSGGLAVIIERRLGSMSSLT
+6 41766614 41767580 ID=JUNC00002625_1;Name=GLAVIIERRLGSMSS 1 - 41766620 41766665 255,0,0 2 53,13 0,953 GLAVIIERRLGSMSS LKDSGGLAVIIERRLGSMSSLT
 6 41766614 41767580 ID=JUNC00002625_1;Name=DSGGLAVIIERR 1 - 41766638 41767574 255,0,0 2 53,13 0,953 DSGGLAVIIERR LKDSGGLAVIIERRLGSMSSLT
-6 41766614 41767580 ID=JUNC00002625_1;Name=GLAVIIERRLGSMSS 1 - 41766620 41766665 255,0,0 2 53,13 0,953 GLAVIIERRLGSMSS LKDSGGLAVIIERRLGSMSSLT
+6 41766612 41767578 ID=JUNC00002625_3;Name=FRWSGR 1 - 41766654 41767572 255,0,0 2 55,11 0,955 FRWSGR KRFRWSGRNHREKIGVHVVFDQ
 6 41766612 41767578 ID=JUNC00002625_3;Name=KRFRWSGRNHREKIGVHVVFDQ 1 - 41766612 41767578 255,0,0 2 55,11 0,955 KRFRWSGRNHREKIGVHVVFDQ KRFRWSGRNHREKIGVHVVFDQ
-6 41766612 41767578 ID=JUNC00002625_3;Name=FRWSGR 1 - 41766654 41767572 255,0,0 2 55,11 0,955 FRWSGR KRFRWSGRNHREKIGVHVVFDQ
 6 41766612 41767578 ID=JUNC00002625_3;Name=NHREKIGVHVVFD 1 - 41766615 41766654 255,0,0 2 55,11 0,955 NHREKIGVHVVFD KRFRWSGRNHREKIGVHVVFDQ
 6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAVMNQFENSMRRYL 1 - 84856497 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFENSMRRYL LKMKSEAVMNQFENSMRRYL
-6 84856497 84862316 ID=JUNC00002772_1;Name=MNQFENSMRRYL 1 - 84856497 84862292 255,0,0 2 7,53 0,5766 MNQFENSMRRYL LKMKSEAVMNQFENSMRRYL
 6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAVMNQFEN 1 - 84862274 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFEN LKMKSEAVMNQFENSMRRYL
 6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAV 1 - 84862292 84862316 255,0,0 2 7,53 0,5766 LKMKSEAV LKMKSEAVMNQFENSMRRYL
+6 84856497 84862316 ID=JUNC00002772_1;Name=MNQFENSMRRYL 1 - 84856497 84862292 255,0,0 2 7,53 0,5766 MNQFENSMRRYL LKMKSEAVMNQFENSMRRYL
 6 84856497 84862316 ID=JUNC00002772_1;Name=KSEAVMNQFENSMR 1 - 84862265 84862307 255,0,0 2 7,53 0,5766 KSEAVMNQFENSMR LKMKSEAVMNQFENSMRRYL