Previous changeset 2:78b8213e122d (2016-12-15) |
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/map_peptides_to_bed commit 2a470e2c775a7427aa530e058510e4dc7b6d8e80" |
modified:
map_peptides_to_bed.py map_peptides_to_bed.xml test-data/mapped_peptides.bed |
b |
diff -r 78b8213e122d -r 704ea6303c4c map_peptides_to_bed.py --- a/map_peptides_to_bed.py Thu Dec 15 18:36:55 2016 -0500 +++ b/map_peptides_to_bed.py Tue Apr 07 11:41:15 2020 -0400 |
[ |
b'@@ -10,324 +10,342 @@\n # James E Johnson\n #\n #------------------------------------------------------------------------------\n-"""\n-\n-"""\n Input: list of protein_accessions, peptide_sequence\n- GFF3 with fasta \n+ GFF3 with fasta\n Output: GFF3 of peptides\n \n Filter: Must cross splice boundary\n- \n """\n \n-import sys,re,os.path\n-import tempfile\n import optparse\n-from optparse import OptionParser\n-from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate\n+import os.path\n+import sys\n+\n+from Bio.Seq import (\n+ reverse_complement,\n+ translate\n+)\n+\n+\n+class BedEntry(object):\n+ def __init__(self, line):\n+ self.line = line\n+ try:\n+ fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+ (chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts) = fields[0:12]\n+ seq = fields[12] if len(fields) > 12 else None\n+ self.chrom = chrom\n+ self.chromStart = int(chromStart)\n+ self.chromEnd = int(chromEnd)\n+ self.name = name\n+ self.score = int(score)\n+ self.strand = strand\n+ self.thickStart = int(thickStart)\n+ self.thickEnd = int(thickEnd)\n+ self.itemRgb = itemRgb\n+ self.blockCount = int(blockCount)\n+ self.blockSizes = [int(x) for x in blockSizes.split(\',\')]\n+ self.blockStarts = [int(x) for x in blockStarts.split(\',\')]\n+ self.seq = seq\n+ except Exception as e:\n+ sys.stderr.write("Unable to read Bed entry %s \\n" % e)\n+ exit(1)\n+\n+ def __str__(self):\n+ return \'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s%s\' % (\n+ self.chrom, self.chromStart, self.chromEnd, self.name, self.score, self.strand, self.thickStart, self.thickEnd, self.itemRgb, self.blockCount,\n+ \',\'.join([str(x) for x in self.blockSizes]),\n+ \',\'.join([str(x) for x in self.blockStarts]),\n+ \'\\t%s\' % self.seq if self.seq else \'\')\n+\n+ def get_splice_junctions(self):\n+ splice_juncs = []\n+ for i in range(self.blockCount - 1):\n+ splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i + 1])\n+ splice_juncs.append(splice_junc)\n+ return splice_juncs\n+\n+ def get_exon_seqs(self):\n+ exons = []\n+ for i in range(self.blockCount):\n+ # splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i+1])\n+ exons.append(self.seq[self.blockStarts[i]:self.blockStarts[i] + self.blockSizes[i]])\n+ if self.strand == \'-\': # reverse complement\n+ exons.reverse()\n+ for i, s in enumerate(exons):\n+ exons[i] = reverse_complement(s)\n+ return exons\n \n-class BedEntry( object ):\n- def __init__(self, line):\n- self.line = line\n- try:\n- fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n- (chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts) = fields[0:12]\n- seq = fields[12] if len(fields) > 12 else None\n- self.chrom = chrom\n- self.chromStart = int(chromStart)\n- self.chromEnd = int(chromEnd)\n- self.name = name\n- self.score = int(score)\n- self.strand = strand\n- self.thickStart = int(thickStart)\n- self.thickEnd = int(thickEnd)\n- self.itemRgb = itemRgb\n- self.blockCount = int(blockCount)\n- self.blockSizes = [int(x) for x in blockSizes.split(\',\')]\n- self.blockStarts = [int(x) for x in blockStarts.split(\',\')]\n- self.seq = seq\n- except Exception, e:\n- print >> sys.stderr, "Unable to read Bed entry" % e\n- exit(1)\n- def __str__(self):\n- return \'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s%s\' % (\n- self.chrom, self.chromStart, self.chromEnd, self.name, self.score, self.strand,'..b' if bed_fh:\n- entry.thickStart = pepStart\n- entry.thickEnd = pepEnd\n- bedfields = str(entry).split(\'\\t\')\n- if options.gffTags:\n- bedfields[3] = "ID=%s;Name=%s" % (entry.name,peptide) \n- bed_fh.write("%s\\t%s\\t%s\\n" % (\'\\t\'.join(bedfields[:12]),peptide,entry.seq))\n- except Exception, e:\n- print >> sys.stderr, "failed: Error reading %s - %s" % (options.input if options.input else \'stdin\',e)\n+ for i, line in enumerate(inputFile):\n+ # print >> sys.stderr, "%3d\\t%s\\n" % (i, line)\n+ if line.startswith(\'#\'):\n+ continue\n+ fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+ # print >> sys.stderr, "%3d\\t%s\\n" % (i, fields)\n+ if peptide_column < len(fields):\n+ peptide = fields[peptide_column]\n+ prot_name = fields[name_column] if name_column is not None and name_column < len(fields) else None\n+ if prot_name:\n+ offset = fields[start_column] if start_column is not None and start_column < len(fields) else -1\n+ if prot_name not in prot_peps:\n+ prot_peps[prot_name] = dict()\n+ prot_peps[prot_name][peptide] = offset\n+ else:\n+ unassigned_peps.add(peptide)\n+ if options.debug:\n+ sys.stderr.write("prot_peps: %s\\n" % prot_peps)\n+ sys.stderr.write("unassigned_peps: %s\\n" % unassigned_peps)\n+ except Exception as e:\n+ sys.stderr.write("failed: Error reading %s - %s\\n" % (options.input if options.input else \'stdin\', e))\n+ exit(1)\n+ # Output files\n+ bed_fh = None\n+ if options.bed:\n+ bed_fh = open(options.bed, \'w\')\n+ bed_fh.write(\'track name="%s" type=bedDetail description="%s" \\n\' % (\'novel_junction_peptides\', \'test\'))\n+ if options.gffTags:\n+ bed_fh.write(\'#gffTags\\n\')\n+ # if options.gff:\n+ # gff_fh = open(options.gff, \'w\')\n+ # gff_fh.write("##gff-version 3.2.1\\n")\n+ # if options.reference:\n+ # gff_fh.write("##genome-build %s %s\\n" % (options.refsource if options.refsource else \'unknown\', options.reference))\n+ try:\n+ for i, line in enumerate(inputBed):\n+ # print >> sys.stderr, "%3d:\\t%s\\n" % (i, line)\n+ if line.startswith(\'track\'):\n+ continue\n+ entry = BedEntry(line)\n+ if entry.name in prot_peps:\n+ for (peptide, offset) in prot_peps[entry.name].items():\n+ if offset < 0:\n+ offset = entry.seq.find(peptide)\n+ if options.debug:\n+ sys.stderr.write("%s\\t%s\\t%d\\t%s\\n" % (entry.name, peptide, offset, entry.seq))\n+ if offset >= 0:\n+ tstart = offset * 3\n+ tstop = tstart + len(peptide) * 3\n+ if options.debug:\n+ sys.stderr.write("%d\\t%d\\t%d\\n" % (offset, tstart, tstop))\n+ (pepStart, pepEnd) = entry.get_subrange(tstart, tstop)\n+ if options.debug:\n+ sys.stderr.write("%d\\t%d\\t%d\\n" % (offset, pepStart, pepEnd))\n+ if bed_fh:\n+ entry.thickStart = pepStart\n+ entry.thickEnd = pepEnd\n+ bedfields = str(entry).split(\'\\t\')\n+ if options.gffTags:\n+ bedfields[3] = "ID=%s;Name=%s" % (entry.name, peptide)\n+ bed_fh.write("%s\\t%s\\t%s\\n" % (\'\\t\'.join(bedfields[:12]), peptide, entry.seq))\n+ except Exception as e:\n+ sys.stderr.write("failed: Error reading %s - %s\\n" % (options.input if options.input else \'stdin\', e))\n+ raise\n \n-if __name__ == "__main__" : __main__()\n \n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 78b8213e122d -r 704ea6303c4c map_peptides_to_bed.xml --- a/map_peptides_to_bed.xml Thu Dec 15 18:36:55 2016 -0500 +++ b/map_peptides_to_bed.xml Tue Apr 07 11:41:15 2020 -0400 |
b |
@@ -1,7 +1,7 @@ -<tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.1.1"> +<tool id="map_peptides_to_bed" name="Map peptides to a bed file" version="0.2"> <description>for viewing in a genome browser</description> <requirements> - <requirement type="package" version="1.62">biopython</requirement> + <requirement type="package" version="1.76">biopython</requirement> </requirements> <stdio> <exit_code range="1:" /> |
b |
diff -r 78b8213e122d -r 704ea6303c4c test-data/mapped_peptides.bed --- a/test-data/mapped_peptides.bed Thu Dec 15 18:36:55 2016 -0500 +++ b/test-data/mapped_peptides.bed Tue Apr 07 11:41:15 2020 -0400 |
b |
@@ -3,11 +3,11 @@ 15 40902460 40907575 ID=JUNC00019210_2;Name=RNGRNKKLEDNYCEIT 1 + 40902484 40907575 255,0,0 2 35,37 0,5078 RNGRNKKLEDNYCEIT SYENSEKVRNGRNKKLEDNYCEIT 15 40902460 40907575 ID=JUNC00019210_2;Name=SYENSEKVR 1 + 40902460 40902487 255,0,0 2 35,37 0,5078 SYENSEKVR SYENSEKVRNGRNKKLEDNYCEIT 15 40902461 40907549 ID=JUNC00019210_3;Name=KIVRKSEMEGI 1 + 40902467 40907543 255,0,0 2 34,11 0,5077 KIVRKSEMEGI HMKIVRKSEMEGIRN -9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHV 1 - 17421 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHV APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE 9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHVMLGAE 1 - 17406 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHVMLGAE APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE 9 17406 18053 ID=JUNC00000003_1;Name=APWTSGPCRYKKYVF 1 - 17460 18053 255,0,0 2 73,26 0,621 APWTSGPCRYKKYVF APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE +9 17406 18053 ID=JUNC00000003_1;Name=LDPLAGAVTKTHV 1 - 17421 17460 255,0,0 2 73,26 0,621 LDPLAGAVTKTHV APWTSGPCRYKKYVFLDPLAGAVTKTHVMLGAE +9 17404 18051 ID=JUNC00000003_3;Name=PLDERALQVQEVCLPG 1 - 17455 18051 255,0,0 2 75,24 0,623 PLDERALQVQEVCLPG PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD 9 17404 18051 ID=JUNC00000003_3;Name=CLPGPPGWCCNKDPCDAGGRD 1 - 17404 17467 255,0,0 2 75,24 0,623 CLPGPPGWCCNKDPCDAGGRD PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD -9 17404 18051 ID=JUNC00000003_3;Name=PLDERALQVQEVCLPG 1 - 17455 18051 255,0,0 2 75,24 0,623 PLDERALQVQEVCLPG PLDERALQVQEVCLPGPPGWCCNKDPCDAGGRD 8 27369376 27370079 ID=JUNC00000874_2;Name=PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA 1 + 27369376 27370079 255,0,0 2 51,48 0,655 PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA 8 27369376 27370079 ID=JUNC00000874_2;Name=DMSHGYVTVKGYHKA 1 + 27369397 27370046 255,0,0 2 51,48 0,655 DMSHGYVTVKGYHKA PTSCNPSDMSHGYVTVKGYHKAKATHRGPWLVA 7 148909514 148910831 ID=JUNC00002152_1;Name=DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS 1 + 148909514 148910831 255,0,0 2 60,39 0,1278 DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS DQQDLADRDIPTDPNSGENKSLSSQHMTFCHGS @@ -15,13 +15,13 @@ 7 148909515 148910811 ID=JUNC00002152_2;Name=IWQTEIFPRI 1 + 148909524 148909554 255,0,0 2 59,19 0,1277 IWQTEIFPRI ISRIWQTEIFPRIPIQVRTRVSHLST 7 148909515 148910811 ID=JUNC00002152_2;Name=IFPRIPIQVRTRVSHL 1 + 148909539 148910805 255,0,0 2 59,19 0,1277 IFPRIPIQVRTRVSHL ISRIWQTEIFPRIPIQVRTRVSHLST 6 41766614 41767580 ID=JUNC00002625_1;Name=LKDSGGLAVIIERRLGSMSSLT 1 - 41766614 41767580 255,0,0 2 53,13 0,953 LKDSGGLAVIIERRLGSMSSLT LKDSGGLAVIIERRLGSMSSLT +6 41766614 41767580 ID=JUNC00002625_1;Name=GLAVIIERRLGSMSS 1 - 41766620 41766665 255,0,0 2 53,13 0,953 GLAVIIERRLGSMSS LKDSGGLAVIIERRLGSMSSLT 6 41766614 41767580 ID=JUNC00002625_1;Name=DSGGLAVIIERR 1 - 41766638 41767574 255,0,0 2 53,13 0,953 DSGGLAVIIERR LKDSGGLAVIIERRLGSMSSLT -6 41766614 41767580 ID=JUNC00002625_1;Name=GLAVIIERRLGSMSS 1 - 41766620 41766665 255,0,0 2 53,13 0,953 GLAVIIERRLGSMSS LKDSGGLAVIIERRLGSMSSLT +6 41766612 41767578 ID=JUNC00002625_3;Name=FRWSGR 1 - 41766654 41767572 255,0,0 2 55,11 0,955 FRWSGR KRFRWSGRNHREKIGVHVVFDQ 6 41766612 41767578 ID=JUNC00002625_3;Name=KRFRWSGRNHREKIGVHVVFDQ 1 - 41766612 41767578 255,0,0 2 55,11 0,955 KRFRWSGRNHREKIGVHVVFDQ KRFRWSGRNHREKIGVHVVFDQ -6 41766612 41767578 ID=JUNC00002625_3;Name=FRWSGR 1 - 41766654 41767572 255,0,0 2 55,11 0,955 FRWSGR KRFRWSGRNHREKIGVHVVFDQ 6 41766612 41767578 ID=JUNC00002625_3;Name=NHREKIGVHVVFD 1 - 41766615 41766654 255,0,0 2 55,11 0,955 NHREKIGVHVVFD KRFRWSGRNHREKIGVHVVFDQ 6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAVMNQFENSMRRYL 1 - 84856497 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFENSMRRYL LKMKSEAVMNQFENSMRRYL -6 84856497 84862316 ID=JUNC00002772_1;Name=MNQFENSMRRYL 1 - 84856497 84862292 255,0,0 2 7,53 0,5766 MNQFENSMRRYL LKMKSEAVMNQFENSMRRYL 6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAVMNQFEN 1 - 84862274 84862316 255,0,0 2 7,53 0,5766 LKMKSEAVMNQFEN LKMKSEAVMNQFENSMRRYL 6 84856497 84862316 ID=JUNC00002772_1;Name=LKMKSEAV 1 - 84862292 84862316 255,0,0 2 7,53 0,5766 LKMKSEAV LKMKSEAVMNQFENSMRRYL +6 84856497 84862316 ID=JUNC00002772_1;Name=MNQFENSMRRYL 1 - 84856497 84862292 255,0,0 2 7,53 0,5766 MNQFENSMRRYL LKMKSEAVMNQFENSMRRYL 6 84856497 84862316 ID=JUNC00002772_1;Name=KSEAVMNQFENSMR 1 - 84862265 84862307 255,0,0 2 7,53 0,5766 KSEAVMNQFENSMR LKMKSEAVMNQFENSMRRYL |