Previous changeset 1:6bbce76c78c1 (2016-12-15) |
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/translate_bed_sequences commit 2a470e2c775a7427aa530e058510e4dc7b6d8e80" |
modified:
translate_bed_sequences.py translate_bed_sequences.xml |
b |
diff -r 6bbce76c78c1 -r 4221664a2bd0 translate_bed_sequences.py --- a/translate_bed_sequences.py Thu Dec 15 18:41:21 2016 -0500 +++ b/translate_bed_sequences.py Tue Apr 07 11:45:53 2020 -0400 |
[ |
b'@@ -10,366 +10,390 @@\n # James E Johnson\n #\n #------------------------------------------------------------------------------\n-"""\n \n-"""\n Input: BED file (12 column) + 13th sequence column appended by extract_genomic_dna\n Output: Fasta of 3-frame translations of the spliced sequence\n- \n """\n \n-import sys,re,os.path\n+import optparse\n+import os.path\n+import sys\n import tempfile\n-import optparse\n-from optparse import OptionParser\n-from Bio.Seq import reverse_complement, transcribe, back_transcribe, translate\n+\n+from Bio.Seq import (\n+ reverse_complement,\n+ translate\n+)\n+\n+\n+class BedEntry(object):\n+ def __init__(self, line):\n+ self.line = line\n+ try:\n+ fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+ (chrom, chromStart, chromEnd, name, score, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts) = fields[0:12]\n+ seq = fields[12] if len(fields) > 12 else None\n+ self.chrom = chrom\n+ self.chromStart = int(chromStart)\n+ self.chromEnd = int(chromEnd)\n+ self.name = name\n+ self.score = int(score)\n+ self.strand = strand\n+ self.thickStart = int(thickStart)\n+ self.thickEnd = int(thickEnd)\n+ self.itemRgb = itemRgb\n+ self.blockCount = int(blockCount)\n+ self.blockSizes = [int(x) for x in blockSizes.split(\',\')]\n+ self.blockStarts = [int(x) for x in blockStarts.split(\',\')]\n+ self.seq = seq\n+ except Exception as e:\n+ sys.stderr.write("Unable to read Bed entry %s\\n" % e)\n+ exit(1)\n+\n+ def __str__(self):\n+ return \'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s%s\' % (\n+ self.chrom, self.chromStart, self.chromEnd, self.name, self.score, self.strand, self.thickStart, self.thickEnd, self.itemRgb, self.blockCount,\n+ \',\'.join([str(x) for x in self.blockSizes]),\n+ \',\'.join([str(x) for x in self.blockStarts]),\n+ \'\\t%s\' % self.seq if self.seq else \'\')\n+\n+ def get_splice_junctions(self):\n+ splice_juncs = []\n+ for i in range(self.blockCount - 1):\n+ splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i + 1])\n+ splice_juncs.append(splice_junc)\n+ return splice_juncs\n+\n+ def get_exon_seqs(self):\n+ exons = []\n+ for i in range(self.blockCount):\n+ # splice_junc = "%s:%d_%d" % (self.chrom, self.chromStart + self.blockSizes[i], self.chromStart + self.blockStarts[i+1])\n+ exons.append(self.seq[self.blockStarts[i]:self.blockStarts[i] + self.blockSizes[i]])\n+ if self.strand == \'-\': # reverse complement\n+ exons.reverse()\n+ for i, s in enumerate(exons):\n+ exons[i] = reverse_complement(s)\n+ return exons\n+\n+ def get_spliced_seq(self):\n+ seq = \'\'.join(self.get_exon_seqs())\n+ return seq\n+\n+ def get_translation(self, sequence=None):\n+ translation = None\n+ seq = sequence if sequence else self.get_spliced_seq()\n+ if seq:\n+ seqlen = int(len(seq) / 3) * 3\n+ if seqlen >= 3:\n+ translation = translate(seq[:seqlen])\n+ return translation\n+\n+ def get_translations(self):\n+ translations = []\n+ seq = self.get_spliced_seq()\n+ if seq:\n+ for i in range(3):\n+ translation = self.get_translation(sequence=seq[i:])\n+ if translation:\n+ translations.append(translation)\n+ return translations\n+\n+ def get_subrange(self, tstart, tstop):\n+ """\n+ (start, end)\n+ """\n+ chromStart = self.chromStart\n+ chromEnd = self.chromEnd\n+ r = range(self.blockCount)\n+ if self.strand == \'-\':\n+ r = list(r)\n+ r.reverse()\n+ bStart = 0\n+ for x in r:\n+ bEn'..b's.debug)\n+ for i, tx in enumerate(translations):\n+ if tx:\n+ (chromStart, chromEnd, translation, blockCount, blockSizes, blockStarts) = tx\n+ if options.min_length is not None and len(translation) < options.min_length:\n+ continue\n+ if options.max_stop_codons is not None and translation.count(\'*\') > options.max_stop_codons:\n+ continue\n+ frame_name = \'_%s\' % (i + 1)\n+ pep_id = "%s%s%s" % (options.id_prefix, entry.name, frame_name)\n+ if bed_fh:\n+ bed_fh.write(\'%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%d\\t%d\\t%s\\t%d\\t%s\\t%s\\t%s\\n\' % (str(entry.chrom), chromStart, chromEnd, pep_id, entry.score, entry.strand, chromStart, chromEnd, entry.itemRgb, blockCount, \',\'.join([str(x) for x in blockSizes]), \',\'.join([str(x) for x in blockStarts]), translation))\n+ location = "chromosome:%s:%s:%s:%s:%s" % (options.reference, entry.chrom, chromStart, chromEnd, strand)\n+ score = " %s:%s" % (options.score_name, entry.score) if options.score_name else \'\'\n+ seq_description = "%s %s%s" % (options.seqtype, location, score)\n+ seq_id = "%s " % pep_id\n+ if options.fa_db:\n+ seq_id = "%s%s%s%s" % (options.fa_db, options.fa_sep, pep_id, options.fa_sep)\n+ fa_id = "%s%s" % (seq_id, seq_description)\n+ fa_entry = ">%s\\n%s\\n" % (fa_id, translation)\n+ outFile.write(fa_entry)\n+ if gff_fh:\n+ if gff_fa:\n+ gff_fa.write(fa_entry)\n+ gff_fh.write("##sequence-region %s %d %d\\n" % (entry.chrom, chromStart + 1, chromEnd - 1))\n+ gff_fh.write("%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%d\\tID=%s\\n" % (entry.chrom, \'splice_junc\', \'gene\', chromStart + 1, chromEnd - 1, entry.score, entry.strand, 0, pep_id))\n+ for x in range(blockCount):\n+ start = chromStart + blockStarts[x] + 1\n+ end = start + blockSizes[x] - 1\n+ phase = (3 - sum(blockSizes[:x]) % 3) % 3\n+ gff_fh.write("%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%s\\t%d\\tParent=%s;ID=%s_%d\\n" % (entry.chrom, \'splice_junc\', \'CDS\', start, end, entry.score, entry.strand, phase, pep_id, pep_id, x))\n+ # ##gff-version 3\n+ # ##sequence-region 19 1 287484\n+ # 19 MassSpec peptide 282299 287484 10.0 - 0 ID=TEARLSFYSGHSSFGMYCMVFLALYVQ\n+ # 19 MassSpec CDS 287474 287484 . - 0 Parent=TEARLSFYSGHSSFGMYCMVFLALYVQ;transcript_id=ENST00000269812\n+ # 19 MassSpec CDS 282752 282809 . - 1 Parent=TEARLSFYSGHSSFGMYCMVFLALYVQ;transcript_id=ENST00000269812\n+ # 19 MassSpec CDS 282299 282310 . - 0 Parent=TEARLSFYSGHSSFGMYCMVFLALYVQ;transcript_id=ENST00000269812\n+ if bed_fh:\n+ bed_fh.close()\n+ if gff_fh:\n+ if gff_fa:\n+ gff_fa.close()\n+ else:\n+ outFile.close()\n+ gff_fa = open(gff_fa_file, \'r\')\n+ gff_fh.write("##FASTA\\n")\n+ for i, line in enumerate(gff_fa):\n+ gff_fh.write(line)\n+ gff_fh.close()\n+ except Exception as e:\n+ sys.stderr.write("failed: Error reading %s - %s\\n" % (options.input if options.input else \'stdin\', e))\n+ raise\n+ exit(1)\n \n-if __name__ == "__main__" : __main__()\n \n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 6bbce76c78c1 -r 4221664a2bd0 translate_bed_sequences.xml --- a/translate_bed_sequences.xml Thu Dec 15 18:41:21 2016 -0500 +++ b/translate_bed_sequences.xml Tue Apr 07 11:45:53 2020 -0400 |
b |
@@ -1,7 +1,7 @@ -<tool id="translate_bed_sequences" name="Translate BED Sequences" version="0.1.1"> +<tool id="translate_bed_sequences" name="Translate BED Sequences" version="0.2.0"> <description>3 frame translation of BED augmented with a sequence column</description> <requirements> - <requirement type="package" version="1.62">biopython</requirement> + <requirement type="package" version="1.76">biopython</requirement> </requirements> <stdio> <exit_code range="1:" level="fatal" description="Error" /> |