Galaxy |

Changeset 0:5f49ffce52cb (2019-04-03)

Next changeset 1:cb0378d2d487 (2021-03-14)

Commit message:
planemo upload commit be7e9677908b7864ef0b965a1e219a1840eeb2ec

added:
peptide_genomic_coordinate.py
peptide_genomic_coordinate.xml
test-data/peptides.tabular
test-data/peptides_BED.bed
test-data/test_genomic_mapping_sqlite.sqlite
test-data/test_mz_to_sqlite.sqlite

diff -r 000000000000 -r 5f49ffce52cb peptide_genomic_coordinate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/peptide_genomic_coordinate.py Wed Apr 03 04:04:18 2019 -0400

[

b'@@ -0,0 +1,154 @@\n+#!/usr/bin/env python\n+# \n+# Author: Praveen Kumar\n+# University of Minnesota\n+#\n+# Get peptide\'s genomic coordinate from the protein\'s genomic mapping sqlite file (which is derived from the https://toolshed.g2.bx.psu.edu/view/galaxyp/translate_bed/038ecf54cbec)\n+# \n+# python peptideGenomicCoordinate.py <peptide_list> <mz_to_sqlite DB> <genomic mapping file DB> <output.bed>\n+# \n+import sys\n+import sqlite3\n+\n+\n+def main():\n+ conn = sqlite3.connect(sys.argv[2])\n+ c = conn.cursor()\n+ c.execute("DROP table if exists novel")\n+ conn.commit()\n+ c.execute("CREATE TABLE novel(peptide text)")\n+ pepfile = open(sys.argv[1],"r")\n+ \n+ pep_seq = []\n+ for seq in pepfile.readlines():\n+ seq = seq.strip()\n+ pep_seq.append(tuple([seq]))\n+ \n+ c.executemany("insert into novel(peptide) values(?)", pep_seq)\n+ conn.commit()\n+ \n+ c.execute("SELECT distinct psm.sequence, ps.id, ps.sequence from db_sequence ps, psm_entries psm, novel n, proteins_by_peptide pbp where psm.sequence = n.peptide and pbp.peptide_ref = psm.id and pbp.id = ps.id")\n+ rows = c.fetchall()\n+\n+ conn1 = sqlite3.connect(sys.argv[3])\n+ c1 = conn1.cursor()\n+\n+ outfh = open(sys.argv[4], "w")\n+\n+ master_dict = {}\n+ for each in rows:\n+ peptide = each[0]\n+ acc = each[1]\n+ acc_seq = each[2]\n+ \n+ c1.execute("SELECT chrom,start,end,name,strand,cds_start,cds_end FROM feature_cds_map map WHERE map.name = \'"+acc+"\'")\n+ coordinates = c1.fetchall()\n+ \n+ if len(coordinates) != 0:\n+ pep_start = 0\n+ pep_end = 0\n+ flag = 0\n+ splice_flag = 0\n+ spliced_peptide = []\n+ for each_entry in coordinates:\n+ chromosome = each_entry[0]\n+ start = int(each_entry[1])\n+ end = int(each_entry[2])\n+ strand = each_entry[4]\n+ cds_start = int(each_entry[5])\n+ cds_end = int(each_entry[6])\n+ pep_pos_start = (acc_seq.find(peptide)*3)\n+ pep_pos_end = pep_pos_start + (len(peptide)*3)\n+ if pep_pos_start >= cds_start and pep_pos_end <= cds_end:\n+ if strand == "+":\n+ pep_start = 
start + pep_pos_start - cds_start\n+ pep_end = start + pep_pos_end - cds_start\n+ pep_thick_start = 0\n+ pep_thick_end = len(peptide)\n+ flag == 1\n+ else:\n+ pep_end = end - pep_pos_start + cds_start\n+ pep_start = end - pep_pos_end + cds_start\n+ pep_thick_start = 0\n+ pep_thick_end = len(peptide)\n+ flag == 1\n+ spliced_peptide = []\n+ splice_flag = 0\n+ else:\n+ if flag == 0:\n+ if strand == "+":\n+ if pep_pos_start >= cds_start and pep_pos_start <= cds_end and pep_pos_end > cds_end:\n+ pep_start = start + pep_pos_start - cds_start\n+ pep_end = end\n+ pep_thick_start = 0\n+ pep_thick_end = (pep_end-pep_start)\n+ spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n+ splice_flag = splice_flag + 1\n+ if splice_flag == 2:\n+ flag = 1\n+ elif pep_pos_end >= cds_start and pep_pos_end <= cds_end and pep_pos_start < cds_start:\n+ pep_start = start\n+ pep_end = start + pep_pos_end - cds_start\n+ pep_thick_start = (len(peptide)*3)-(pep_end-pep_start)\n+ pe'..b' spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n+ splice_flag = splice_flag + 1\n+ if splice_flag == 2:\n+ flag = 1\n+ else:\n+ pass\n+ else:\n+ if pep_pos_start >= cds_start and pep_pos_start <= cds_end and pep_pos_end >= cds_end:\n+ pep_start = start\n+ pep_end = end - pep_pos_start - cds_start\n+ pep_thick_start = 0\n+ pep_thick_end = (pep_end-pep_start)\n+ spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n+ splice_flag = splice_flag + 1\n+ if splice_flag == 2:\n+ flag = 1\n+ elif pep_pos_end >= cds_start and pep_pos_end <= cds_end and pep_pos_start <= cds_start:\n+ pep_start = end - pep_pos_end + cds_start\n+ pep_end = end\n+ pep_thick_start = (len(peptide)*3)-(pep_end-pep_start)\n+ pep_thick_end = (len(peptide)*3)\n+ spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n+ splice_flag = splice_flag + 1\n+ if splice_flag == 2:\n+ flag = 1\n+ else:\n+ pass\n+\n+ if len(spliced_peptide) == 0:\n+ if strand == "+":\n+ 
bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"]\n+ else:\n+ bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"]\n+ outfh.write("\\t".join(bed_line)+"\\n")\n+ else:\n+ if strand == "+":\n+ pep_entry = spliced_peptide\n+ pep_start = min([pep_entry[0][0], pep_entry[1][0]])\n+ pep_end = max([pep_entry[0][1], pep_entry[1][1]])\n+ blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))]\n+ blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))]\n+ bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)]\n+ outfh.write("\\t".join(bed_line)+"\\n")\n+ else:\n+ pep_entry = spliced_peptide\n+ pep_start = min([pep_entry[0][0], pep_entry[1][0]])\n+ pep_end = max([pep_entry[0][1], pep_entry[1][1]])\n+ blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))]\n+ blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))]\n+ bed_line = [chromosome, str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)]\n+ outfh.write("\\t".join(bed_line)+"\\n")\n+ c.execute("DROP table novel")\n+ conn.commit()\n+ conn.close()\n+ conn1.close()\n+ outfh.close()\n+ pepfile.close()\n+ \n+ return None\n+if __name__ == "__main__":\n+ main()\n'

diff -r 000000000000 -r 5f49ffce52cb peptide_genomic_coordinate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/peptide_genomic_coordinate.xml Wed Apr 03 04:04:18 2019 -0400

[

@@ -0,0 +1,58 @@
+<tool id="peptide_genomic_coordinate" name="Peptide Genomic Coordinate" version="0.1.1">
+    <description>Get Peptide's genomic coordinate using mzsqlite DB and genomic mapping sqlite DB</description>
+    <requirements>
+        <requirement type="package" version="3.7.1">python</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+        python '$__tool_directory__/peptide_genomic_coordinate.py'
+        '$peptideinput'
+        '$mzsqlite'
+        '$mapping'
+        '$peptide_bed'
+    ]]></command>
+    <inputs>
+        <param type="data" name="peptideinput" format="tabular" label="Peptide List (without any header line)"/>
+        <param type="data" name="mzsqlite" format="sqlite" label="mz to sqlite (mzsqlite) file"/>
+        <param type="data" name="mapping" format="sqlite" label="genomic mapping sqlite file"/>
+    </inputs>
+    <outputs>
+        <data format="bed" name="peptide_bed" label="${tool.name} on ${on_string}">
+            <actions>
+                <action name="column_names" type="metadata" default="chrom,chromStart,chromStop,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts"/>
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="peptideinput" value="peptides.tabular"/>
+            <param name="mzsqlite" value="test_mz_to_sqlite.sqlite"/>
+            <param name="mapping" value="test_genomic_mapping_sqlite.sqlite"/>
+            <output name="peptide_bed">
+                <assert_contents>
+                    <has_text text="115176449" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+        **Peptide Genomic Coordinate**
+
+        Gets the genomic coordinates of peptides based on the information in the mzsqlite and genomic mapping sqlite files. This tool is useful in a proteogenomics workflow.
+        The program loads two sqlite databases (the mzsqlite and genomic mapping sqlite files) and calculates the genomic coordinates of the peptides provided as input. It outputs a BED file for the peptides.
+
+        Input: Peptide list file, mzsqlite sqlite DB file, and genomic mapping sqlite DB file
+        Output: Tabular BED file with all the columns
+
+
+
+    ]]></help>
+    <citations>
+      <citation type="bibtex">
+@misc{peptidegenomiccoodinate,
+    author={Kumar, Praveen},
+    year={2018},
+    title={Peptide Genomic Coordinate}
+}
+      </citation>
+    </citations>
+</tool>

diff -r 000000000000 -r 5f49ffce52cb test-data/peptides.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/peptides.tabular Wed Apr 03 04:04:18 2019 -0400

@@ -0,0 +1,4 @@
+AVDPDSSAEASGLR
+DGDLENPVLYSGAVK
+DSGASGSILEASAAR
+ELGSSDLTAR

diff -r 000000000000 -r 5f49ffce52cb test-data/peptides_BED.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/peptides_BED.bed Wed Apr 03 04:04:18 2019 -0400

@@ -0,0 +1,4 @@
+chr11 115176449 115176491 AVDPDSSAEASGLR 255 + 115176449 115176491 0 1 42 0
+chr5 121445444 121445489 DGDLENPVLYSGAVK 255 - 121445444 121445489 0 1 45 0
+chr17 22866997 22867042 DSGASGSILEASAAR 255 - 22866997 22867042 0 1 45 0
+chr2 91155262 91155292 ELGSSDLTAR 255 - 91155262 91155292 0 1 30 0

diff -r 000000000000 -r 5f49ffce52cb test-data/test_genomic_mapping_sqlite.sqlite

Binary file test-data/test_genomic_mapping_sqlite.sqlite has changed

diff -r 000000000000 -r 5f49ffce52cb test-data/test_mz_to_sqlite.sqlite

Binary file test-data/test_mz_to_sqlite.sqlite has changed