| Previous changeset 3:95d606bdfef7 (2018-06-17) |
|
Commit message:
planemo upload |
|
modified:
test-data/peptides.tabular test-data/peptides_BED.bed test-data/test_genomic_mapping_sqlite.sqlite test-data/test_mz_to_sqlite.sqlite |
|
added:
peptideGenomicCoodinate.py peptideGenomicCoodinate.xml |
|
removed:
peptideGenomicCoordinate.py peptideGenomicCoordinate.xml |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b peptideGenomicCoodinate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptideGenomicCoodinate.py Tue Dec 18 16:22:29 2018 -0500 |
| [ |
#
# Author: Praveen Kumar
# University of Minnesota
#
# Get peptide's genomic coordinate from the protein's genomic mapping sqlite file (which is derived from the https://toolshed.g2.bx.psu.edu/view/galaxyp/translate_bed/038ecf54cbec)
#
# python peptideGenomicCoordinate.py <peptide_list> <mz_to_sqlite DB> <genomic mapping file DB> <output.bed>
#
#

import contextlib
import sqlite3
import sys

# Joins the scratch table of requested peptides against the mzsqlite tables to
# get (peptide sequence, protein accession, protein sequence) triples.
_PEPTIDE_PROTEIN_QUERY = (
    "SELECT distinct psm.sequence, ps.id, ps.sequence "
    "from db_sequence ps, psm_entries psm, novel n, proteins_by_peptide pbp "
    "where psm.sequence = n.peptide and pbp.peptide_ref = psm.id and pbp.id = ps.id"
)

# One row per mapped exon/CDS piece of a protein accession.
_CDS_MAP_QUERY = (
    "SELECT chrom,start,end,name,strand,cds_start,cds_end "
    "FROM feature_cds_map map WHERE map.name = ?"
)


def _load_peptides(path):
    """Return the non-empty, whitespace-stripped lines of the peptide list file."""
    with open(path, "r") as pepfile:
        stripped = (line.strip() for line in pepfile)
        return [seq for seq in stripped if seq]


def _matching_proteins(db_path, peptides):
    """Return (peptide, accession, protein sequence) rows for the given peptides.

    The peptides are loaded into a scratch table named ``novel`` inside the
    mzsqlite database (as the script has always done) and the table is dropped
    again before returning, even when the lookup query fails.
    """
    with contextlib.closing(sqlite3.connect(db_path)) as conn:
        cursor = conn.cursor()
        cursor.execute("DROP table if exists novel")
        cursor.execute("CREATE TABLE novel(peptide text)")
        try:
            # Bound parameters instead of string concatenation: a quote
            # character in the input file can no longer break the statement.
            cursor.executemany(
                "INSERT INTO novel VALUES (?)", [(seq,) for seq in peptides]
            )
            conn.commit()
            cursor.execute(_PEPTIDE_PROTEIN_QUERY)
            return cursor.fetchall()
        finally:
            cursor.execute("DROP table novel")
            conn.commit()


def _genomic_segments(pep_pos_start, pep_pos_end, coordinates):
    """Project a peptide's CDS interval onto the genome, one segment per exon.

    ``pep_pos_start``/``pep_pos_end`` are nucleotide offsets into the coding
    sequence (amino-acid index * 3). ``coordinates`` are feature_cds_map rows
    ``(chrom, start, end, name, strand, cds_start, cds_end)``.

    Returns ``(chromosome, strand, segments)`` where ``segments`` is a list of
    ``(genomic_start, genomic_end)`` tuples sorted by genomic start; the list
    is empty (and chromosome/strand are None) when no row overlaps the peptide.

    NOTE(review): assumes every row satisfies
    ``end - start == cds_end - cds_start`` - confirm against translate_bed.
    """
    chromosome = None
    strand = None
    segments = []
    for chrom, start, end, _name, row_strand, cds_start, cds_end in coordinates:
        start = int(start)
        end = int(end)
        cds_start = int(cds_start)
        cds_end = int(cds_end)
        overlap_start = max(pep_pos_start, cds_start)
        overlap_end = min(pep_pos_end, cds_end)
        if overlap_start >= overlap_end:
            # No nucleotide of the peptide lies in this exon. This also drops
            # the zero-length pieces produced when the peptide merely touches
            # an exon boundary.
            continue
        if row_strand == "+":
            segment = (
                start + overlap_start - cds_start,
                start + overlap_end - cds_start,
            )
        else:
            # On the minus strand the CDS offset counts back from the exon's
            # genomic end, so the interval is mirrored.
            segment = (
                end - (overlap_end - cds_start),
                end - (overlap_start - cds_start),
            )
        chromosome = chrom
        strand = row_strand
        segments.append(segment)
    segments.sort()
    return chromosome, strand, segments


def _bed_line(chromosome, strand, peptide, segments):
    """Format one BED12 record; blocks are listed in ascending genomic order."""
    chrom_start = segments[0][0]
    chrom_end = segments[-1][1]
    block_sizes = [str(seg_end - seg_start) for seg_start, seg_end in segments]
    block_starts = [str(seg_start - chrom_start) for seg_start, _ in segments]
    fields = [
        # str(): the chrom column may come back from sqlite as an integer.
        str(chromosome),
        str(chrom_start),
        str(chrom_end),
        peptide,
        "255",
        strand,
        str(chrom_start),
        str(chrom_end),
        "0",
        str(len(segments)),
        ",".join(block_sizes),
        ",".join(block_starts),
    ]
    return "\t".join(fields)


def main():
    """Write a BED12 file with the genomic coordinates of the listed peptides.

    Command line (read from ``sys.argv``):
        <peptide_list> <mz_to_sqlite DB> <genomic mapping file DB> <output.bed>

    For every (peptide, protein) pair found in the mzsqlite database, the first
    occurrence of the peptide in the protein sequence is converted to a CDS
    nucleotide interval and projected onto the exons recorded for that protein
    in ``feature_cds_map``. One BED line is written per pair: a single block
    when the peptide lies within one exon, several blocks when it is spliced
    across exons. Pairs whose peptide is not found in the protein sequence, or
    which overlap no mapped exon, are skipped instead of producing a
    placeholder ``0 0`` record.

    Side effects: temporarily creates and drops the table ``novel`` in the
    mzsqlite database and (over)writes the output BED file.
    """
    if len(sys.argv) < 5:
        sys.exit(
            "usage: python peptideGenomicCoodinate.py <peptide_list> "
            "<mz_to_sqlite DB> <genomic mapping file DB> <output.bed>"
        )
    peptide_path, mzsqlite_path, mapping_path, bed_path = sys.argv[1:5]

    rows = _matching_proteins(mzsqlite_path, _load_peptides(peptide_path))

    with contextlib.closing(sqlite3.connect(mapping_path)) as conn1, \
            open(bed_path, "w") as outfh:
        cursor = conn1.cursor()
        for peptide, acc, acc_seq in rows:
            if not peptide or not acc_seq:
                continue
            offset = acc_seq.find(peptide)
            if offset < 0:
                # The peptide does not occur literally in this protein
                # sequence, so there is nothing to map.
                continue
            pep_pos_start = offset * 3
            pep_pos_end = pep_pos_start + len(peptide) * 3

            cursor.execute(_CDS_MAP_QUERY, (acc,))
            chromosome, strand, segments = _genomic_segments(
                pep_pos_start, pep_pos_end, cursor.fetchall()
            )
            if not segments:
                continue
            outfh.write(_bed_line(chromosome, strand, peptide, segments) + "\n")

    return None


if __name__ == "__main__":
    main()
| b |
| diff -r 95d606bdfef7 -r b56922070a1b peptideGenomicCoodinate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptideGenomicCoodinate.xml Tue Dec 18 16:22:29 2018 -0500 |
| [ |
| @@ -0,0 +1,56 @@ +<tool id="peptidegenomiccoodinate" name="Peptide Genomic Coodinate" version="0.1.1"> + <description>Get genomic location/coordinate of peptides using mzsqlite DB and genomic mapping sqlite DB</description> + <requirements> + <requirement type="package" version="2.7.9">python</requirement> + <!--<requirement type="package" version="3.26.0">sqlite</requirement>--> + </requirements> + <command detect_errors="aggressive"><![CDATA[ + python '$__tool_directory__/peptideGenomicCoodinate.py' + '$peptideinput' + '$mzsqlite' + '$mapping' + '$peptide_bed' + ]]></command> + <inputs> + <param type="data" name="peptideinput" format="tabular" label="Peptide List (without any header line)"/> + <param type="data" name="mzsqlite" format="sqlite" label="mz to sqlite (mzsqlite) file"/> + <param type="data" name="mapping" format="sqlite" label="genomic mapping sqlite file"/> + </inputs> + <outputs> + <data format="bed" name="peptide_bed" label="${tool.name} on ${on_string}"> + <actions> + <action name="column_names" type="metadata" default="chrom,chromStart,chromStop,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts"/> + </actions> + </data> + </outputs> + <tests> + <test> + <param name="peptideinput" value="peptides.tabular"/> + <param name="mzsqlite" value="test_mz_to_sqlite.sqlite"/> + <param name="mapping" value="test_genomic_mapping_sqlite.sqlite"/> + <output name="peptide_bed"> + <assert_contents> + <has_text text="115176449" /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ + **PeptideGenomicCoodinate** + Gets genomic coordinate of peptides based on the information in mzsqlite and genomic mapping sqlite files. This tool is useful in a proteogenomics workflow. + This program loads two sqlite databases (mzsqlite and genomic mapping sqlite files) and calculates the genomic coordinates of the peptides provided as input. This outputs bed file for peptides. 
+ + Input: Peptide list file, mzsqlite sqlite DB file, and genomic mapping sqlite DB file + Output: Tabular BED file with all the columns + + ]]></help> + <citations> + <citation type="bibtex"> +@misc{peptidegenomiccoodinate, + author={Kumar, Praveen}, + year={2018}, + title={PeptideGenomicCoordinate} +} + </citation> + </citations> +</tool> |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b peptideGenomicCoordinate.py --- a/peptideGenomicCoordinate.py Sun Jun 17 04:55:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| b'@@ -1,150 +0,0 @@\n-\n-# \n-# Author: Praveen Kumar\n-# University of Minnesota\n-# Galaxy-P Team\n-# Get peptide\'s genomic coordinate from the protein\'s genomic mapping sqlite file (which is derived from the https://toolshed.g2.bx.psu.edu/view/galaxyp/translate_bed/038ecf54cbec)\n-# \n-# python peptideGenomicCoordinate.py <peptide_list> <mz_to_sqlite DB> <genomic mapping file DB> <output.bed>\n-# \n-#\n-\n-def main():\n- import sys\n- import sqlite3\n- conn = sqlite3.connect(sys.argv[2])\n- c = conn.cursor()\n- c.execute("DROP table if exists novel")\n- conn.commit()\n- c.execute("CREATE TABLE novel(peptide text)")\n- pepfile = open(sys.argv[1],"r")\n-\n- for seq in pepfile.readlines():\n- seq = seq.strip()\n- c.execute(\'INSERT INTO novel VALUES ("\'+seq+\'")\')\n- conn.commit()\n-\n- c.execute("SELECT distinct psm.sequence, ps.id, ps.sequence from db_sequence ps, psm_entries psm, novel n, proteins_by_peptide pbp where psm.sequence = n.peptide and pbp.peptide_ref = psm.id and pbp.id = ps.id")\n- rows = c.fetchall()\n-\n- conn1 = sqlite3.connect(sys.argv[3])\n- c1 = conn1.cursor()\n-\n- outfh = open(sys.argv[4], "w")\n-\n- master_dict = {}\n- for each in rows:\n- peptide = each[0]\n- acc = each[1]\n- acc_seq = each[2]\n- \n- c1.execute("SELECT chrom,start,end,name,strand,cds_start,cds_end FROM feature_cds_map map WHERE map.name = \'"+acc+"\'")\n- coordinates = c1.fetchall()\n- \n- if len(coordinates) != 0:\n- pep_start = 0\n- pep_end = 0\n- flag = 0\n- splice_flag = 0\n- spliced_peptide = []\n- for each_entry in coordinates:\n- chromosome = each_entry[0]\n- start = int(each_entry[1])\n- end = int(each_entry[2])\n- strand = each_entry[4]\n- cds_start = int(each_entry[5])\n- cds_end = int(each_entry[6])\n- pep_pos_start = (acc_seq.find(peptide)*3)\n- pep_pos_end = pep_pos_start + (len(peptide)*3)\n- if (pep_pos_start >= cds_start) and (pep_pos_end <= cds_end):\n- if strand == "+":\n- pep_start = start + pep_pos_start - cds_start\n- pep_end = start + 
pep_pos_end - cds_start\n- pep_thick_start = 0\n- pep_thick_end = len(peptide)\n- flag == 1\n- else:\n- pep_end = end - pep_pos_start + cds_start\n- pep_start = end - pep_pos_end + cds_start\n- pep_thick_start = 0\n- pep_thick_end = len(peptide)\n- flag == 1\n- spliced_peptide = []\n- splice_flag = 0\n- else:\n- if flag == 0:\n- if strand == "+":\n- if (pep_pos_start >= cds_start) and (pep_pos_start <= cds_end) and (pep_pos_end > cds_end):\n- pep_start = start + pep_pos_start - cds_start\n- pep_end = end\n- pep_thick_start = 0\n- pep_thick_end = (pep_end-pep_start)\n- spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n- splice_flag = splice_flag + 1\n- if splice_flag == 2:\n- flag = 1\n- elif (pep_pos_end >= cds_start) and (pep_pos_end <= cds_end) and (pep_pos_start < cds_start):\n- pep_start = start\n- pep_end = start + pep_pos_end - cds_start\n- pep_thick_start = (len(peptide)*3)-(pep_end-pep_start)\n- pep_thick_end = (len(peptide)*3)\n- '..b'liced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n- splice_flag = splice_flag + 1\n- if splice_flag == 2:\n- flag = 1\n- else:\n- pass\n- else:\n- if (pep_pos_start >= cds_start) and (pep_pos_start <= cds_end) and (pep_pos_end >= cds_end):\n- pep_start = start\n- pep_end = end - pep_pos_start - cds_start\n- pep_thick_start = 0\n- pep_thick_end = (pep_end-pep_start)\n- spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n- splice_flag = splice_flag + 1\n- if splice_flag == 2:\n- flag = 1\n- elif (pep_pos_end >= cds_start) and (pep_pos_end <= cds_end) and (pep_pos_start <= cds_start):\n- pep_start = end - pep_pos_end + cds_start\n- pep_end = end\n- pep_thick_start = (len(peptide)*3)-(pep_end-pep_start)\n- pep_thick_end = (len(peptide)*3)\n- spliced_peptide.append([pep_start,pep_end,pep_thick_start,pep_thick_end])\n- splice_flag = splice_flag + 1\n- if splice_flag == 2:\n- flag = 1\n- else:\n- pass\n- if len(spliced_peptide) == 0:\n- if strand == "+":\n- 
bed_line = [str(chromosome), str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"]\n- else:\n- bed_line = [str(chromosome), str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "1", str(pep_end-pep_start), "0"]\n- outfh.write("\\t".join(bed_line)+"\\n")\n- else:\n- if strand == "+":\n- pep_entry = spliced_peptide\n- pep_start = min([pep_entry[0][0], pep_entry[1][0]])\n- pep_end = max([pep_entry[0][1], pep_entry[1][1]])\n- blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))]\n- blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))]\n- bed_line = [str(chromosome), str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)]\n- outfh.write("\\t".join(bed_line)+"\\n")\n- else:\n- pep_entry = spliced_peptide\n- pep_start = min([pep_entry[0][0], pep_entry[1][0]])\n- pep_end = max([pep_entry[0][1], pep_entry[1][1]])\n- blockSize = [str(min([pep_entry[0][3], pep_entry[1][3]])),str(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]]))]\n- blockStarts = ["0", str(pep_end-pep_start-(max([pep_entry[0][3], pep_entry[1][3]])-min([pep_entry[0][3], pep_entry[1][3]])))]\n- bed_line = [str(chromosome), str(pep_start), str(pep_end), peptide, "255", strand, str(pep_start), str(pep_end), "0", "2", ",".join(blockSize), ",".join(blockStarts)]\n- outfh.write("\\t".join(bed_line)+"\\n")\n- c.execute("DROP table novel")\n- conn.commit()\n- conn.close()\n- conn1.close()\n- outfh.close()\n- pepfile.close()\n- \n- return None\n-if __name__ == "__main__":\n- main()\n' |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b peptideGenomicCoordinate.xml --- a/peptideGenomicCoordinate.xml Sun Jun 17 04:55:42 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
| [ |
| @@ -1,57 +0,0 @@ -<tool id="peptidegenomiccoordinate" name="Peptide Genomic Coordinate" version="0.1.2"> - <description>Get genomic location/coordinate of peptides using mzsqlite DB and genomic mapping sqlite DB</description> - <requirements> - <requirement type="package" version="2.7.9">python</requirement> - </requirements> - <command detect_errors="aggressive"><![CDATA[ - python '$__tool_directory__/peptideGenomicCoordinate.py' '$peptideinput' '$mzsqlite' '$mapping' '$peptide_bed' - ]]></command> - - <inputs> - <param type="data" name="peptideinput" format="tabular" label="Peptide List (without any header line)"/> - <param type="data" name="mzsqlite" format="sqlite" label="mz to sqlite (mzsqlite) file"/> - <param type="data" name="mapping" format="sqlite" label="genomic mapping sqlite file"/> - </inputs> - - <outputs> - <data format="bed" name="peptide_bed" label="${tool.name} on ${on_string}"> - <actions> - <action name="column_names" type="metadata" default="chrom,chromStart,chromStop,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts"/> - </actions> - </data> - </outputs> - - <tests> - <test> - <param name="peptide_input" value="peptides.tabular"/> - <param name="sqlite" value="test_mz_to_sqlite.sqlite"/> - <param name="sqlite" value="test_genomic_mapping_sqlite.sqlite"/> - <output name="peptide_bed" file="peptides_BED.bed"/> - </test> - </tests> - <help><![CDATA[ - **PeptideGenomicCoordinate** - - Gets genomic coordinate of peptides based on the information in mzsqlite and genomic mapping sqlite files. - This program loads two sqlite databases (mzsqlite and genomic mapping sqlite files) and calculates the genomic coordinates of the peptides provided as input. This outputs bed file for peptides. 
- - Input: Peptide list file, mzsqlite sqlite DB file, and genomic mapping sqlite DB file - Output: Tabular BED file with all the columns - - mzsqlite file from: https://toolshed.g2.bx.psu.edu/repos/galaxyp/mz_to_sqlite/mz_to_sqlite/2.0.0 - genome mapping sqlite file from: https://toolshed.g2.bx.psu.edu/view/galaxyp/translate_bed/038ecf54cbec - - - P.S. : Requires sqlite - - ]]></help> - <citations> - <citation type="bibtex"> -@misc{peptidegenomiccoodinate, - author={Kumar, Praveen}, - year={2018}, - title={PeptideGenomicCoordinate} -} - </citation> - </citations> -</tool> |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b test-data/peptides.tabular --- a/test-data/peptides.tabular Sun Jun 17 04:55:42 2018 -0400 +++ b/test-data/peptides.tabular Tue Dec 18 16:22:29 2018 -0500 |
| b |
| @@ -1,8 +1,4 @@ AVDPDSSAEASGLR -AVDPDSSAEASGLRAQDR DGDLENPVLYSGAVK DSGASGSILEASAAR ELGSSDLTAR -ESSREALVEPTSESPRPALAR -NIYITLLSCFK -SPYREFTDHLVK |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b test-data/peptides_BED.bed --- a/test-data/peptides_BED.bed Sun Jun 17 04:55:42 2018 -0400 +++ b/test-data/peptides_BED.bed Tue Dec 18 16:22:29 2018 -0500 |
| b |
| @@ -1,8 +1,4 @@ chr11 115176449 115176491 AVDPDSSAEASGLR 255 + 115176449 115176491 0 1 42 0 -chr11 115176449 115176503 AVDPDSSAEASGLRAQDR 255 + 115176449 115176503 0 1 54 0 chr5 121445444 121445489 DGDLENPVLYSGAVK 255 - 121445444 121445489 0 1 45 0 chr17 22866997 22867042 DSGASGSILEASAAR 255 - 22866997 22867042 0 1 45 0 chr2 91155262 91155292 ELGSSDLTAR 255 - 91155262 91155292 0 1 30 0 -chr11 115180006 115180069 ESSREALVEPTSESPRPALAR 255 + 115180006 115180069 0 1 63 0 -chr4 58482350 58482383 NIYITLLSCFK 255 + 58482350 58482383 0 1 33 0 -chr17 24721702 24721826 SPYREFTDHLVK 255 + 24721702 24721826 0 2 12,24 0,100 |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b test-data/test_genomic_mapping_sqlite.sqlite |
| b |
| Binary file test-data/test_genomic_mapping_sqlite.sqlite has changed |
| b |
| diff -r 95d606bdfef7 -r b56922070a1b test-data/test_mz_to_sqlite.sqlite |
| b |
| Binary file test-data/test_mz_to_sqlite.sqlite has changed |