Repository 'mapping_to_ucsc'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/mapping_to_ucsc

Changeset 0:601abbd22cea (2014-05-19)
Next changeset 1:5385aceef9e9 (2015-11-11)
Commit message:
Imported from capsule None
added:
mapping_to_ucsc.py
mapping_to_ucsc.xml
b
diff -r 000000000000 -r 601abbd22cea mapping_to_ucsc.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mapping_to_ucsc.py Mon May 19 12:33:55 2014 -0400
[
b'@@ -0,0 +1,203 @@\n+#!/usr/bin/env python\n+\n+import sys, tempfile, os\n+\n+assert sys.version_info[:2] >= (2.4)\n+\n+def stop_err(msg):\n+    sys.stderr.write(msg)\n+    sys.exit()\n+    \n+def main():\n+\n+    out_fname = sys.argv[1]\n+    in_fname = sys.argv[2]\n+    chr_col = int(sys.argv[3])-1\n+    coord_col = int(sys.argv[4])-1\n+    track_type = sys.argv[5]\n+    if track_type == \'coverage\' or track_type == \'both\': \n+        coverage_col = int(sys.argv[6])-1\n+        cname = sys.argv[7]\n+        cdescription = sys.argv[8]\n+        ccolor = sys.argv[9].replace(\'-\',\',\')\n+        cvisibility = sys.argv[10]\n+    if track_type == \'snp\' or track_type == \'both\':\n+        if track_type == \'both\':\n+            j = 5\n+        else:\n+            j = 0 \n+        #sname = sys.argv[7+j]\n+        sdescription = sys.argv[6+j]\n+        svisibility = sys.argv[7+j]\n+        #ref_col = int(sys.argv[10+j])-1\n+        read_col = int(sys.argv[8+j])-1\n+    \n+\n+    # Sort the input file based on chromosome (alphabetically) and start co-ordinates (numerically)\n+    sorted_infile = tempfile.NamedTemporaryFile()\n+    try:\n+        os.system("sort -k %d,%d -k %dn -o %s %s" %(chr_col+1,chr_col+1,coord_col+1,sorted_infile.name,in_fname))\n+    except Exception, exc:\n+        stop_err( \'Initialization error -> %s\' %str(exc) )\n+\n+    #generate chr list\n+    sorted_infile.seek(0)\n+    chr_vals = []\n+    for line in file( sorted_infile.name ):\n+        line = line.strip()\n+        if not(line):\n+            continue\n+        try:\n+            fields = line.split(\'\\t\')\n+            chr = fields[chr_col]\n+            if chr not in chr_vals:\n+                chr_vals.append(chr)\n+        except:\n+            pass\n+    if not(chr_vals):   \n+        stop_err("Skipped all lines as invalid.")\n+        \n+    if track_type == \'coverage\' or track_type == \'both\':\n+        if track_type == \'coverage\':\n+            fout = open( out_fname, "w" )\n+        else:\n+            fout = tempfile.NamedTemporaryFile()\n+        fout.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+                      % ( cname, cdescription, ccolor, cvisibility ))\n+    if track_type == \'snp\' or track_type == \'both\':\n+        fout_a = tempfile.NamedTemporaryFile()\n+        fout_t = tempfile.NamedTemporaryFile()\n+        fout_g = tempfile.NamedTemporaryFile()\n+        fout_c = tempfile.NamedTemporaryFile()\n+        fout_ref = tempfile.NamedTemporaryFile()\n+        \n+        fout_a.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+                      % ( "Track A", sdescription, \'255,0,0\', svisibility ))\n+        fout_t.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+                      % ( "Track T", sdescription, \'0,255,0\', svisibility ))\n+        fout_g.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+                      % ( "Track G", sdescription, \'0,0,255\', svisibility ))\n+        fout_c.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+                      % ( "Track C", sdescription, \'255,0,255\', svisibility ))\n+        \n+        \n+    sorted_infile.seek(0)\n+    for line in file( sorted_infile.name ):\n+        line = line.strip()\n+        if not(line):\n+            continue\n+        try:\n+            fields = line.split(\'\\t\')\n+            chr = fields[chr_col]\n+            start = int(fields[coord_col])\n+            assert start > 0\n+        except:\n+            continue\n+        try:\n+            ind = chr_vals.index(chr)    #encountered chr for the 1st time\n+            del chr_vals[ind]\n+            prev_start = \'\'\n+            header = "variableStep chrom=%s\\n" %(chr)\n+            if track_type == \'coverage\' or track_type == \'both\':\n+                coverage = int(fields[coverage_col])\n+                line1 = "%s\\t%s\\n" %(start,coverage)\n+                fout.write("%s%s" %(header,line1))\n+            if t'..b'= c = 0\n+                fout_a.write("%s" %(header))\n+                fout_t.write("%s" %(header))\n+                fout_g.write("%s" %(header))\n+                fout_c.write("%s" %(header))\n+                try:\n+                    #ref_nt = fields[ref_col].capitalize()\n+                    read_nt = fields[read_col].capitalize()\n+                    try:\n+                        nt_ind = [\'A\',\'T\',\'G\',\'C\'].index(read_nt)\n+                        if nt_ind == 0:\n+                            a+=1\n+                        elif nt_ind == 1:\n+                            t+=1\n+                        elif nt_ind == 2:\n+                            g+=1\n+                        else:\n+                            c+=1\n+                    except ValueError:\n+                        pass\n+                except:\n+                    pass\n+            prev_start = start\n+        except ValueError:\n+            if start != prev_start:\n+                if track_type == \'coverage\' or track_type == \'both\':\n+                    coverage = int(fields[coverage_col])\n+                    fout.write("%s\\t%s\\n" %(start,coverage)) \n+                if track_type == \'snp\' or track_type == \'both\':\n+                    if a:\n+                        fout_a.write("%s\\t%s\\n" %(prev_start,a))\n+                    if t:\n+                        fout_t.write("%s\\t%s\\n" %(prev_start,t))\n+                    if g:\n+                        fout_g.write("%s\\t%s\\n" %(prev_start,g))\n+                    if c:\n+                        fout_c.write("%s\\t%s\\n" %(prev_start,c))\n+                    a = t = g = c = 0\n+                    try:\n+                        #ref_nt = fields[ref_col].capitalize()\n+                        read_nt = fields[read_col].capitalize()\n+                        try:\n+                            nt_ind = [\'A\',\'T\',\'G\',\'C\'].index(read_nt)\n+                            if nt_ind == 0:\n+                                a+=1\n+                            elif nt_ind == 1:\n+                                t+=1\n+                            elif nt_ind == 2:\n+                                g+=1\n+                            else:\n+                                c+=1\n+                        except ValueError:\n+                            pass\n+                    except:\n+                        pass\n+                prev_start = start\n+            else:\n+                if track_type == \'snp\' or track_type == \'both\':\n+                    try:\n+                        #ref_nt = fields[ref_col].capitalize()\n+                        read_nt = fields[read_col].capitalize()\n+                        try:\n+                            nt_ind = [\'A\',\'T\',\'G\',\'C\'].index(read_nt)\n+                            if nt_ind == 0:\n+                                a+=1\n+                            elif nt_ind == 1:\n+                                t+=1\n+                            elif nt_ind == 2:\n+                                g+=1\n+                            else:\n+                                c+=1\n+                        except ValueError:\n+                            pass\n+                    except:\n+                        pass\n+    \n+    if track_type == \'snp\' or track_type == \'both\':\n+        if a:\n+            fout_a.write("%s\\t%s\\n" %(prev_start,a))\n+        if t:\n+            fout_t.write("%s\\t%s\\n" %(prev_start,t))\n+        if g:\n+            fout_g.write("%s\\t%s\\n" %(prev_start,g))\n+        if c:\n+            fout_c.write("%s\\t%s\\n" %(prev_start,c))\n+            \n+        fout_a.seek(0)\n+        fout_g.seek(0)\n+        fout_t.seek(0)\n+        fout_c.seek(0)    \n+    \n+    if track_type == \'snp\':\n+        os.system("cat %s %s %s %s >> %s" %(fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))\n+    elif track_type == \'both\':\n+        fout.seek(0)\n+        os.system("cat %s %s %s %s %s | cat > %s" %(fout.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))\n+if __name__ == "__main__":\n+    main()\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r 601abbd22cea mapping_to_ucsc.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mapping_to_ucsc.xml Mon May 19 12:33:55 2014 -0400
b
b'@@ -0,0 +1,202 @@\n+<tool id="mapToUCSC" name="Format mapping data" version="1.0.0">\n+  <description> as UCSC custom track</description>\n+  <command interpreter="python">\n+  \tmapping_to_ucsc.py \n+  \t$out_file1\n+  \t$input\n+  \t$chr_col\n+  \t$coord_col\n+  \t$track.track_type\n+  \t#if $track.track_type == "coverage" or $track.track_type == "both"\n+  \t$track.coverage_col\n+    "${track.cname}"\n+    "${track.cdescription}"\n+    "${track.ccolor}"\n+    "${track.cvisibility}"\n+    #end if\n+    #if $track.track_type == "snp" or $track.track_type == "both"\n+    "${track.sdescription}"\n+    "${track.svisibility}"\n+     $track.col2\n+    #end if\n+  </command>\n+  <inputs>\n+    <param format="tabular" name="input" type="data" label="Select mapping data"/>\n+    <param name="chr_col" type="data_column" data_ref="input" label="Column for reference chromosome" />\n+    <param name="coord_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for reference co-ordinate" />\n+    <conditional name="track">\n+      <param name="track_type" type="select" label="Display">\n+    \t<option value="snp" selected="true">SNPs</option>\n+        <option value="coverage">Read coverage</option>\n+    \t<option value="both">Both</option>\n+      </param>\n+      <when value = "coverage">\n+      <param name="coverage_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for read coverage" />\n+      <param name="cname" type="text" size="15" value="User Track" label="Coverage track name">\n+        <validator type="length" max="15"/>\n+      </param>\n+      <param name="cdescription" type="text" value="User Supplied Coverage Track (from Galaxy)" label="Coverage track description">\n+        <validator type="length" max="60" size="15"/>\n+      </param>\n+      <param label="Coverage track Color" name="ccolor" type="select">\n+            <option selected="yes" value="0-0-0">Black</option>\n+            <option value="255-0-0">Red</option>\n+            <option value="0-255-0">Green</option>\n+            <option value="0-0-255">Blue</option>\n+            <option value="255-0-255">Magenta</option>\n+            <option value="0-255-255">Cyan</option>\n+            <option value="255-215-0">Gold</option>\n+            <option value="160-32-240">Purple</option>\n+            <option value="255-140-0">Orange</option>\n+            <option value="255-20-147">Pink</option>\n+            <option value="92-51-23">Dark Chocolate</option>\n+            <option value="85-107-47">Olive green</option>\n+      </param>\n+      <param label="Coverage track Visibility" name="cvisibility" type="select">\n+            <option selected="yes" value="1">Dense</option>\n+            <option value="2">Full</option>\n+            <option value="3">Pack</option>\n+            <option value="4">Squish</option>\n+            <option value="0">Hide</option>\n+      </param>\n+      </when>\n+      \n+      <when value = "snp">\n+      <!-- \n+      <param name="col1" type="data_column" data_ref="input" label="Column containing the reference nucleotide" />\n+       -->\n+      <param name="col2" type="data_column" data_ref="input" label="Column containing the read nucleotide" />\n+      <!-- \n+      <param name="sname" type="text" size="15" value="User Track-2" label="SNP track name">\n+        <validator type="length" max="15"/>\n+      </param>\n+       -->\n+      <param name="sdescription" type="text" value="User Supplied Track (from Galaxy)" label="SNP track description">\n+        <validator type="length" max="60" size="15"/>\n+      </param>\n+      <param label="SNP track Visibility" name="svisibility" type="select">\n+            <option selected="yes" value="1">Dense</option>\n+            <option value="2">Full</option>\n+            <option value="3">Pack</option>\n+            <option value="4">Squish</option>\n+            <option value="0">Hide</option>\n+      </param>\n+      </when>\n+      \n+      <when value = "both">\n+      <param name="coverage_col" type="data_column" data_ref='..b'255-215-0">Gold</option>\n+            <option value="160-32-240">Purple</option>\n+            <option value="255-140-0">Orange</option>\n+            <option value="255-20-147">Pink</option>\n+            <option value="92-51-23">Dark Chocolate</option>\n+            <option value="85-107-47">Olive green</option>\n+      </param>\n+      <param label="Coverage track Visibility" name="cvisibility" type="select">\n+            <option selected="yes" value="1">Dense</option>\n+            <option value="2">Full</option>\n+            <option value="3">Pack</option>\n+            <option value="4">Squish</option>\n+            <option value="0">Hide</option>\n+      </param>\n+      <!-- \n+      <param name="col1" type="data_column" data_ref="input" label="Column containing the reference nucleotide" />\n+       -->\n+      <param name="col2" type="data_column" data_ref="input" label="Column containing the read nucleotide" />\n+      <!-- \n+      <param name="sname" type="text" size="15" value="User Track-2" label="SNP track name">\n+        <validator type="length" max="15"/>\n+      </param>\n+       -->\n+      <param name="sdescription" type="text" size="15" value="User Supplied Track (from Galaxy)" label="SNP track description">\n+        <validator type="length" max="60"/>\n+      </param>\n+      <param label="SNP track Visibility" name="svisibility" type="select">\n+            <option selected="yes" value="1">Dense</option>\n+            <option value="2">Full</option>\n+            <option value="3">Pack</option>\n+            <option value="4">Squish</option>\n+            <option value="0">Hide</option>\n+      </param>\n+      </when>\n+    </conditional>\n+  </inputs>\n+  <outputs>\n+    <data format="customtrack" name="out_file1"/>\n+  </outputs>\n+\n+  \n+ <help> \n+\n+.. class:: infomark\n+\n+**What it does**\n+\n+This tool turns mapping data generated by short read mappers into a format that can be displayed in the UCSC genome browser as a custom track. \n+\n+-----\n+\n+.. class:: warningmark\n+\n+**Note**\n+\n+This tool requires the mapping data to contain at least the following information: \n+\n+chromosome, genome coordinate, read nucleotide (if option to display is SNPs), read coverage (if option to display is Read coverage). \n+\n+-----\n+\n+**Example**\n+\n+For the following Mapping data::\n+\n+   #chr g_start read_id          read_coord g_nt read_nt qual read_coverage\n+   chrM    1   1:29:1672:1127/1    11        G    G       40  134\n+   chrM    1   1:32:93:933/1       4         G    A       40  134\n+   chrM    1   1:34:116:2032/1     11        G    A       40  134\n+   chrM    1   1:39:207:964/1      1         G    G       40  134\n+   chrM    2   1:3:359:848/1       1         G    C       40  234\n+   chrM    2   1:40:1435:1013/1    1         G    G       40  234\n+   chrM    3   1:40:730:972/1      9         G    G       40  334\n+   chrM    4   1:42:1712:921/2     31        G    T       35  434\n+   chrM    4   1:44:1649:493/1     4         G    G       40  434\n+\n+running this tool to display both SNPs and Read coverage will return the following tracks, containing aggregated data per genome co-ordinate::\n+\n+   track type=wiggle_0 name="Coverage Track" description="User Supplied Track (from Galaxy)" color=0,0,0 visibility=1\n+   variableStep chrom=chrM\n+   1   134\n+   2   234\n+   3   334\n+   4   434\n+   track type=wiggle_0 name="Track A" description="User Supplied SNP Track (from Galaxy)" color=255,0,0 visibility=1\n+   variableStep chrom=chrM\n+   1   2\n+   track type=wiggle_0 name="Track T" description="User Supplied SNP Track (from Galaxy)" color=0,255,0 visibility=1\n+   variableStep chrom=chrM\n+   4   1\n+   track type=wiggle_0 name="Track G" description="User Supplied SNP Track (from Galaxy)" color=0,0,255 visibility=1\n+   variableStep chrom=chrM\n+   1   2\n+   2   1\n+   3   1\n+   4   1\n+   track type=wiggle_0 name="Track C" description="User Supplied SNP Track (from Galaxy)" color=255,0,255 visibility=1\n+   variableStep chrom=chrM\n+   2   1\n+   \n+  </help>  \n+</tool>\n'