Next changeset 1:5385aceef9e9 (2015-11-11) |
Commit message:
Imported from capsule None |
added:
mapping_to_ucsc.py mapping_to_ucsc.xml |
b |
diff -r 000000000000 -r 601abbd22cea mapping_to_ucsc.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_to_ucsc.py Mon May 19 12:33:55 2014 -0400 |
[ |
b'@@ -0,0 +1,203 @@\n+#!/usr/bin/env python\n+\n+import sys, tempfile, os\n+\n+assert sys.version_info[:2] >= (2.4)\n+\n+def stop_err(msg):\n+ sys.stderr.write(msg)\n+ sys.exit()\n+ \n+def main():\n+\n+ out_fname = sys.argv[1]\n+ in_fname = sys.argv[2]\n+ chr_col = int(sys.argv[3])-1\n+ coord_col = int(sys.argv[4])-1\n+ track_type = sys.argv[5]\n+ if track_type == \'coverage\' or track_type == \'both\': \n+ coverage_col = int(sys.argv[6])-1\n+ cname = sys.argv[7]\n+ cdescription = sys.argv[8]\n+ ccolor = sys.argv[9].replace(\'-\',\',\')\n+ cvisibility = sys.argv[10]\n+ if track_type == \'snp\' or track_type == \'both\':\n+ if track_type == \'both\':\n+ j = 5\n+ else:\n+ j = 0 \n+ #sname = sys.argv[7+j]\n+ sdescription = sys.argv[6+j]\n+ svisibility = sys.argv[7+j]\n+ #ref_col = int(sys.argv[10+j])-1\n+ read_col = int(sys.argv[8+j])-1\n+ \n+\n+ # Sort the input file based on chromosome (alphabetically) and start co-ordinates (numerically)\n+ sorted_infile = tempfile.NamedTemporaryFile()\n+ try:\n+ os.system("sort -k %d,%d -k %dn -o %s %s" %(chr_col+1,chr_col+1,coord_col+1,sorted_infile.name,in_fname))\n+ except Exception, exc:\n+ stop_err( \'Initialization error -> %s\' %str(exc) )\n+\n+ #generate chr list\n+ sorted_infile.seek(0)\n+ chr_vals = []\n+ for line in file( sorted_infile.name ):\n+ line = line.strip()\n+ if not(line):\n+ continue\n+ try:\n+ fields = line.split(\'\\t\')\n+ chr = fields[chr_col]\n+ if chr not in chr_vals:\n+ chr_vals.append(chr)\n+ except:\n+ pass\n+ if not(chr_vals): \n+ stop_err("Skipped all lines as invalid.")\n+ \n+ if track_type == \'coverage\' or track_type == \'both\':\n+ if track_type == \'coverage\':\n+ fout = open( out_fname, "w" )\n+ else:\n+ fout = tempfile.NamedTemporaryFile()\n+ fout.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+ % ( cname, cdescription, ccolor, cvisibility ))\n+ if track_type == \'snp\' or track_type == \'both\':\n+ fout_a = tempfile.NamedTemporaryFile()\n+ fout_t = tempfile.NamedTemporaryFile()\n+ fout_g = tempfile.NamedTemporaryFile()\n+ fout_c = tempfile.NamedTemporaryFile()\n+ fout_ref = tempfile.NamedTemporaryFile()\n+ \n+ fout_a.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+ % ( "Track A", sdescription, \'255,0,0\', svisibility ))\n+ fout_t.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+ % ( "Track T", sdescription, \'0,255,0\', svisibility ))\n+ fout_g.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+ % ( "Track G", sdescription, \'0,0,255\', svisibility ))\n+ fout_c.write(\'\'\'track type=wiggle_0 name="%s" description="%s" color=%s visibility=%s\\n\'\'\' \\\n+ % ( "Track C", sdescription, \'255,0,255\', svisibility ))\n+ \n+ \n+ sorted_infile.seek(0)\n+ for line in file( sorted_infile.name ):\n+ line = line.strip()\n+ if not(line):\n+ continue\n+ try:\n+ fields = line.split(\'\\t\')\n+ chr = fields[chr_col]\n+ start = int(fields[coord_col])\n+ assert start > 0\n+ except:\n+ continue\n+ try:\n+ ind = chr_vals.index(chr) #encountered chr for the 1st time\n+ del chr_vals[ind]\n+ prev_start = \'\'\n+ header = "variableStep chrom=%s\\n" %(chr)\n+ if track_type == \'coverage\' or track_type == \'both\':\n+ coverage = int(fields[coverage_col])\n+ line1 = "%s\\t%s\\n" %(start,coverage)\n+ fout.write("%s%s" %(header,line1))\n+ if t'..b'= c = 0\n+ fout_a.write("%s" %(header))\n+ fout_t.write("%s" %(header))\n+ fout_g.write("%s" %(header))\n+ fout_c.write("%s" %(header))\n+ try:\n+ #ref_nt = fields[ref_col].capitalize()\n+ read_nt = fields[read_col].capitalize()\n+ try:\n+ nt_ind = [\'A\',\'T\',\'G\',\'C\'].index(read_nt)\n+ if nt_ind == 0:\n+ a+=1\n+ elif nt_ind == 1:\n+ t+=1\n+ elif nt_ind == 2:\n+ g+=1\n+ else:\n+ c+=1\n+ except ValueError:\n+ pass\n+ except:\n+ pass\n+ prev_start = start\n+ except ValueError:\n+ if start != prev_start:\n+ if track_type == \'coverage\' or track_type == \'both\':\n+ coverage = int(fields[coverage_col])\n+ fout.write("%s\\t%s\\n" %(start,coverage)) \n+ if track_type == \'snp\' or track_type == \'both\':\n+ if a:\n+ fout_a.write("%s\\t%s\\n" %(prev_start,a))\n+ if t:\n+ fout_t.write("%s\\t%s\\n" %(prev_start,t))\n+ if g:\n+ fout_g.write("%s\\t%s\\n" %(prev_start,g))\n+ if c:\n+ fout_c.write("%s\\t%s\\n" %(prev_start,c))\n+ a = t = g = c = 0\n+ try:\n+ #ref_nt = fields[ref_col].capitalize()\n+ read_nt = fields[read_col].capitalize()\n+ try:\n+ nt_ind = [\'A\',\'T\',\'G\',\'C\'].index(read_nt)\n+ if nt_ind == 0:\n+ a+=1\n+ elif nt_ind == 1:\n+ t+=1\n+ elif nt_ind == 2:\n+ g+=1\n+ else:\n+ c+=1\n+ except ValueError:\n+ pass\n+ except:\n+ pass\n+ prev_start = start\n+ else:\n+ if track_type == \'snp\' or track_type == \'both\':\n+ try:\n+ #ref_nt = fields[ref_col].capitalize()\n+ read_nt = fields[read_col].capitalize()\n+ try:\n+ nt_ind = [\'A\',\'T\',\'G\',\'C\'].index(read_nt)\n+ if nt_ind == 0:\n+ a+=1\n+ elif nt_ind == 1:\n+ t+=1\n+ elif nt_ind == 2:\n+ g+=1\n+ else:\n+ c+=1\n+ except ValueError:\n+ pass\n+ except:\n+ pass\n+ \n+ if track_type == \'snp\' or track_type == \'both\':\n+ if a:\n+ fout_a.write("%s\\t%s\\n" %(prev_start,a))\n+ if t:\n+ fout_t.write("%s\\t%s\\n" %(prev_start,t))\n+ if g:\n+ fout_g.write("%s\\t%s\\n" %(prev_start,g))\n+ if c:\n+ fout_c.write("%s\\t%s\\n" %(prev_start,c))\n+ \n+ fout_a.seek(0)\n+ fout_g.seek(0)\n+ fout_t.seek(0)\n+ fout_c.seek(0) \n+ \n+ if track_type == \'snp\':\n+ os.system("cat %s %s %s %s >> %s" %(fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))\n+ elif track_type == \'both\':\n+ fout.seek(0)\n+ os.system("cat %s %s %s %s %s | cat > %s" %(fout.name,fout_a.name,fout_t.name,fout_g.name,fout_c.name,out_fname))\n+if __name__ == "__main__":\n+ main()\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 601abbd22cea mapping_to_ucsc.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mapping_to_ucsc.xml Mon May 19 12:33:55 2014 -0400 |
b |
b'@@ -0,0 +1,202 @@\n+<tool id="mapToUCSC" name="Format mapping data" version="1.0.0">\n+ <description> as UCSC custom track</description>\n+ <command interpreter="python">\n+ \tmapping_to_ucsc.py \n+ \t$out_file1\n+ \t$input\n+ \t$chr_col\n+ \t$coord_col\n+ \t$track.track_type\n+ \t#if $track.track_type == "coverage" or $track.track_type == "both"\n+ \t$track.coverage_col\n+ "${track.cname}"\n+ "${track.cdescription}"\n+ "${track.ccolor}"\n+ "${track.cvisibility}"\n+ #end if\n+ #if $track.track_type == "snp" or $track.track_type == "both"\n+ "${track.sdescription}"\n+ "${track.svisibility}"\n+ $track.col2\n+ #end if\n+ </command>\n+ <inputs>\n+ <param format="tabular" name="input" type="data" label="Select mapping data"/>\n+ <param name="chr_col" type="data_column" data_ref="input" label="Column for reference chromosome" />\n+ <param name="coord_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for reference co-ordinate" />\n+ <conditional name="track">\n+ <param name="track_type" type="select" label="Display">\n+ \t<option value="snp" selected="true">SNPs</option>\n+ <option value="coverage">Read coverage</option>\n+ \t<option value="both">Both</option>\n+ </param>\n+ <when value = "coverage">\n+ <param name="coverage_col" type="data_column" data_ref="input" numerical="True" label="Numerical column for read coverage" />\n+ <param name="cname" type="text" size="15" value="User Track" label="Coverage track name">\n+ <validator type="length" max="15"/>\n+ </param>\n+ <param name="cdescription" type="text" value="User Supplied Coverage Track (from Galaxy)" label="Coverage track description">\n+ <validator type="length" max="60" size="15"/>\n+ </param>\n+ <param label="Coverage track Color" name="ccolor" type="select">\n+ <option selected="yes" value="0-0-0">Black</option>\n+ <option value="255-0-0">Red</option>\n+ <option value="0-255-0">Green</option>\n+ <option value="0-0-255">Blue</option>\n+ <option value="255-0-255">Magenta</option>\n+ <option value="0-255-255">Cyan</option>\n+ <option value="255-215-0">Gold</option>\n+ <option value="160-32-240">Purple</option>\n+ <option value="255-140-0">Orange</option>\n+ <option value="255-20-147">Pink</option>\n+ <option value="92-51-23">Dark Chocolate</option>\n+ <option value="85-107-47">Olive green</option>\n+ </param>\n+ <param label="Coverage track Visibility" name="cvisibility" type="select">\n+ <option selected="yes" value="1">Dense</option>\n+ <option value="2">Full</option>\n+ <option value="3">Pack</option>\n+ <option value="4">Squish</option>\n+ <option value="0">Hide</option>\n+ </param>\n+ </when>\n+ \n+ <when value = "snp">\n+ <!-- \n+ <param name="col1" type="data_column" data_ref="input" label="Column containing the reference nucleotide" />\n+ -->\n+ <param name="col2" type="data_column" data_ref="input" label="Column containing the read nucleotide" />\n+ <!-- \n+ <param name="sname" type="text" size="15" value="User Track-2" label="SNP track name">\n+ <validator type="length" max="15"/>\n+ </param>\n+ -->\n+ <param name="sdescription" type="text" value="User Supplied Track (from Galaxy)" label="SNP track description">\n+ <validator type="length" max="60" size="15"/>\n+ </param>\n+ <param label="SNP track Visibility" name="svisibility" type="select">\n+ <option selected="yes" value="1">Dense</option>\n+ <option value="2">Full</option>\n+ <option value="3">Pack</option>\n+ <option value="4">Squish</option>\n+ <option value="0">Hide</option>\n+ </param>\n+ </when>\n+ \n+ <when value = "both">\n+ <param name="coverage_col" type="data_column" data_ref='..b'255-215-0">Gold</option>\n+ <option value="160-32-240">Purple</option>\n+ <option value="255-140-0">Orange</option>\n+ <option value="255-20-147">Pink</option>\n+ <option value="92-51-23">Dark Chocolate</option>\n+ <option value="85-107-47">Olive green</option>\n+ </param>\n+ <param label="Coverage track Visibility" name="cvisibility" type="select">\n+ <option selected="yes" value="1">Dense</option>\n+ <option value="2">Full</option>\n+ <option value="3">Pack</option>\n+ <option value="4">Squish</option>\n+ <option value="0">Hide</option>\n+ </param>\n+ <!-- \n+ <param name="col1" type="data_column" data_ref="input" label="Column containing the reference nucleotide" />\n+ -->\n+ <param name="col2" type="data_column" data_ref="input" label="Column containing the read nucleotide" />\n+ <!-- \n+ <param name="sname" type="text" size="15" value="User Track-2" label="SNP track name">\n+ <validator type="length" max="15"/>\n+ </param>\n+ -->\n+ <param name="sdescription" type="text" size="15" value="User Supplied Track (from Galaxy)" label="SNP track description">\n+ <validator type="length" max="60"/>\n+ </param>\n+ <param label="SNP track Visibility" name="svisibility" type="select">\n+ <option selected="yes" value="1">Dense</option>\n+ <option value="2">Full</option>\n+ <option value="3">Pack</option>\n+ <option value="4">Squish</option>\n+ <option value="0">Hide</option>\n+ </param>\n+ </when>\n+ </conditional>\n+ </inputs>\n+ <outputs>\n+ <data format="customtrack" name="out_file1"/>\n+ </outputs>\n+\n+ \n+ <help> \n+\n+.. class:: infomark\n+\n+**What it does**\n+\n+This tool turns mapping data generated by short read mappers into a format that can be displayed in the UCSC genome browser as a custom track. \n+\n+-----\n+\n+.. class:: warningmark\n+\n+**Note**\n+\n+This tool requires the mapping data to contain at least the following information: \n+\n+chromosome, genome coordinate, read nucleotide (if option to display is SNPs), read coverage (if option to display is Read coverage). \n+\n+-----\n+\n+**Example**\n+\n+For the following Mapping data::\n+\n+ #chr g_start read_id read_coord g_nt read_nt qual read_coverage\n+ chrM 1 1:29:1672:1127/1 11 G G 40 134\n+ chrM 1 1:32:93:933/1 4 G A 40 134\n+ chrM 1 1:34:116:2032/1 11 G A 40 134\n+ chrM 1 1:39:207:964/1 1 G G 40 134\n+ chrM 2 1:3:359:848/1 1 G C 40 234\n+ chrM 2 1:40:1435:1013/1 1 G G 40 234\n+ chrM 3 1:40:730:972/1 9 G G 40 334\n+ chrM 4 1:42:1712:921/2 31 G T 35 434\n+ chrM 4 1:44:1649:493/1 4 G G 40 434\n+\n+running this tool to display both SNPs and Read coverage will return the following tracks, containing aggregated data per genome co-ordinate::\n+\n+ track type=wiggle_0 name="Coverage Track" description="User Supplied Track (from Galaxy)" color=0,0,0 visibility=1\n+ variableStep chrom=chrM\n+ 1 134\n+ 2 234\n+ 3 334\n+ 4 434\n+ track type=wiggle_0 name="Track A" description="User Supplied SNP Track (from Galaxy)" color=255,0,0 visibility=1\n+ variableStep chrom=chrM\n+ 1 2\n+ track type=wiggle_0 name="Track T" description="User Supplied SNP Track (from Galaxy)" color=0,255,0 visibility=1\n+ variableStep chrom=chrM\n+ 4 1\n+ track type=wiggle_0 name="Track G" description="User Supplied SNP Track (from Galaxy)" color=0,0,255 visibility=1\n+ variableStep chrom=chrM\n+ 1 2\n+ 2 1\n+ 3 1\n+ 4 1\n+ track type=wiggle_0 name="Track C" description="User Supplied SNP Track (from Galaxy)" color=255,0,255 visibility=1\n+ variableStep chrom=chrM\n+ 2 1\n+ \n+ </help> \n+</tool>\n' |