# HG changeset patch
# User jjohnson
# Date 1326855205 18000
# Node ID 2eb1e2924c1a90d598c5229ca2cdb2191381740c
Uploaded
diff -r 000000000000 -r 2eb1e2924c1a igvtools
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,2 @@
+#!/bin/sh
+java -Xmx1500m -jar "$(dirname "$0")/igvtools.jar" "$@"  # quoted so install paths and arguments containing spaces survive
diff -r 000000000000 -r 2eb1e2924c1a igvtools.jar
Binary file igvtools.jar has changed
diff -r 000000000000 -r 2eb1e2924c1a igvtools_count.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools_count.xml Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,137 @@
+
+ average feature density across the genome
+ igvtools count
+ #if $zoom.__str__ != '':
+ -z $zoom
+ #end if
+ #if $window.__str__ != '':
+ -w $window
+ #end if
+ #if $extend.__str__ != '':
+ -e $extend
+ #end if
+ #if $window_functions.__str__ != '':
+ -f '$window_functions'
+ #end if
+ ## IGVTools relies on the file extension to determine format
+ #if $input.datatype.file_ext == 'bam':
+ #set $input_name='input_file.bam'
+ #elif $input.datatype.file_ext == 'sam':
+ #set $input_name='input_file.sam'
+ #elif $input.datatype.file_ext == 'bed':
+ #set $input_name='input_file.bed'
+ #elif $input.datatype.file_ext == 'psl':
+ #set $input_name='input_file.psl'
+ #end if
+ `ln -s $input $input_name; echo $input_name` '$output_fmt' $refGenomeSource.ref
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ('output.tdf' in output_fmt)
+
+
+ ('output.wig' in output_fmt)
+
+
+
+
+
+**What it does**
+
+The IGVTools_ count command computes average feature density over a specified window size across the genome. Common usages include computing coverage for alignment files and counting hits in ChIP-seq experiments. By default, the resulting file will be displayed as a bar chart when loaded into IGV_.
+
+.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
+.. _IGV: http://www.broadinstitute.org/igv/
+
+------
+
+To cite your use of IGV in your publication::
+
+ James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov.
+ Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011)
+
+------
+
+**Input formats**
+
+Supported input file formats are: .sam, .bam, .aligned, .psl, .pslx, and .bed.
+
+------
+
+**Outputs**
+
+The output formats are IGV tiled data format (TDF) files (.tdf) and/or WIG files (.wig).
+
+-------
+
+
+**IGVTools count parameter list**
+
+This is an exhaustive list of igvtools count options:
+
+For **count**::
+
+ -z Integer Specifies the maximum zoom level to precompute. The default
+ value is 7 and is sufficient for most files. To reduce file
+ size at the expense of IGV performance this value can be
+ reduced.
+
+ -w Integer The window size over which coverage is averaged. Defaults to 25 bp.
+
+ -e Integer The read or feature is extended by the specified distance
+ in bp prior to counting. This option is useful for ChIP-seq
+ and RNA-seq applications. The value is generally set to the
+ average fragment length of the library.
+
+ -f list A comma delimited list specifying window functions to use
+ when reducing the data to precomputed tiles. Possible
+ values are min, max, and mean. By default only the mean
+ is calculated.
+
+
+
+
+
diff -r 000000000000 -r 2eb1e2924c1a igvtools_sort.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools_sort.xml Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,80 @@
+
+ input file by start position
+ igvtools sort -t .
+ ## IGVTools relies on the file extension to determine format
+ #if $input.datatype.file_ext == 'vcf':
+ #set $input_name='input_file.vcf'
+ #set $output_name='output.vcf'
+ #elif $input.datatype.file_ext == 'sam':
+ #set $input_name='input_file.sam'
+ #set $output_name='output.sam'
+ #elif $input.datatype.file_ext == 'bed':
+ #set $input_name='input_file.bed'
+ #set $output_name='output.bed'
+ #elif $input.datatype.file_ext == 'psl':
+ #set $input_name='input_file.psl'
+ #set $output_name='output.psl'
+ #elif $input.datatype.file_ext == 'igv':
+ #set $input_name='input_file.igv'
+ #set $output_name='output.igv'
+ #elif $input.datatype.file_ext == 'igv.cn':
+ #set $input_name='input_file.cn'
+ #set $output_name='output.cn'
+ #end if
+ `ln -s $input $input_name; echo $input_name` $output_name
+
+
+
+
+
+
+ (input.datatype.file_ext == 'sam')
+
+
+ (input.datatype.file_ext == 'bed')
+
+
+ (input.datatype.file_ext == 'vcf')
+
+
+ (input.datatype.file_ext == 'psl')
+
+
+ (input.datatype.file_ext == 'igv')
+
+
+ (input.datatype.file_ext == 'igv.cn')
+
+
+
+
+
+**What it does**
+
+The IGVTools_ sort command sorts the input file by start position, as required by IGVTools commands that expect sorted input (for example, tile).
+
+.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
+.. _IGV: http://www.broadinstitute.org/igv/
+
+------
+
+To cite your use of IGV in your publication::
+
+ James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov.
+ Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011)
+
+------
+
+**Input formats**
+
+Supported input file formats are: .cn, .igv, .sam, .aligned, .psl, .bed, and .vcf.
+
+------
+
+**Outputs**
+
+The output will have the same format as the input file.
+
+
+
diff -r 000000000000 -r 2eb1e2924c1a igvtools_tile.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools_tile.xml Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,125 @@
+
+ convert a sorted data input file to a binary tiled data (.tdf) file
+ igvtools tile
+ #if $zoom.__str__ != '':
+ -z $zoom
+ #end if
+ #if $window_functions.__str__ != '':
+ -f '$window_functions'
+ #end if
+ #if $probe.__str__ != '':
+ -p '$probe'
+ #end if
+ ## IGVTools relies on the file extension to determine format
+ #if $input.datatype.file_ext == 'wig':
+ #set $input_name='input_file.wig'
+ #elif $input.datatype.file_ext == 'igv.snp':
+ #set $input_name='input_file.snp'
+ #elif $input.datatype.file_ext == 'igv.gct':
+ #set $input_name='input_file.gct'
+ #elif $input.datatype.file_ext == 'igv.cn':
+ #set $input_name='input_file.cn'
+ #elif $input.datatype.file_ext == 'igv':
+ #set $input_name='input_file.igv'
+ #end if
+ `ln -s $input $input_name; echo $input_name` $output_tdf $refGenomeSource.ref
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**What it does**
+
+The IGVTools_ tile command converts a sorted data input file to a binary tiled data (.tdf) file. Use this command to pre-process large datasets for improved IGV performance.
+
+.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
+.. _IGV: http://www.broadinstitute.org/igv/
+
+------
+
+To cite your use of IGV in your publication::
+
+ James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov.
+ Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011)
+
+------
+
+**Input formats**
+
+Supported input file formats are: .wig, .cn, .snp, .igv, and .gct.
+
+------
+
+**Outputs**
+
+The output format is an IGV tiled data format (TDF) file (.tdf).
+
+-------
+
+
+**IGVTools tile parameter list**
+
+This is an exhaustive list of igvtools tile options:
+
+For **tile**::
+
+ -z Integer Specifies the maximum zoom level to precompute. The default
+ value is 7 and is sufficient for most files. To reduce file
+ size at the expense of IGV performance this value can be
+ reduced.
+
+ -p file Specifies a "bed" file to be used to map probe identifiers
+ to locations. This option is useful when preprocessing .gct
+ files. The bed file should contain 4 columns:
+ chr start end name
+ where name is the probe name in the .gct file.
+
+ -f list A comma delimited list specifying window functions to use
+ when reducing the data to precomputed tiles. Possible
+ values are min, max, and mean. By default only the mean
+ is calculated.
+
+
+
+
+
diff -r 000000000000 -r 2eb1e2924c1a lib/galaxy/datatypes/igv.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/igv.py Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,49 @@
+"""
+IGV datatypes
+"""
+import logging,zipfile
+import galaxy.datatypes.binary
+from galaxy.datatypes.binary import Binary
+
+log = logging.getLogger(__name__)
+
+class TiledDataFile( Binary ):
+    """Class describing an IGV tiled data file (TDF) .tdf binary file"""
+    file_ext = "igv.tdf"
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """Return True if the file begins with the 4-byte TDF magic number 'TDF3'."""
+        # For details about the format, see http://www.broadinstitute.org/software/igv/TDF
+        # Read in binary mode and compare the raw bytes; this needs no hex conversion
+        # (and therefore no binascii import, which this module does not have).
+        try:
+            with open( filename, 'rb' ) as handle:
+                return handle.read(4) == b'TDF3'
+        except Exception:  # best effort: an unreadable file is reported as "not TDF"
+            return False
+
+class GenomeDescriptor( Binary ):
+    """Class describing an IGV .genome zip archive file"""
+    file_ext = "igv.genome"
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """Return True if filename is a zip archive whose 'property.txt' has a 'sequenceLocation' key."""
+        try:
+            if filename is None or not zipfile.is_zipfile( filename ):
+                return False
+            genome_archive = zipfile.ZipFile( filename )
+            try:
+                if 'property.txt' not in genome_archive.namelist():
+                    return False
+                # Archive members are read as bytes, so match against a bytes prefix.
+                return any( line.startswith( b'sequenceLocation' ) for line in genome_archive.open( 'property.txt' ) )
+            finally:
+                genome_archive.close()
+        except Exception:  # best effort: any failure is reported as "not a .genome file"
+            return False
diff -r 000000000000 -r 2eb1e2924c1a tool-data/datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/datatypes_conf.xml Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 2eb1e2924c1a tool-data/igv_indices.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/igv_indices.loc.sample Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,8 @@
+# IGVTools .genome files
+# from http://www.broadinstitute.org/software/igv/download
+# igvtools that includes .genome files
+# taken from: igvtools_1.5.16.zip includes .genome files (148 MB)
+#
+# format of this .loc file (3 tab-separated columns):
+#dbkey description filepath
+#hg19 Human hg19 /depot/data2/galaxy/IGV/2.0/hg19.genome