# HG changeset patch # User jjohnson # Date 1326855205 18000 # Node ID 2eb1e2924c1a90d598c5229ca2cdb2191381740c Uploaded diff -r 000000000000 -r 2eb1e2924c1a igvtools --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/igvtools Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,3 @@ +#!/bin/sh +# Run igvtools.jar from the directory containing this script; "$@" keeps every argument as a separate word. +java -Xmx1500m -jar "$(dirname "$0")/igvtools.jar" "$@" diff -r 000000000000 -r 2eb1e2924c1a igvtools.jar Binary file igvtools.jar has changed diff -r 000000000000 -r 2eb1e2924c1a igvtools_count.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/igvtools_count.xml Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,137 @@ + + average feature density across the genome + igvtools count + #if $zoom.__str__ != '': + -z $zoom + #end if + #if $window.__str__ != '': + -w $window + #end if + #if $extend.__str__ != '': + -e $extend + #end if + #if $window_functions.__str__ != '': + -f '$window_functions' + #end if + ## IGVTools relies on the file extension to determine format, so the dataset is symlinked under a name with the matching extension. NOTE(review): there is no else branch, so input_name stays unset for any other datatype. + #if $input.datatype.file_ext == 'bam': + #set $input_name='input_file.bam' + #elif $input.datatype.file_ext == 'sam': + #set $input_name='input_file.sam' + #elif $input.datatype.file_ext == 'bed': + #set $input_name='input_file.bed' + #elif $input.datatype.file_ext == 'psl': + #set $input_name='input_file.psl' + #end if + `ln -s $input $input_name; echo $input_name` '$output_fmt' $refGenomeSource.ref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ('output.tdf' in output_fmt) + + + ('output.wig' in output_fmt) + + + + + +**What it does** + +The IGVTools_ count command computes average feature density over a specified window size across the genome. Common usages include computing coverage for alignment files and counting hits in Chip-seq experiments. By default, the resulting file will be displayed as a bar chart when loaded into IGV_. + +.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline +.. 
_IGV: http://www.broadinstitute.org/igv/ + +------ + +To cite your use of IGV in your publication:: + + James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov. + Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011) + +------ + +**Input formats** + +Supported input file formats are: .sam, .bam, .aligned, .psl, .pslx, and .bed. + +------ + +**Outputs** + +The output formats are IGV tiled data file (TDF) file (.tdf) and/or WIG file (.wig) + +------- + + +**IGVTools count parameter list** + +This is an exhaustive list of igvtools count options: + +For **count**:: + + -z Integer Specifies the maximum zoom level to precompute. The default + value is 7 and is sufficient for most files. To reduce file + size at the expense of IGV performance this value can be + reduced. + + -w Integer The window size over which coverage is averaged. Defaults to 25 bp. + + -e Integer The read or feature is extended by the specified distance + in bp prior to counting. This option is useful for chip-seq + and rna-seq applications. The value is generally set to the + average fragment length of the library. + + -f list A comma delimited list specifying window functions to use + when reducing the data to precomputed tiles. Possible + values are min, max, and mean. By default only the mean + is calculated. + + + + + diff -r 000000000000 -r 2eb1e2924c1a igvtools_sort.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/igvtools_sort.xml Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,80 @@ + + input file by start position + igvtools sort -t . 
+ ## IGVTools relies on the file extension to determine format, so the dataset is symlinked under a name with the matching extension and the output name gets the same extension. NOTE(review): there is no else branch, so input_name and output_name stay unset for any other datatype. + #if $input.datatype.file_ext == 'vcf': + #set $input_name='input_file.vcf' + #set $output_name='output.vcf' + #elif $input.datatype.file_ext == 'sam': + #set $input_name='input_file.sam' + #set $output_name='output.sam' + #elif $input.datatype.file_ext == 'bed': + #set $input_name='input_file.bed' + #set $output_name='output.bed' + #elif $input.datatype.file_ext == 'psl': + #set $input_name='input_file.psl' + #set $output_name='output.psl' + #elif $input.datatype.file_ext == 'igv': + #set $input_name='input_file.igv' + #set $output_name='output.igv' + #elif $input.datatype.file_ext == 'igv.cn': + #set $input_name='input_file.cn' + #set $output_name='output.cn' + #end if + `ln -s $input $input_name; echo $input_name` $output_name + + + + + + + (input.datatype.file_ext == 'sam') + + + (input.datatype.file_ext == 'bed') + + + (input.datatype.file_ext == 'vcf') + + + (input.datatype.file_ext == 'psl') + + + (input.datatype.file_ext == 'igv') + + + (input.datatype.file_ext == 'igv.cn') + + + + + +**What it does** + +The IGVTools_ sort command sorts the input file by start position, as required. + +.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline +.. _IGV: http://www.broadinstitute.org/igv/ + +------ + +To cite your use of IGV in your publication:: + + James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov. + Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011) + +------ + +**Input formats** + +Supported input file formats are: .cn, .igv, .sam, .aligned, .psl, .bed, and .vcf. + +------ + +**Outputs** + +The output will have the same format as the input file. 
+ + + diff -r 000000000000 -r 2eb1e2924c1a igvtools_tile.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/igvtools_tile.xml Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,125 @@ + + convert a sorted data input file to a binary tiled data (.tdf) file + igvtools tile + #if $zoom.__str__ != '': + -z $zoom + #end if + #if $window_functions.__str__ != '': + -f '$window_functions' + #end if + #if $probe.__str__ != '': + -p '$probe' + #end if + ## IGVTools relies on the file extension to determine format, so the dataset is symlinked under a name with the matching extension. NOTE(review): there is no else branch, so input_name stays unset for any other datatype. + #if $input.datatype.file_ext == 'wig': + #set $input_name='input_file.wig' + #elif $input.datatype.file_ext == 'igv.snp': + #set $input_name='input_file.snp' + #elif $input.datatype.file_ext == 'igv.gct': + #set $input_name='input_file.gct' + #elif $input.datatype.file_ext == 'igv.cn': + #set $input_name='input_file.cn' + #elif $input.datatype.file_ext == 'igv': + #set $input_name='input_file.igv' + #end if + `ln -s $input $input_name; echo $input_name` $output_tdf $refGenomeSource.ref + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +The IGVTools_ tile command converts a sorted data input file to a binary tiled data (.tdf) file. Use this command to pre-process large datasets for improved IGV performance. + +.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline +.. _IGV: http://www.broadinstitute.org/igv/ + +------ + +To cite your use of IGV in your publication:: + + James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov. + Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011) + +------ + +**Input formats** + +Supported input file formats are: .wig, .cn, .snp, .igv, and .gct. 
+ +------ + +**Outputs** + +The output format is IGV tiled data file (TDF) file (.tdf) + +------- + + +**IGVTools tile parameter list** + +This is an exhaustive list of igvtools tile options: + +For **tile**:: + + -z Integer Specifies the maximum zoom level to precompute. The default + value is 7 and is sufficient for most files. To reduce file + size at the expense of IGV performance this value can be + reduced. + + -p file Specifies a "bed" file to be used to map probe identifiers + to locations. This option is useful when preprocessing .gct + files. The bed file should contain 4 columns: + chr start end name + where name is the probe name in the .gct file. + + -f list A comma delimited list specifying window functions to use + when reducing the data to precomputed tiles. Possible + values are min, max, and mean. By default only the mean + is calculated. + + + + + diff -r 000000000000 -r 2eb1e2924c1a lib/galaxy/datatypes/igv.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/igv.py Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,49 @@ +""" +IGV datatypes +""" +import logging,zipfile +import galaxy.datatypes.binary +from galaxy.datatypes.binary import Binary + +log = logging.getLogger(__name__) + +class TiledDataFile( Binary ): + """Class describing an IGV tiled data file (TDF) .tdf binary file""" + file_ext = "igv.tdf" + + def __init__( self, **kwd ): + Binary.__init__( self, **kwd ) + + def sniff( self, filename ): + # The first 4 bytes of a TDF file is 'TDF3', and the file is binary. 
For details + # about the format, see http://www.broadinstitute.org/software/igv/TDF + try: + with open( filename, 'rb' ) as fh: + header = fh.read(4) + # Compare the raw magic bytes directly; binary mode avoids any text decoding. + return header == b'TDF3' + except Exception: + return False + +class GenomeDescriptor( Binary ): + """Class describing an IGV .genome zip archive file""" + file_ext = "igv.genome" + + def __init__( self, **kwd ): + Binary.__init__( self, **kwd ) + + def sniff( self, filename ): + # An IGV .genome file is a zip archive; this check is unrelated to the TDF format. + # Any error while opening or reading the archive is treated as no match. + # The zipfile should contain a file named 'property.txt' which should have a key named 'sequenceLocation' + try: + if filename is not None and zipfile.is_zipfile(filename): + genome_archive = zipfile.ZipFile(filename) + if 'property.txt' in genome_archive.namelist(): + fh = genome_archive.open('property.txt') + for l in fh: + if l.startswith(b'sequenceLocation'): + return True + return False + except Exception: + return False diff -r 000000000000 -r 2eb1e2924c1a tool-data/datatypes_conf.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/datatypes_conf.xml Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 2eb1e2924c1a tool-data/igv_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/igv_indices.loc.sample Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,8 @@ +# IGVTools .genome files +# from http://www.broadinstitute.org/software/igv/download +# igvtools that includes .genome files +# taken from: igvtools_1.5.16.zip includes .genome files (148 MB) +# +# format of this .loc file (3 tab-separated columns): +#dbkey description filepath +#hg19 Human hg19 /depot/data2/galaxy/IGV/2.0/hg19.genome