changeset 0:2eb1e2924c1a

Uploaded
author jjohnson
date Tue, 17 Jan 2012 21:53:25 -0500
parents
children ae2bc4e5fefc
files igvtools igvtools.jar igvtools_count.xml igvtools_sort.xml igvtools_tile.xml lib/galaxy/datatypes/igv.py tool-data/datatypes_conf.xml tool-data/igv_indices.loc.sample
diffstat 8 files changed, 420 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,2 @@
+#!/bin/sh
+java -Xmx1500m -jar "$(dirname "$0")/igvtools.jar" "$@"
Binary file igvtools.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools_count.xml	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,137 @@
+<tool id="igvtools_count" name="IGVtools count" version="1.0">
+  <description>average feature density across the genome</description>
+  <command interpreter="bash">igvtools count 
+    #if $zoom.__str__ != '':
+      -z $zoom
+    #end if
+    #if $window.__str__ != '':
+      -w $window
+    #end if
+    #if $extend.__str__ != '':
+      -e $extend
+    #end if
+    #if str($window_functions) not in ('', 'None'):
+      -f '$window_functions'
+    #end if
+    ## IGVTools relies on the file extension to determine format
+    #if $input.datatype.file_ext == 'bam':
+       #set $input_name='input_file.bam'
+    #elif $input.datatype.file_ext == 'sam':
+       #set $input_name='input_file.sam'
+    #elif $input.datatype.file_ext == 'bed':
+       #set $input_name='input_file.bed'
+    #elif $input.datatype.file_ext == 'psl':
+       #set $input_name='input_file.psl'
+    #end if
+    `ln -s $input  $input_name; echo $input_name` '$output_fmt' $refGenomeSource.ref
+  </command>
+  <inputs>
+    <conditional name="refGenomeSource">
+      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
+        <option value="built-in">Use a built-in reference</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="built-in">
+        <param name="ref" type="select" label="Select a reference genome">
+          <options from_file="igv_indices.loc">
+            <column name="dbkey" index="0" />
+            <column name="name" index="1" />
+            <column name="value" index="2" />
+            <filter type="sort_by" column="1" />
+            <validator type="no_options" message="No indexes are available" />
+          </options>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ref" type="data" format="igv.genome" metadata_name="dbkey" label="Select a reference from history" />
+      </when>
+    </conditional>
+    <param name="input" type="data" format="sam,bam,bed,psl" label="Input file" help="The input BAM,SAM,BED,PSL feature file"/>
+    <param name="zoom" type="integer" value="7" optional="true" label="-z maximum zoom level to precompute" 
+           help="The default value is 7 and is sufficient for most files. To reduce file
+               size at the expense of IGV performance this value can be reduced." />
+    <param name="window" type="integer" value="25" optional="true" label="-w Window size" 
+           help="The window size over which coverage is averaged. Defaults to 25 bp." />
+    <param name="extend" type="integer" value="" optional="true" label="Extend feature length" 
+           help="The read or feature is extended by the specified distance in bp prior to counting. 
+                 This option is useful for chip-seq and rna-seq applications. The value is generally set to the
+                 average fragment length of the library." />
+    <param name="window_functions" type="select" display="checkboxes" multiple="True" label="-f Functions to calculate over windows" 
+           help="If none are selected, will default to mean">
+        <option value="mean" selected="true">mean</option>
+        <option value="min">min</option>
+        <option value="max">max</option>
+    </param>
+    <param name="output_fmt" type="select" display="checkboxes" multiple="True" force_select="true" label="Select output format" 
+           help="Select at least one: IGV tiled data file (.tdf) and/or WIG file (.wig)">
+        <option value="output.tdf" selected="true">IGV tdf</option>
+        <option value="output.wig">wig</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="igv.tdf" name="output_tdf" metadata_source="input" label="${tool.name} on ${on_string}: igv.tdf" from_work_dir="output.tdf">
+      <filter>('output.tdf' in output_fmt)</filter>
+    </data>
+    <data format="wig" name="output_wig" metadata_source="input" label="${tool.name} on ${on_string}: igv.wig" from_work_dir="output.wig">
+      <filter>('output.wig' in output_fmt)</filter>
+    </data>
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+**What it does**
+
+The IGVTools_ count command computes average feature density over a specified window size across the genome. Common usages include computing coverage for alignment files and counting hits in Chip-seq experiments. By default, the resulting file will be displayed as a bar chart when loaded into IGV_.
+
+.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
+.. _IGV: http://www.broadinstitute.org/igv/
+
+------
+
+To cite your use of IGV in your publication::
+
+  James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov. 
+  Integrative Genomics Viewer.  Nature Biotechnology 29, 24-26 (2011)
+
+------
+
+**Input formats**
+
+Supported input file formats are: .sam, .bam, .aligned, .psl, .pslx, and .bed.
+
+------
+
+**Outputs**
+
+The output formats are IGV tiled data file (.tdf) and/or WIG file (.wig)
+
+-------
+
+
+**IGVTools count  parameter list**
+
+This is an exhaustive list of igvtools count options:
+
+For **count**::
+
+  -z Integer        Specifies the maximum zoom level to precompute. The default
+                    value is 7 and is sufficient for most files. To reduce file
+                    size at the expense of IGV performance this value can be
+                    reduced.
+  
+  -w Integer        The window size over which coverage is averaged. Defaults to 25 bp.
+  
+  -e Integer        The read or feature is extended by the specified distance
+                    in bp prior to counting. This option is useful for chip-seq
+                    and rna-seq applications. The value is generally set to the
+                    average fragment length of the library.
+  
+  -f list           A comma delimited list specifying window functions to use
+                    when reducing the data to precomputed tiles.   Possible
+                    values are  min, max, and mean.  By default only the mean
+                    is calculated.
+
+
+  
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools_sort.xml	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,80 @@
+<tool id="igvtools_sort" name="IGVtools sort" version="1.0">
+  <description>input file by start position</description>
+  <command interpreter="bash">igvtools sort -t .
+    ## IGVTools relies on the file extension to determine format
+    #if $input.datatype.file_ext == 'vcf':
+       #set $input_name='input_file.vcf'
+       #set $output_name='output.vcf'
+    #elif $input.datatype.file_ext == 'sam':
+       #set $input_name='input_file.sam'
+       #set $output_name='output.sam'
+    #elif $input.datatype.file_ext == 'bed':
+       #set $input_name='input_file.bed'
+       #set $output_name='output.bed'
+    #elif $input.datatype.file_ext == 'psl':
+       #set $input_name='input_file.psl'
+       #set $output_name='output.psl'
+    #elif $input.datatype.file_ext == 'igv':
+       #set $input_name='input_file.igv'
+       #set $output_name='output.igv'
+    #elif $input.datatype.file_ext == 'igv.cn':
+       #set $input_name='input_file.cn'
+       #set $output_name='output.cn'
+    #end if
+    `ln -s $input  $input_name; echo $input_name` $output_name
+  </command>
+  <inputs>
+    <param name="input" type="data" format="sam,bed,vcf,psl,igv,igv.cn" label="Input file SAM,BED,VCF,PSL,IGV,CN format"
+           help="Use samtools or picard to sort bam files"/>
+  </inputs>
+  <outputs>
+    <data format_source="input" name="output_sam" metadata_source="input" label="${tool.name} on ${on_string}: igv.sam" from_work_dir="output.sam">
+      <filter>(input.datatype.file_ext == 'sam')</filter>
+    </data>
+    <data format_source="input" name="output_bed" metadata_source="input" label="${tool.name} on ${on_string}: igv.bed" from_work_dir="output.bed">
+      <filter>(input.datatype.file_ext == 'bed')</filter>
+    </data>
+    <data format_source="input" name="output_vcf" metadata_source="input" label="${tool.name} on ${on_string}: igv.vcf" from_work_dir="output.vcf">
+      <filter>(input.datatype.file_ext == 'vcf')</filter>
+    </data>
+    <data format_source="input" name="output_psl" metadata_source="input" label="${tool.name} on ${on_string}: igv.psl" from_work_dir="output.psl">
+      <filter>(input.datatype.file_ext == 'psl')</filter>
+    </data>
+    <data format_source="input" name="output_igv" metadata_source="input" label="${tool.name} on ${on_string}: igv" from_work_dir="output.igv">
+      <filter>(input.datatype.file_ext == 'igv')</filter>
+    </data>
+    <data format_source="input" name="output_cn" metadata_source="input" label="${tool.name} on ${on_string}: igv.cn" from_work_dir="output.cn">
+      <filter>(input.datatype.file_ext == 'igv.cn')</filter>
+    </data>
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+**What it does**
+
+The IGVTools_ sort command sorts the input file by start position, as required.
+
+.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
+.. _IGV: http://www.broadinstitute.org/igv/
+
+------
+
+To cite your use of IGV in your publication::
+
+  James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov. 
+  Integrative Genomics Viewer.  Nature Biotechnology 29, 24-26 (2011)
+
+------
+
+**Input formats**
+
+Supported input file formats are: .cn, .igv, .sam, .aligned, .psl, .bed, and .vcf.
+
+------
+
+**Outputs**
+
+The output will have the same format as the input file.
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/igvtools_tile.xml	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,125 @@
+<tool id="igvtools_tile" name="IGVtools tile" version="1.0">
+  <description>convert a sorted data input file to a binary tiled data (.tdf) file</description>
+  <command interpreter="bash">igvtools tile 
+    #if $zoom.__str__ != '':
+      -z $zoom
+    #end if
+    #if str($window_functions) not in ('', 'None'):
+      -f '$window_functions'
+    #end if
+    #if str($probe) not in ('', 'None'):
+      -p '$probe'
+    #end if
+    ## IGVTools relies on the file extension to determine format (the result is written to output.tdf in the working directory)
+    #if $input.datatype.file_ext == 'wig':
+       #set $input_name='input_file.wig'
+    #elif $input.datatype.file_ext == 'igv.snp':
+       #set $input_name='input_file.snp'
+    #elif $input.datatype.file_ext == 'igv.gct':
+       #set $input_name='input_file.gct'
+    #elif $input.datatype.file_ext == 'igv.cn':
+       #set $input_name='input_file.cn'
+    #elif $input.datatype.file_ext == 'igv':
+       #set $input_name='input_file.igv'
+    #end if
+    `ln -s $input  $input_name; echo $input_name` output.tdf $refGenomeSource.ref
+  </command>
+  <inputs>
+    <conditional name="refGenomeSource">
+      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
+        <option value="built-in">Use a built-in reference</option>
+        <option value="history">Use one from the history</option>
+      </param>
+      <when value="built-in">
+        <param name="ref" type="select" label="Select a reference genome">
+          <options from_file="igv_indices.loc">
+            <column name="dbkey" index="0" />
+            <column name="name" index="1" />
+            <column name="value" index="2" />
+            <filter type="sort_by" column="1" />
+            <validator type="no_options" message="No indexes are available" />
+          </options>
+        </param>
+      </when>
+      <when value="history">
+        <param name="ref" type="data" format="igv.genome" metadata_name="dbkey" label="Select a reference from history" />
+      </when>
+    </conditional>
+    <param name="input" type="data" format="wig,igv,igv.cn,igv.snp,igv.gct" label="Input file" help="The input WIG,CN,IGV,GCT,SNP feature file"/>
+    <param name="zoom" type="integer" value="7" optional="true" label="-z maximum zoom level to precompute" 
+           help="The default value is 7 and is sufficient for most files. To reduce file
+               size at the expense of IGV performance this value can be reduced." />
+    <param name="window_functions" type="select" display="checkboxes" multiple="True" label="-f Functions to calculate over windows" 
+           help="If none are selected, will default to mean">
+        <option value="mean" selected="true">mean</option>
+        <option value="min">min</option>
+        <option value="max">max</option>
+    </param>
+    <param name="probe" type="data" format="bed" optional="true" label="Probe file for GCT input" 
+           help="Specifies a bed file to be used to map probe identifiers to locations.  
+                 This option is useful when preprocessing .gct files.  
+                 The bed file should contain 4 columns:
+                   chr start end name
+                 where name is the probe name in the .gct file."/>
+  </inputs>
+  <outputs>
+    <data format="igv.tdf" name="output_tdf" metadata_source="input" label="${tool.name} on ${on_string}: igv.tdf" from_work_dir="output.tdf"/>
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+**What it does**
+
+The IGVTools_ tile command converts a sorted data input file to a binary tiled data (.tdf) file. Use this command to pre-process large datasets for improved IGV performance. 
+
+.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
+.. _IGV: http://www.broadinstitute.org/igv/
+
+------
+
+To cite your use of IGV in your publication::
+
+  James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov. 
+  Integrative Genomics Viewer.  Nature Biotechnology 29, 24-26 (2011)
+
+------
+
+**Input formats**
+
+Supported input file formats are: .wig, .cn, .snp, .igv, and .gct.
+
+------
+
+**Outputs**
+
+The output format is IGV tiled data file (.tdf)
+
+-------
+
+
+**IGVTools tile  parameter list**
+
+This is an exhaustive list of igvtools tile options:
+
+For **tile**::
+
+  -z Integer        Specifies the maximum zoom level to precompute. The default
+                    value is 7 and is sufficient for most files. To reduce file
+                    size at the expense of IGV performance this value can be
+                    reduced.
+  
+  -p file           Specifies a "bed" file to be used to map probe identifiers
+                    to locations.  This option is useful when preprocessing .gct
+                    files.  The bed file should contain 4 columns:
+                    chr start end name
+                    where name is the probe name in the .gct file.
+  
+  -f list           A comma delimited list specifying window functions to use
+                    when reducing the data to precomputed tiles.   Possible
+                    values are  min, max, and mean.  By default only the mean
+                    is calculated.
+
+
+  
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/galaxy/datatypes/igv.py	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,49 @@
+"""
+IGV datatypes
+"""
+import logging,zipfile
+import galaxy.datatypes.binary
+from galaxy.datatypes.binary import Binary
+
+log = logging.getLogger(__name__)
+
+class TiledDataFile( Binary ):
+    """Class describing an IGV tiled data file (TDF) .tdf  binary file"""
+    file_ext = "igv.tdf"
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        """Return True when the file starts with the 4-byte TDF magic number 'TDF3'."""
+        # Format details: http://www.broadinstitute.org/software/igv/TDF
+        # Raw bytes are compared directly, so no binascii helper (which this module
+        # never imported) is needed; binary mode keeps the read byte-exact.
+        try:
+            with open( filename, 'rb' ) as fh:
+                return fh.read(4) == b'TDF3'
+        except Exception:
+            return False
+
+class GenomeDescriptor( Binary ):
+    """Class describing an IGV .genome zip archive  file"""
+    file_ext = "igv.genome"
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+
+    def sniff( self, filename ):
+        # A .genome file is a zip archive whose 'property.txt' member has a line starting
+        # with the key 'sequenceLocation' (bytes literal: member lines are bytes on Python 3).
+        try:
+            if filename is None or not zipfile.is_zipfile(filename):
+                return False
+            genome_archive = zipfile.ZipFile(filename)
+            try:
+                if 'property.txt' not in genome_archive.namelist():
+                    return False
+                return any(line.startswith(b'sequenceLocation') for line in genome_archive.open('property.txt'))
+            finally:
+                genome_archive.close()
+        except Exception:
+            return False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/datatypes_conf.xml	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="igv.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="igv" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="True"/>
+        <datatype extension="igv.snp" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="True"/>
+        <datatype extension="igv.cn" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="True"/>
+        <datatype extension="igv.gct" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="True"/>
+        <datatype extension="igv.res" type="galaxy.datatypes.tabular:Tabular" subclass="True" display_in_upload="True"/>
+        <datatype extension="igv.tdf" type="galaxy.datatypes.igv:TiledDataFile" display_in_upload="True"/>
+        <datatype extension="igv.genome" type="galaxy.datatypes.igv:GenomeDescriptor" display_in_upload="True"/>
+    </registration>
+    <sniffers>
+        <sniffer type="galaxy.datatypes.igv:TiledDataFile"/>
+        <sniffer type="galaxy.datatypes.igv:GenomeDescriptor"/>
+    </sniffers>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/igv_indices.loc.sample	Tue Jan 17 21:53:25 2012 -0500
@@ -0,0 +1,8 @@
+# IGVTools .genome files
+# from http://www.broadinstitute.org/software/igv/download
+#   igvtools that includes .genome files 
+#   taken from:   igvtools_1.5.16.zip     includes .genome files (148 MB)
+#
+#  format of this .loc file (3 tab-separated columns):
+#dbkey	description	filepath
+#hg19	Human hg19	/depot/data2/galaxy/IGV/2.0/hg19.genome