Mercurial > repos > jjohnson > igvtools
changeset 0:2eb1e2924c1a
Uploaded
author | jjohnson |
---|---|
date | Tue, 17 Jan 2012 21:53:25 -0500 |
parents | |
children | ae2bc4e5fefc |
files | igvtools igvtools.jar igvtools_count.xml igvtools_sort.xml igvtools_tile.xml lib/galaxy/datatypes/igv.py tool-data/datatypes_conf.xml tool-data/igv_indices.loc.sample |
diffstat | 8 files changed, 420 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/bin/sh
# igvtools: wrapper script that runs the igvtools.jar located in the same
# directory as this script, with a 1500 MB maximum Java heap, forwarding every
# command-line argument to the jar.
#
# "$@" is used instead of an unquoted $* so that each argument reaches Java
# exactly as the caller passed it (arguments containing whitespace are not
# re-split), and the dirname result is quoted so an install path containing
# spaces still resolves to the jar.
java -Xmx1500m -jar "$(dirname "$0")/igvtools.jar" "$@"
<!-- igvtools_count.xml: Galaxy tool wrapper for the "igvtools count" command. -->
<tool id="igvtools_count" name="IGVtools count" version="1.0">
  <description>average feature density across the genome</description>
  <command interpreter="bash">igvtools count
    ## Galaxy may render an unset optional parameter either as an empty string
    ## or as the literal string None; only pass a flag for a real value.
    #if str($zoom) != '' and str($zoom) != 'None':
      -z $zoom
    #end if
    #if str($window) != '' and str($window) != 'None':
      -w $window
    #end if
    #if str($extend) != '' and str($extend) != 'None':
      -e $extend
    #end if
    #if str($window_functions) != '' and str($window_functions) != 'None':
      -f '$window_functions'
    #end if
    ## IGVTools relies on the file extension to determine format
    #if $input.datatype.file_ext == 'bam':
      #set $input_name='input_file.bam'
    #elif $input.datatype.file_ext == 'sam':
      #set $input_name='input_file.sam'
    #elif $input.datatype.file_ext == 'bed':
      #set $input_name='input_file.bed'
    #elif $input.datatype.file_ext == 'psl':
      #set $input_name='input_file.psl'
    #end if
    ## Symlink the dataset to a name carrying the right extension, then pass that name to igvtools.
    `ln -s $input $input_name; echo $input_name` '$output_fmt' $refGenomeSource.ref
  </command>
  <inputs>
    <!-- Reference genome: either an entry from igv_indices.loc or an igv.genome dataset from the history. -->
    <conditional name="refGenomeSource">
      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
        <option value="built-in">Use a built-in reference</option>
        <option value="history">Use one from the history</option>
      </param>
      <when value="built-in">
        <param name="ref" type="select" label="Select a reference genome">
          <options from_file="igv_indices.loc">
            <column name="dbkey" index="0" />
            <column name="name" index="1" />
            <column name="value" index="2" />
            <filter type="sort_by" column="1" />
            <validator type="no_options" message="No indexes are available" />
          </options>
        </param>
      </when>
      <when value="history">
        <param name="ref" type="data" format="igv.genome" metadata_name="dbkey" label="Select a reference from history" />
      </when>
    </conditional>
    <param name="input" type="data" format="sam,bam,bed,psl" label="Input file" help="The input BAM,SAM,BED,PSL feature file"/>
    <param name="zoom" type="integer" value="7" optional="true"
           label="-z maximum zoom level to precompute"
           help="The default value is 7 and is sufficient for most files. To reduce file size at the expense of IGV performance this value can be reduced." />
    <param name="window" type="integer" value="25" optional="true"
           label="-w Window size"
           help="The window size over which coverage is averaged. Defaults to 25 bp." />
    <param name="extend" type="integer" value="" optional="true"
           label="Extend feature length"
           help="The read or feature is extended by the specified distance in bp prior to counting. This option is useful for chip-seq and rna-seq applications. The value is generally set to the average fragment length of the library." />
    <param name="window_functions" type="select" display="checkboxes" multiple="true"
           label="-f Functions to calculate over windows"
           help="If none are selected, will default to mean">
      <option value="mean" selected="true">mean</option>
      <option value="min">min</option>
      <option value="max">max</option>
    </param>
    <!-- The option values double as the output file names igvtools writes in the job working directory. -->
    <param name="output_fmt" type="select" display="checkboxes" multiple="true" force_select="true"
           label="Select output format"
           help="Select the output file format(s) to generate: IGV tdf, wig, or both.">
      <option value="output.tdf" selected="true">IGV tdf</option>
      <option value="output.wig">wig</option>
    </param>
  </inputs>
  <outputs>
    <!-- Each output is only created when its file name was selected in output_fmt. -->
    <data format="igv.tdf" name="output_tdf" metadata_source="input" label="${tool.name} on ${on_string}: igv.tdf" from_work_dir="output.tdf">
      <filter>('output.tdf' in output_fmt)</filter>
    </data>
    <data format="wig" name="output_wig" metadata_source="input" label="${tool.name} on ${on_string}: igv.wig" from_work_dir="output.wig">
      <filter>('output.wig' in output_fmt)</filter>
    </data>
  </outputs>
  <tests>
  </tests>
  <help>
**What it does**

The IGVTools_ count command computes average feature density over a specified window size across the genome.
Common usages include computing coverage for alignment files and counting hits in Chip-seq experiments. By default, the resulting file will be displayed as a bar chart when loaded into IGV_.

.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
.. _IGV: http://www.broadinstitute.org/igv/

------

To cite your use of IGV in your publication::

  James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov.
  Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011)

------

**Input formats**

Supported input file formats are: .sam, .bam, .aligned, .psl, .pslx, and .bed.

------

**Outputs**

The output formats are IGV tiled data file (TDF) file (.tdf) and/or WIG file (.wig)

-------


**IGVTools count parameter list**

This is an exhaustive list of igvtools count options:

For **count**::

  -z Integer    Specifies the maximum zoom level to precompute. The default
                value is 7 and is sufficient for most files. To reduce file
                size at the expense of IGV performance this value can be
                reduced.

  -w Integer    The window size over which coverage is averaged. Defaults to 25 bp.

  -e Integer    The read or feature is extended by the specified distance
                in bp prior to counting. This option is useful for chip-seq
                and rna-seq applications. The value is generally set to the
                average fragment length of the library.

  -f list       A comma delimited list specifying window functions to use
                when reducing the data to precomputed tiles. Possible
                values are min, max, and mean. By default only the mean
                is calculated.

  </help>
</tool>
<!-- igvtools_sort.xml: Galaxy tool wrapper for the "igvtools sort" command. -->
<tool id="igvtools_sort" name="IGVtools sort" version="1.0">
  <description>input file by start position</description>
  <command interpreter="bash">igvtools sort -t .
    ## IGVTools relies on the file extension to determine format
    #if $input.datatype.file_ext == 'vcf':
      #set $input_name='input_file.vcf'
      #set $output_name='output.vcf'
    #elif $input.datatype.file_ext == 'sam':
      #set $input_name='input_file.sam'
      #set $output_name='output.sam'
    #elif $input.datatype.file_ext == 'bed':
      #set $input_name='input_file.bed'
      #set $output_name='output.bed'
    #elif $input.datatype.file_ext == 'psl':
      #set $input_name='input_file.psl'
      #set $output_name='output.psl'
    #elif $input.datatype.file_ext == 'igv':
      #set $input_name='input_file.igv'
      #set $output_name='output.igv'
    #elif $input.datatype.file_ext == 'igv.cn':
      #set $input_name='input_file.cn'
      #set $output_name='output.cn'
    #end if
    ## Symlink the dataset to a name carrying the right extension, then pass that name to igvtools.
    `ln -s $input $input_name; echo $input_name` $output_name
  </command>
  <inputs>
    <param name="input" type="data" format="sam,bed,vcf,psl,igv,igv.cn"
           label="Input file (SAM, BED, VCF, PSL, IGV or CN format)"
           help="Use samtools or picard to sort bam files"/>
  </inputs>
  <outputs>
    <!-- One output per accepted input type; the filter keeps only the one matching
         the input's extension, and from_work_dir matches $output_name above. -->
    <data format_source="input" name="output_sam" metadata_source="input" label="${tool.name} on ${on_string}: igv.sam" from_work_dir="output.sam">
      <filter>(input.datatype.file_ext == 'sam')</filter>
    </data>
    <data format_source="input" name="output_bed" metadata_source="input" label="${tool.name} on ${on_string}: igv.bed" from_work_dir="output.bed">
      <filter>(input.datatype.file_ext == 'bed')</filter>
    </data>
    <data format_source="input" name="output_vcf" metadata_source="input" label="${tool.name} on ${on_string}: igv.vcf" from_work_dir="output.vcf">
      <filter>(input.datatype.file_ext == 'vcf')</filter>
    </data>
    <data format_source="input" name="output_psl" metadata_source="input" label="${tool.name} on ${on_string}: igv.psl" from_work_dir="output.psl">
      <filter>(input.datatype.file_ext == 'psl')</filter>
    </data>
    <data format_source="input" name="output_igv" metadata_source="input" label="${tool.name} on ${on_string}: igv" from_work_dir="output.igv">
      <filter>(input.datatype.file_ext == 'igv')</filter>
    </data>
    <data format_source="input" name="output_cn" metadata_source="input" label="${tool.name} on ${on_string}: igv.cn" from_work_dir="output.cn">
      <filter>(input.datatype.file_ext == 'igv.cn')</filter>
    </data>
  </outputs>
  <tests>
  </tests>
  <help>
**What it does**

The IGVTools_ sort command sorts the input file by start position, as required.

.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
.. _IGV: http://www.broadinstitute.org/igv/

------

To cite your use of IGV in your publication::

  James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov.
  Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011)

------

**Input formats**

Supported input file formats are: .cn, .igv, .sam, .aligned, .psl, .bed, and .vcf.

------

**Outputs**

The output will have the same format as the input file.

  </help>
</tool>
<!-- igvtools_tile.xml: Galaxy tool wrapper for the "igvtools tile" command. -->
<tool id="igvtools_tile" name="IGVtools tile" version="1.0">
  <description>convert a sorted data input file to a binary tiled data (.tdf) file</description>
  <command interpreter="bash">igvtools tile
    ## Galaxy may render an unset optional parameter either as an empty string
    ## or as the literal string None; only pass a flag for a real value.
    #if str($zoom) != '' and str($zoom) != 'None':
      -z $zoom
    #end if
    #if str($window_functions) != '' and str($window_functions) != 'None':
      -f '$window_functions'
    #end if
    #if str($probe) != '' and str($probe) != 'None':
      -p '$probe'
    #end if
    ## IGVTools relies on the file extension to determine format
    #if $input.datatype.file_ext == 'wig':
      #set $input_name='input_file.wig'
    #elif $input.datatype.file_ext == 'igv.snp':
      #set $input_name='input_file.snp'
    #elif $input.datatype.file_ext == 'igv.gct':
      #set $input_name='input_file.gct'
    #elif $input.datatype.file_ext == 'igv.cn':
      #set $input_name='input_file.cn'
    #elif $input.datatype.file_ext == 'igv':
      #set $input_name='input_file.igv'
    #end if
    ## Write to output.tdf in the working directory: the output_tdf dataset is
    ## declared with from_work_dir="output.tdf" and is collected from there.
    `ln -s $input $input_name; echo $input_name` output.tdf $refGenomeSource.ref
  </command>
  <inputs>
    <!-- Reference genome: either an entry from igv_indices.loc or an igv.genome dataset from the history. -->
    <conditional name="refGenomeSource">
      <param name="refGenomeSource_type" type="select" label="Will you select a reference genome from your history or use a built-in reference?">
        <option value="built-in">Use a built-in reference</option>
        <option value="history">Use one from the history</option>
      </param>
      <when value="built-in">
        <param name="ref" type="select" label="Select a reference genome">
          <options from_file="igv_indices.loc">
            <column name="dbkey" index="0" />
            <column name="name" index="1" />
            <column name="value" index="2" />
            <filter type="sort_by" column="1" />
            <validator type="no_options" message="No indexes are available" />
          </options>
        </param>
      </when>
      <when value="history">
        <param name="ref" type="data" format="igv.genome" metadata_name="dbkey" label="Select a reference from history" />
      </when>
    </conditional>
    <param name="input" type="data" format="wig,igv,igv.cn,igv.snp,igv.gct" label="Input file" help="The input WIG,CN,IGV,GCT,SNP feature file"/>
    <param name="zoom" type="integer" value="7" optional="true"
           label="-z maximum zoom level to precompute"
           help="The default value is 7 and is sufficient for most files. To reduce file size at the expense of IGV performance this value can be reduced." />
    <param name="window_functions" type="select" display="checkboxes" multiple="true"
           label="-f Functions to calculate over windows"
           help="If none are selected, will default to mean">
      <option value="mean" selected="true">mean</option>
      <option value="min">min</option>
      <option value="max">max</option>
    </param>
    <param name="probe" type="data" format="bed" optional="true"
           label="Probe file for GCT input"
           help="Specifies a bed file to be used to map probe identifiers to locations. This option is useful when preprocessing .gct files. The bed file should contain 4 columns: chr start end name where name is the probe name in the .gct file."/>
  </inputs>
  <outputs>
    <data format="igv.tdf" name="output_tdf" metadata_source="input" label="${tool.name} on ${on_string}: igv.tdf" from_work_dir="output.tdf"/>
  </outputs>
  <tests>
  </tests>
  <help>
**What it does**

The IGVTools_ tile command converts a sorted data input file to a binary tiled data (.tdf) file. Use this command to pre-process large datasets for improved IGV performance.

.. _IGVTools: http://www.broadinstitute.org/software/igv/igvtools_commandline
.. _IGV: http://www.broadinstitute.org/igv/

------

To cite your use of IGV in your publication::

  James T. Robinson, Helga Thorvaldsdottir, Wendy Winckler, Mitchell Guttman, Eric S. Lander, Gad Getz, Jill P. Mesirov.
  Integrative Genomics Viewer. Nature Biotechnology 29, 24-26 (2011)

------

**Input formats**

Supported input file formats are: .wig, .cn, .snp, .igv, and .gct.

------

**Outputs**

The output format is IGV tiled data file (TDF) file (.tdf)

-------


**IGVTools tile parameter list**

This is an exhaustive list of igvtools tile options:

For **tile**::

  -z Integer    Specifies the maximum zoom level to precompute. The default
                value is 7 and is sufficient for most files. To reduce file
                size at the expense of IGV performance this value can be
                reduced.

  -p file       Specifies a "bed" file to be used to map probe identifiers
                to locations. This option is useful when preprocessing .gct
                files. The bed file should contain 4 columns:
                    chr start end name
                where name is the probe name in the .gct file.

  -f list       A comma delimited list specifying window functions to use
                when reducing the data to precomputed tiles. Possible
                values are min, max, and mean. By default only the mean
                is calculated.

  </help>
</tool>
# lib/galaxy/datatypes/igv.py
"""
IGV datatypes
"""
import logging, zipfile
import galaxy.datatypes.binary
from galaxy.datatypes.binary import Binary

log = logging.getLogger(__name__)


class TiledDataFile( Binary ):
    """Class describing an IGV tiled data file (TDF) .tdf binary file"""
    file_ext = "igv.tdf"

    def __init__( self, **kwd ):
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if the file at ``filename`` starts with the bytes 'TDF3'.

        The first 4 bytes of a TDF file is 'TDF3', and the file is binary. For
        details about the format, see http://www.broadinstitute.org/software/igv/TDF

        Any error while opening or reading the file yields False, so the
        sniffer never raises.
        """
        try:
            # Read in binary mode so the magic bytes are compared untranslated,
            # and close the handle as soon as the header has been read.
            with open( filename, 'rb' ) as handle:
                return handle.read( 4 ) == b'TDF3'
        except Exception:
            return False


class GenomeDescriptor( Binary ):
    """Class describing an IGV .genome zip archive file"""
    file_ext = "igv.genome"

    def __init__( self, **kwd ):
        Binary.__init__( self, **kwd )

    def sniff( self, filename ):
        """
        Return True if ``filename`` is a zip archive that contains a member
        named 'property.txt' with a line starting with 'sequenceLocation'.

        A ``None`` filename, a non-zip file, a missing 'property.txt', a
        missing key, or any error while reading all yield False.
        """
        try:
            if filename is None or not zipfile.is_zipfile( filename ):
                return False
            genome_archive = zipfile.ZipFile( filename )
            try:
                if 'property.txt' not in genome_archive.namelist():
                    return False
                # Archive members are read as byte strings, so compare against bytes.
                for line in genome_archive.open( 'property.txt' ):
                    if line.startswith( b'sequenceLocation' ):
                        return True
                return False
            finally:
                # try/finally rather than a with-block: ZipFile only became a
                # context manager in Python 2.7.
                genome_archive.close()
        except Exception:
            return False
<?xml version="1.0"?>
<!-- tool-data/datatypes_conf.xml: registers the IGV datatypes used by the igvtools wrappers. -->
<datatypes>
  <!-- Python module that provides the datatype classes referenced below. -->
  <datatype_files>
    <datatype_file name="igv.py"/>
  </datatype_files>

  <registration>
    <!-- Text formats, registered as subclasses of Galaxy's Tabular datatype. -->
    <datatype extension="igv"
              type="galaxy.datatypes.tabular:Tabular"
              subclass="True"
              display_in_upload="True"/>
    <datatype extension="igv.snp"
              type="galaxy.datatypes.tabular:Tabular"
              subclass="True"
              display_in_upload="True"/>
    <datatype extension="igv.cn"
              type="galaxy.datatypes.tabular:Tabular"
              subclass="True"
              display_in_upload="True"/>
    <datatype extension="igv.gct"
              type="galaxy.datatypes.tabular:Tabular"
              subclass="True"
              display_in_upload="True"/>
    <datatype extension="igv.res"
              type="galaxy.datatypes.tabular:Tabular"
              subclass="True"
              display_in_upload="True"/>

    <!-- Formats backed by the classes in galaxy.datatypes.igv. -->
    <datatype extension="igv.tdf"
              type="galaxy.datatypes.igv:TiledDataFile"
              display_in_upload="True"/>
    <datatype extension="igv.genome"
              type="galaxy.datatypes.igv:GenomeDescriptor"
              display_in_upload="True"/>
  </registration>

  <!-- Sniffers for the two galaxy.datatypes.igv classes. -->
  <sniffers>
    <sniffer type="galaxy.datatypes.igv:TiledDataFile"/>
    <sniffer type="galaxy.datatypes.igv:GenomeDescriptor"/>
  </sniffers>
</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/igv_indices.loc.sample Tue Jan 17 21:53:25 2012 -0500 @@ -0,0 +1,8 @@ +# IGVTools .genome files +# from http://www.broadinstitute.org/software/igv/download +# igvtools that includes .genome files +# taken from: igvtools_1.5.16.zip includes .genome files (148 MB) +# +# format of this .loc file (3 tab-separated columns): +#dbkey description filepath +#hg19 Human hg19 /depot/data2/galaxy/IGV/2.0/hg19.genome