# HG changeset patch
# User iuc
# Date 1421931899 18000
# Node ID 0c4372b93e85ec1f8de4d5da42b0a032a8347ed9
Uploaded
diff -r 000000000000 -r 0c4372b93e85 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Thu Jan 22 08:04:59 2015 -0500
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 0c4372b93e85 snpsift_dbnsfp.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/snpsift_dbnsfp.py Thu Jan 22 08:04:59 2015 -0500
@@ -0,0 +1,85 @@
+"""
+SnpSift dbNSFP datatypes
+"""
+import os,os.path,re,sys,gzip,logging
+import traceback
+import galaxy.datatypes.data
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.metadata import MetadataElement
+
+log = logging.getLogger(__name__)
+
+class SnpSiftDbNSFP( Text ):
+    """Class describing a dbNSFP database prepared for use by SnpSift dbnsfp"""
+    MetadataElement( name='reference_name', default='dbSNFP' , desc='Reference Name', readonly=True, visible=True, set_in_upload=True, no_value='dbSNFP' )
+    MetadataElement( name="bgzip", default=None, desc="dbNSFP bgzip", readonly=True, visible=True, no_value=None )
+    MetadataElement( name="index", default=None, desc="Tabix Index File", readonly=True, visible=True, no_value=None)
+    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[] )
+    file_ext = "snpsiftdbnsfp"
+    composite_type = 'auto_primary_file'
+    allow_datatype_change = False
+    """
+    ## The dbNSFP file is a tabular file with 1 header line
+    ## The first 4 columns are required to be: chrom pos ref alt
+    ## These match columns 1,2,4,5 of the VCF file
+    ## SnpSift requires the file to be block-gzipped and then indexed with samtools tabix
+    ## Example:
+    ## Compress using block-gzip algorithm
+    bgzip dbNSFP2.3.txt
+    ## Create tabix index
+    tabix -s 1 -b 2 -e 2 dbNSFP2.3.txt.gz
+    """
+    def __init__( self, **kwd ):
+        """Register the two composite files; their names are substituted from the reference_name metadata."""
+        Text.__init__( self, **kwd )
+        self.add_composite_file( '%s.grp', description = 'Group File', substitute_name_with_metadata = 'reference_name', is_binary = False )
+        self.add_composite_file( '%s.ti', description = '', substitute_name_with_metadata = 'reference_name', is_binary = False )
+    def init_meta( self, dataset, copy_from=None ):
+        Text.init_meta( self, dataset, copy_from=copy_from )
+    def generate_primary_file( self, dataset = None ):
+        """
+        This is called only at upload to write the primary file
+        cannot rename the datasets here - they come with the default unfortunately
+        """
+        self.regenerate_primary_file( dataset )  # bound-method call; the bare name raised NameError
+    def regenerate_primary_file(self,dataset):
+        """
+        Append the bgzip file name (when set) and the annotation names to the primary file;
+        cannot do this until we are setting metadata
+        """
+        annotations = "dbNSFP Annotations: %s\n" % ','.join(dataset.metadata.annotation)
+        with open(dataset.file_name,'a') as f:  # 'with' closes the file even if a write fails
+            if dataset.metadata.bgzip:
+                f.write(dataset.metadata.bgzip)
+                f.write('\n')
+            f.write(annotations)
+    def set_meta( self, dataset, overwrite=True, **kwd ):
+        """Record the .gz file, its header annotation names and the .tbi index found in extra_files_path."""
+        try:
+            efp = dataset.extra_files_path
+            if os.path.exists(efp):
+                for fname in os.listdir(efp):
+                    if fname.endswith('.gz'):
+                        dataset.metadata.bgzip = fname
+                        fh = None  # keeps the finally clause safe when gzip.open itself raises
+                        try:
+                            fh = gzip.open(os.path.join(efp,fname),'r')
+                            headers = fh.readline().rstrip('\r\n').split('\t')  # whole header line, however wide
+                            dataset.metadata.annotation = headers[4:]  # first 4 columns are chrom pos ref alt
+                        except Exception as e:
+                            log.warning("set_meta fname: %s %s" % (fname,str(e)))
+                            traceback.print_exc(file=sys.stderr)
+                        finally:
+                            if fh is not None:
+                                fh.close()
+                    if fname.endswith('.tbi'):
+                        dataset.metadata.index = fname
+            self.regenerate_primary_file(dataset)
+        except Exception as e:
+            log.warning("set_meta fname: %s %s" % (dataset.file_name if dataset and dataset.file_name else 'Unkwown',str(e)))
+            traceback.print_exc(file=sys.stderr)
+
+if __name__ == '__main__':
+    import doctest  # executed as a script: run this module's doctests
+    doctest.testmod()  # no argument means the __main__ module, i.e. this file
+
diff -r 000000000000 -r 0c4372b93e85 tabular_to_dbnsfp.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tabular_to_dbnsfp.py Thu Jan 22 08:04:59 2015 -0500
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+"""
+Uses pysam to bgzip a tabular file and build a tabix index for it
+
+usage: %prog in_file out_file
+"""
+
+from galaxy import eggs
+import pkg_resources; pkg_resources.require( "pysam" )
+import ctabix, subprocess, tempfile, sys, optparse, os.path
+
+def main():
+    """Bgzip-compress the tabular input into the output path, then tabix-index the result."""
+    usage = "Usage: %prog [options] tabular_input_file bgzip_output_file"
+    parser = optparse.OptionParser(usage = usage)
+    parser.add_option( '-c', '--chr-col', type='int', default=0, dest='chrom_col' )
+    parser.add_option( '-s', '--start-col', type='int', default=1, dest='start_col' )
+    parser.add_option( '-e', '--end-col', type='int', default=1, dest='end_col' )
+    (options, args) = parser.parse_args()
+    if len(args) != 2:
+        parser.print_usage()
+        sys.exit(1)  # sys.exit rather than the site-provided exit() builtin
+    input_fname, output_fname = args
+    output_dir = os.path.dirname(output_fname)  # '' when output_fname has no directory part
+    if output_dir and not os.path.exists(output_dir):  # os.makedirs('') would raise
+        os.makedirs(output_dir)
+    ctabix.tabix_compress(input_fname, output_fname, force=True)
+    # Column indices are 0-based.
+    ctabix.tabix_index(output_fname, seq_col=options.chrom_col,start_col=options.start_col,end_col=options.end_col)
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+ main()
+
diff -r 000000000000 -r 0c4372b93e85 tabular_to_dbnsfp.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tabular_to_dbnsfp.xml Thu Jan 22 08:04:59 2015 -0500
@@ -0,0 +1,12 @@
+
+
+ tabular_to_dbnsfp.py $input $dbnsfp.extra_files_path/dbNSFP.gz
+
+
+
+
+
+
+
+
+