changeset 5:f4b4c1712e39

GSNAP - added datatypes for tally related data and gsnap default output
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 13:07:25 -0600
parents f49f5a460c74
children 3be0e0a858fe
files gmap/lib/galaxy/datatypes/gmap.py
diffstat 1 files changed, 48 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/gmap/lib/galaxy/datatypes/gmap.py	Tue Nov 08 13:02:32 2011 -0600
+++ b/gmap/lib/galaxy/datatypes/gmap.py	Tue Nov 08 13:07:25 2011 -0600
@@ -217,6 +217,13 @@
     """
     file_ext = 'snps.iit'
 
+class TallyIntervalIndexTree( IntervalIndexTree ):
+    """
+    A GMAP Interval Index Tree Map for tally data
+    (file extension 'tally.iit'), created by iit_store
+    """
+    file_ext = 'tally.iit'
+
 class IntervalAnnotation( Text ):
     """
     Class describing a GMAP Interval format:
@@ -422,3 +429,44 @@
         return False
 
 
+class TallyAnnotation(IntervalAnnotation):
+    """
+    Output produced by gsnap_tally
+    Example:
+        >144 chr20:57268791..57268935
+        G0
+        A1(1@7|1Q-3)
+        A2(1@36,1@1|1Q2,1Q-8)
+        C2      0.889,0.912,0.889,0.889,0.933,0.912,0.912,0.889,0.889,0.889     -2.66,-2.89,-2.66,-2.66,-3.16,-2.89,-2.89,-2.66,-2.66,-2.66
+        C1 T1   0.888,0.9,0.888,0.9,0.913,0.9,0.911,0.888,0.9,0.913     -2.66,-2.78,-2.66,-2.78,-2.91,-2.78,-2.89,-2.66,-2.78,-2.91
+    """
+    file_ext = 'gsnap_tally'
+    def sniff( self, filename ):
+        """
+        Returns True when the first (up to 10) non-empty lines all look like gsnap_tally output
+        """
+        pat = r'^>(\d+)\s((\S+):(\d+)\.\.(\d+))$' #>total chr:position..position
+        pat2 = r'^[GATCN]\d.*$' #BaseCountDetails
+        fh = open( filename ) # opened before the try so the finally clause never sees an unbound fh
+        try:
+            count = 0
+            while count < 10:
+                line = fh.readline()
+                if not line:
+                    break #EOF
+                line = line.strip()
+                if line: #only non-empty lines are checked and counted
+                    count += 1
+                    if re.match(pat,line) is None and re.match(pat2,line) is None: # Failed to match
+                        return False
+        finally:
+            fh.close()
+        return count > 0 # every sampled line matched; a file with no content is not a tally
+
+class GsnapResult( Text ):
+    """
+    The default output format for gsnap. Can be used as input for gsnap_tally.
+    """
+    file_ext = 'gsnap'
+
+