changeset 4:74c1fc7bb164 draft

display tag directories with html
author kevyin
date Tue, 27 Nov 2012 02:34:33 -0500
parents 4ea55669f00e
children 8cfd322a5907
files README annotatePeaks.xml bed2pos.xml datatypes_conf.xml findPeaks.xml homerDatatypes.py makeTagDirectory.py makeTagDirectory.xml pos2bed.xml
diffstat 9 files changed, 449 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,13 @@
+Homer wrapper for Galaxy
+
+Code repo: https://bitbucket.org/gvl/homer
+
+=========================================:
+LICENSE for this wrapper: 
+=========================================:
+Kevin Ying
+Garvan Institute: http://www.garvan.org.au
+GVL: https://genome.edu.au/wiki/GVL
+
+http://opensource.org/licenses/mit-license.php
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotatePeaks.xml	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,41 @@
+<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.2">
+    <!-- Runs annotatePeaks.pl on a peak/BED file for the selected genome; stdout becomes the annotation output, stderr the log. -->
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Annotate peaks with Homer's annotatePeaks.pl</description>
+    <!--<version_command></version_command>-->
+    <command>
+        annotatePeaks.pl $input_bed $genome_selector 1&gt; $out_annotated
+        2&gt; $out_log || echo "Error running annotatePeaks." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="Homer peak positions or BED format" />
+        <param name="genome_selector" type="select" label="Genome version">
+            <option value="hg19" selected="true">hg19</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- NOTE(review): annotatePeaks.pl is expected to write tab-delimited text; confirm that "csv" rather than "tabular" is intended. -->
+        <data format="csv" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+
+        .. class:: infomark
+
+        **Homer annotatePeaks**
+
+        More information on accepted formats:
+        http://biowhat.ucsd.edu/homer/ngs/annotation.html
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed2pos.xml	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,34 @@
+<tool id="homer_bed2pos" name="homer_bed2pos" version="0.0.2">
+    <!-- Runs bed2pos.pl on the selected BED file; stdout becomes the output dataset, stderr the log. -->
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Convert a BED file with Homer's bed2pos.pl</description>
+    <!--<version_command></version_command>-->
+    <command>
+        bed2pos.pl $input_bed 1&gt; $out_pos
+        2&gt; $out_log || echo "Error running bed2pos." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
+    </inputs>
+    <outputs>
+        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        **Homer bed2pos.pl**
+
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<datatypes> <!-- datatype registration shipped with the Homer wrapper -->
+  <datatype_files>
+    <datatype_file name="homerDatatypes.py"/> <!-- module that defines the HomerTagDirectory class registered below -->
+  </datatype_files>
+  <registration>
+    <datatype extension="homerTagDirectory" type="galaxy.datatypes.homerDatatypes:HomerTagDirectory" display_in_upload="False" mimetype="text/html"/>
+  </registration>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/findPeaks.xml	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,52 @@
+<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.0">
+    <!-- Runs findPeaks on a tag directory (optionally with a control tag directory); peaks go to the -o file, stderr to the log. -->
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
+    <command>
+        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile
+
+    #if $control_tagDir:
+        -i $control_tagDir.extra_files_path
+    #end if
+
+        2&gt; $out_log || echo "Error running findPeaks." >&amp;2
+    </command>
+    <inputs>
+        <!-- "html" is accepted as well because homer_makeTagDirectory currently writes its tag directory as an html dataset. -->
+        <param format="homerTagDirectory,html" name="tagDir" type="data" label="tag directory" />
+        <param format="homerTagDirectory,html" name="control_tagDir" type="data" optional="True" label="Control tag directory" />
+        <!-- NOTE(review): $options reaches the shell unquoted; the sanitizer only maps single quotes to __sq__, so ; | ` and $ pass through. -->
+        <param type="text" name="options" label="Extra options" value="" >
+          <sanitizer>
+            <valid initial="string.printable">
+             <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+              <add source="&apos;" target="__sq__"/>
+            </mapping>
+          </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+
+  .. class:: infomark
+
+  **Homer findPeaks**
+
+  http://biowhat.ucsd.edu/homer/ngs/peaks.html
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/homerDatatypes.py	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,122 @@
+"""
+homer composite datatypes 
+NOTE: makeTagDirectory doesn't actually use this!!!! It uses html as the format.
+      I never got Galaxy to display the contents of this datatype properly after
+      clicking the "eye" icon, so html is used instead.
+
+Use at your peril
+Kevin Ying
+for the homer wrapper
+
+Dec 2012
+"""
+
+import logging, os, sys, time, tempfile, shutil, string, glob
+import data
+from galaxy import util
+from cgi import escape
+import urllib, binascii
+from galaxy.web import url_for
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.images import Html
+from galaxy.datatypes.interval import Interval
+from galaxy.util.hash_util import *
+
+gal_Log = logging.getLogger(__name__)
+verbose = False
+
+class HomerTagDirectory(Html):
+    """
+    Composite Galaxy datatype for a Homer tag directory.
+
+    The declared member files are stored in the dataset's extra files path and
+    the primary file is an HTML index linking to them.
+    """
+
+    #MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default='HomerTagDirectory',
+    #readonly=True, set_in_upload=False)
+
+    composite_type = 'auto_primary_file'
+    #allow_datatype_change = False
+    file_ext = 'homerTagDirectory'
+
+    def __init__(self, **kwd):
+        """Register the four text files declared as members of a tag directory."""
+        Html.__init__(self, **kwd)
+        for stem in ('tagInfo', 'tagAutocorrelation', 'tagCountDistribution', 'tagLengthDistribution'):
+            self.add_composite_file('%s.txt' % stem, description=stem)
+
+    def generate_primary_file(self, dataset=None):
+        """Return an HTML index (a single string) listing the declared composite files."""
+        rval = ['<html><head><title>Homer Tag Directory Galaxy Composite Dataset </title></head><p/>']
+        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
+        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
+            fn = composite_name
+            opt_text = ''
+            if composite_file.optional:
+                opt_text = ' (optional)'
+            if composite_file.get('description'):
+                rval.append( '<li><a href="%s" type="application/binary">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get('description'), opt_text ) )
+            else:
+                rval.append( '<li><a href="%s" type="application/binary">%s</a>%s</li>' % ( fn, fn, opt_text ) )
+        rval.append( '</ul></div></html>' )
+        return "\n".join( rval )
+
+
+    def regenerate_primary_file(self,dataset):
+        """
+        Rewrite the primary HTML file as an index of whatever is currently in
+        dataset.extra_files_path (cannot do this until we are setting metadata).
+        """
+        title = escape('%s' % dataset.name)
+        rval = ['<html><head><title>Files for Composite Dataset %s</title></head><body><p/>Composite %s contains:<p/><ul>' % (title, title)]
+        # sorted() keeps the index stable between runs; os.listdir() order is arbitrary
+        for fname in sorted(os.listdir(dataset.extra_files_path)):
+            # quote=True because the name is also placed inside the href attribute
+            sfname = escape(os.path.split(fname)[-1], True)
+            rval.append( '<li><a href="%s">%s</a></li>' % ( sfname, sfname) )
+        rval.append( '</ul></body></html>' )
+        # "with" closes the handle even if a write fails
+        with open(dataset.file_name, 'w') as out:
+            out.write("\n".join( rval ))
+            out.write('\n')
+
+    def get_mime(self):
+        """Returns the mime type of the datatype"""
+        return 'text/html'
+
+    def set_meta( self, dataset, **kwd ):
+        """
+        Rebuild the HTML index from the extra files path (for homer maketagdirectory eg).
+
+        Returns True when the index was rebuilt or when overwrite=False was passed,
+        and False when the extra files path cannot be listed or is empty.
+        """
+        Html.set_meta( self, dataset, **kwd )
+        if kwd.get('overwrite') == False:
+            if verbose:
+                gal_Log.debug('@@@ HomerTagDirectory set_meta called with overwrite = False')
+            return True
+        try:
+            efp = dataset.extra_files_path
+            flist = os.listdir(efp)
+        except Exception:
+            # not a bare except: KeyboardInterrupt/SystemExit must still propagate
+            if verbose:
+                gal_Log.debug('@@@ HomerTagDirectory set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0], dataset.name))
+            return False
+        if len(flist) == 0:
+            if verbose:
+                gal_Log.debug('@@@ HomerTagDirectory set_meta failed - %s efp %s is empty?' % (dataset.name,efp))
+            return False
+        # the directory has content, so the index can be rebuilt from it
+        self.regenerate_primary_file(dataset)
+        if not dataset.info:
+            dataset.info = 'Galaxy HomerTagDirectory datatype object'
+        if not dataset.blurb:
+            dataset.blurb = 'Composite file - Homer Galaxy toolkit'
+        return True
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.py	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,94 @@
+"""
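+Galaxy wrapper script for Homer's makeTagDirectory.
+Runs the executable and writes an HTML listing of the resulting tag directory.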
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+
+def getFileString(fpath, outpath):
+    """
+    Describe outpath/fpath as '<fpath>  (<size> MB|KB|B)'; an empty file gets
+    no size suffix and anything that is not a regular file yields '? ?'.
+    """
+    full_path = os.path.join(outpath, fpath)
+    if not os.path.isfile(full_path):
+        return '? ?'
+    nbytes = float(os.path.getsize(full_path))
+    suffix = ''
+    if nbytes > 2**20:
+        suffix = ' (%1.1f MB)' % (nbytes/2**20)
+    elif nbytes > 2**10:
+        suffix = ' (%1.1f KB)' % (nbytes/2**10)
+    elif nbytes > 0:
+        suffix = ' (%d B)' % (int(nbytes))
+    return '%s %s' % (fpath, suffix)
+
+class makeTagDirectory():
+    """Wrapper that runs the makeTagDirectory executable and lists the files it produced as HTML."""
+
+    def __init__(self,opts=None, args=None):
+        """Keep the parsed options and the positional arguments (output directory first)."""
+        self.opts = opts
+        self.args = args
+
+    def run_makeTagDirectory(self):
+        """
+        makeTagDirectory <Output Directory Name> [options] <alignment file1> [alignment file 2]
+
+        Returns (html, retval): the HTML fragments from gen_html() and the exit status.
+        """
+        # Use self.args, not the module-level "args" that only exists when run as a script.
+        cl = [self.opts.executable] + list(self.args)
+        if self.opts.format != "bam":
+            cl += ["-format" , self.opts.format]
+        print cl
+        retval = subprocess.Popen(cl).wait()
+        html = self.gen_html(self.args[0])
+        return html,retval
+
+    def gen_html(self, dr=None):
+        """
+        add a list of all files in the tagdirectory
+
+        dr defaults to the working directory at call time; returns a list of HTML strings.
+        """
+        if dr is None:
+            dr = os.getcwd()
+        flist = os.listdir(dr)
+        print flist
+        res = ['<div class="module"><h2>Files created by makeTagDirectory</h2><table cellspacing="2" cellpadding="2">\n']
+        flist.sort()
+        for fn in flist:
+            # test the entry inside dr; a bare name would be resolved against the cwd
+            if not os.path.isdir(os.path.join(dr, fn)):
+                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,getFileString(fn, dr)))
+        # close the <div> opened above as well as the table
+        res.append('</table></div>\n')
+        return res
+
+if __name__ == '__main__':
+    # Command-line entry point: run makeTagDirectory, then write the file listing as HTML.
+    op = optparse.OptionParser()
+    op.add_option('-e', '--executable', default='makeTagDirectory')
+    op.add_option('-o', '--htmloutput', default=None)
+    op.add_option('-f', '--format', default="sam")
+    opts, args = op.parse_args()
+    if opts.htmloutput is None:
+        op.error('an HTML output file is required (-o/--htmloutput)')
+    #assert os.path.isfile(opts.executable),'## makeTagDirectory.py error - cannot find executable %s' % opts.executable
+    wrapper = makeTagDirectory(opts, args)
+    html,retval = wrapper.run_makeTagDirectory()
+    with open(opts.htmloutput, 'w') as out:
+        out.write(''.join(html))
+    if retval != 0:
+        # "serr" was never defined, so the old failure path died with a NameError
+        sys.stderr.write('## makeTagDirectory.py error - %s exited with status %d\n' % (opts.executable, retval))
+        sys.exit(retval)
+    
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.xml	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,50 @@
+<tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="1.0.0">
+    <!-- Calls makeTagDirectory.py, which runs makeTagDirectory into the output's files_path and writes an HTML file listing. -->
+    <requirements>
+        <requirement type="package" >homer</requirement>
+    </requirements>
+    <description>Simple wrapper for makeTagDirectory. Used by findPeaks</description>
+    <!--<version_command></version_command>-->
+    <command interpreter="python">makeTagDirectory.py ${tagDir.files_path}
+        #for $alignF in $alignmentFiles
+          $alignF.file -f $alignF.file.ext
+        #end for
+          -o $tagDir
+        2&gt; $out_log || echo "Error running homer_makeTagDirectory." >&amp;2
+
+    </command>
+    <inputs>
+        <!-- Galaxy text params take their initial value from "value"; a "default" attribute is not read. -->
+        <param name="title" label="Name for the output tag directory" type="text" value="Homer TagDirectory" />
+        <repeat name="alignmentFiles" title="Alignment Files">
+          <param name="file" label="Add file" type="data" format="sam,bed" />
+        </repeat>
+    </inputs>
+    <outputs>
+        <!--<data format="homerTagDirectory" name="tagDir" label="${title} tag directory" />-->
+        <data format="html" name="tagDir" label="${title} tag directory" />
+        <data format="txt" name="out_log" label="${title}.log" />
+    </outputs>
+
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+
+        .. class:: infomark
+
+        **Homer makeTagDirectory**
+
+        Creates a Homer tag directory from the selected alignment files. The
+        output is an HTML page listing the files in the tag directory, and the
+        tag directory is used by homer_findPeaks.
+
+        http://biowhat.ucsd.edu/homer/ngs/tagDir.html
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pos2bed.xml	Tue Nov 27 02:34:33 2012 -0500
@@ -0,0 +1,34 @@
+<tool id="homer_pos2bed" name="homer_pos2bed" version="0.0.2">
+    <!-- Runs pos2bed.pl on a Homer peak positions file; stdout becomes the BED output, stderr the log. -->
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description>Convert Homer peak positions to BED with pos2bed.pl</description>
+    <!--<version_command></version_command>-->
+    <command>
+        pos2bed.pl $input_peak 1&gt; $out_bed
+        2&gt; $out_log || echo "Error running pos2bed." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
+    </inputs>
+    <outputs>
+        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        **Homer pos2bed.pl**
+
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+