changeset 2:7268cbc535e5 draft

findpeaks and maketagdirectory
author kevyin
date Fri, 23 Nov 2012 00:30:27 -0500
parents 59d58008c40d
children 4ea55669f00e
files README annotatePeaks.xml bed2pos.xml datatypes_conf.xml findPeaks.xml homerDatatypes.py makeTagDirectory.xml pos2bed.xml
diffstat 8 files changed, 349 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,13 @@
+Homer wrapper for Galaxy
+
+Code repo: https://bitbucket.org/gvl/homer
+
+=========================================:
+LICENSE for this wrapper: 
+=========================================:
+Kevin Ying
+Garvan Institute: http://www.garvan.org.au
+GVL: https://genome.edu.au/wiki/GVL
+
+http://opensource.org/licenses/mit-license.php
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotatePeaks.xml	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,41 @@
+<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.2">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description></description>
+    <!-- Runs annotatePeaks.pl on the input for the chosen genome: stdout goes to out_annotated, stderr to out_log. -->
+    <command>
+        annotatePeaks.pl $input_bed $genome_selector 1&gt; $out_annotated
+        2&gt; $out_log || echo "Error running annotatePeaks." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="Homer peak positions or BED format" />
+        <param name="genome_selector" type="select" label="Genome version">
+            <option value="hg19" selected="true">hg19</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- annotatePeaks.pl writes tab-delimited text, so the annotation is declared "tabular" rather than "csv". -->
+        <!-- Output labels are built from the input dataset name (extension stripped) and the selected genome. -->
+        <data format="tabular" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- TODO: placeholder test; no inputs or expected outputs are defined yet. -->
+            <!-- TODO: add a small peak file and its expected annotation as test data. -->
+        </test>
+    </tests>
+
+    <help>
+
+        .. class:: infomark
+
+        **Homer annotatePeaks**
+        More information on accepted formats
+        http://biowhat.ucsd.edu/homer/ngs/annotation.html
+
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed2pos.xml	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,34 @@
+<tool id="homer_bed2pos" name="homer_bed2pos" version="0.0.2">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description></description>
+    <!-- Runs bed2pos.pl on the input: stdout goes to out_pos, stderr to out_log; a failing exit also echoes an error to stderr. -->
+    <command>
+        bed2pos.pl $input_bed 1&gt; $out_pos
+        2&gt; $out_log || echo "Error running bed2pos." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
+    </inputs>
+    <outputs>
+        <!-- out_pos holds the script's stdout (declared tabular); out_log holds its stderr. -->
+        <!-- Output labels are built from the input dataset name with its extension stripped. -->
+        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- TODO: placeholder test; no inputs or expected outputs are defined yet. -->
+            <!-- TODO: add a small BED file and its expected position file as test data. -->
+        </test>
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        **Homer bed2pos.pl**
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<datatypes> <!-- datatype registration for the Homer wrapper -->
+  <datatype_files>
+    <datatype_file name="homerDatatypes.py"/> <!-- Python module in this changeset that defines HomerTagDirectory -->
+  </datatype_files>
+  <registration> <!-- binds the homerTagDirectory extension to the HomerTagDirectory class; not offered in the upload form -->
+    <datatype extension="homerTagDirectory" type="galaxy.datatypes.homerDatatypes:HomerTagDirectory" display_in_upload="False" mimetype="text/html"/>
+  </registration>
+</datatypes>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/findPeaks.xml	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,52 @@
+<tool id="homer_findPeaks" name="homer_findPeaks" version="0.0.7">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description></description>
+    <!-- Runs findPeaks on a tag directory. "-i" is added only when a control is selected; an unset dataset renders as 'None', so that string is tested explicitly. -->
+    <command>
+        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile
+
+    #if str($control_tagDir) != 'None':
+        -i $control_tagDir.extra_files_path
+    #end if
+
+        2&gt; $out_log || echo "Error running findPeaks." >&amp;2
+    </command>
+    <inputs>
+        <param format="homerTagDirectory" name="tagDir" type="data" label="tag directory" />
+        <param format="homerTagDirectory" name="control_tagDir" type="data" optional="True" label="Control tag directory" />
+        <param type="text" name="options" label="Extra options" value="" >
+          <sanitizer>
+            <valid initial="string.printable">
+             <remove value="&apos;"/>
+            </valid>
+            <mapping initial="none">
+              <add source="&apos;" target="__sq__"/>
+            </mapping>
+          </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- findPeaks writes the peaks to outputPeakFile through -o; out_log holds the command's stderr. -->
+        <!-- NOTE(review): the options sanitizer only rewrites single quotes, so shell metacharacters reach the command line; confirm this is acceptable. -->
+        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- TODO: placeholder test; no inputs or expected outputs are defined yet. -->
+            <!-- TODO: add a small tag directory and its expected peak file as test data. -->
+        </test>
+    </tests>
+
+    <help>
+
+  .. class:: infomark
+
+  **Homer findPeaks**
+  http://biowhat.ucsd.edu/homer/ngs/peaks.html
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/homerDatatypes.py	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,118 @@
+"""
+homer composite datatypes 
+Use at your peril
+Kevin Ying
+for the homer wrapper
+
+Nov 2012
+"""
+
+import logging, os, sys, time, tempfile, shutil, string, glob
+import data
+from galaxy import util
+from cgi import escape
+import urllib, binascii
+from galaxy.web import url_for
+from galaxy.datatypes import metadata
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.tabular import Tabular
+from galaxy.datatypes.images import Html
+from galaxy.datatypes.interval import Interval
+from galaxy.util.hash_util import *
+
+gal_Log = logging.getLogger(__name__)
+verbose = False
+
+class HomerTagDirectory(Html):
+    """
+    base class to use for homer tag directory datatypes
+    composite datatype elements
+    stored in extra files path
+    """
+   
+    #MetadataElement( name="base_name", desc="base name for all transformed versions of this genetic dataset", default='HomerTagDirectory',
+    #readonly=True, set_in_upload=False)
+    
+    composite_type = 'auto_primary_file'
+    #allow_datatype_change = False
+    file_ext = 'homerTagDirectory'
+
+    def __init__(self, **kwd):
+        Html.__init__(self, **kwd)
+        self.add_composite_file('tagInfo.txt', description="tagInfo")
+        self.add_composite_file('tagAutocorrelation.txt', description="tagAutocorrelation")
+        self.add_composite_file('tagCountDistribution.txt', description="tagCountDistribution")
+        self.add_composite_file('tagLengthDistribution.txt', description="tagLengthDistribution")
+
+    def generate_primary_file(self, dataset=None):
+        rval = ['<html><head><title>Homer Tag Directory Galaxy Composite Dataset </title></head><p/>']
+        rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
+        for composite_name, composite_file in self.get_composite_files( dataset = dataset ).iteritems():
+            fn = composite_name
+            opt_text = ''
+            if composite_file.optional:
+                opt_text = ' (optional)'
+            if composite_file.get('description'):
+                rval.append( '<li><a href="%s" type="application/binary">%s (%s)</a>%s</li>' % ( fn, fn, composite_file.get('description'), opt_text ) )
+            else:
+                rval.append( '<li><a href="%s" type="application/binary">%s</a>%s</li>' % ( fn, fn, opt_text ) )
+        rval.append( '</ul></div></html>' )
+        return "\n".join( rval )
+
+
+    def regenerate_primary_file(self,dataset):
+        """
+        cannot do this until we are setting metadata 
+        """
+        #bn = dataset.metadata.base_name
+        efp = dataset.extra_files_path
+        flist = os.listdir(efp)
+        rval = ['<html><head><title>Files for Composite Dataset %s</title></head><body><p/>Composite %s contains:<p/><ul>' % (dataset.name,dataset.name)]
+        for i,fname in enumerate(flist):
+            sfname = os.path.split(fname)[-1] 
+            f,e = os.path.splitext(fname)
+            rval.append( '<li><a href="%s">%s</a></li>' % ( sfname, sfname) )
+        rval.append( '</ul></body></html>' )
+        f = file(dataset.file_name,'w')
+        f.write("\n".join( rval ))
+        f.write('\n')
+        f.close()
+
+    def get_mime(self):
+        """Returns the mime type of the datatype"""
+        return 'text/html'
+
+    def set_meta( self, dataset, **kwd ):
+
+        """
+        for homer maketagdirectory eg
+
+        """
+        Html.set_meta( self, dataset, **kwd )
+        if kwd.get('overwrite') == False:
+            if verbose:
+                gal_Log.debug('@@@ HomerTagDirectory set_meta called with overwrite = False')
+            return True
+        try:
+            efp = dataset.extra_files_path
+        except: 
+            if verbose:                
+               gal_Log.debug('@@@ HomerTagDirectory set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0], dataset.name))
+            return False
+        try:
+            flist = os.listdir(efp)
+        except:
+            if verbose: gal_Log.debug('@@@ HomerTagDirectory set_meta failed %s - dataset %s has no efp ?' % (sys.exc_info()[0],dataset.name))
+            return False
+        if len(flist) == 0:
+            if verbose:
+                gal_Log.debug('@@@ HomerTagDirectory set_meta failed - %s efp %s is empty?' % (dataset.name,efp))
+            return False
+        self.regenerate_primary_file(dataset)
+        if not dataset.info:           
+                dataset.info = 'Galaxy HomerTagDirectory datatype object'
+        if not dataset.blurb:
+               dataset.blurb = 'Composite file - Homer Galaxy toolkit'
+        return True
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.xml	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,48 @@
+<tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="0.0.8">
+    <requirements>
+        <requirement type="package" >homer</requirement>
+    </requirements>
+    <description>makeTagDirectory</description>
+    <version_command>echo "Doh!"</version_command>
+    <command> makeTagDirectory ${tagDir.extra_files_path}
+        #for $alignF in $alignmentFiles
+          $alignF.file -format $alignF.file.ext
+        #end for
+        2&gt; $out_log || echo "Error running homer_makeTagDirectory." >&amp;2
+
+    </command>
+    <inputs>
+        <param name="title" label="Name for the output tag directory" type="text" value="Homer TagDirectory" />
+        <repeat name="alignmentFiles" title="Alignment Files">
+          <param name="file" label="Add file" type="data" format="sam,bed" />
+        </repeat>
+    </inputs>
+    <outputs>
+        <data format="homerTagDirectory" name="tagDir" label="${title}" />
+        <data format="txt" name="out_log" label="${title}.log" />
+        <!-- tagDir is the composite output: the command writes into its extra_files_path; out_log holds stderr. -->
+        <!-- Both labels come from the title parameter, whose initial text is supplied through "value" (a text param has no "default" attribute). -->
+    </outputs>
+
+
+    <tests>
+        <!--<test>-->
+            <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
+            <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
+        <!--</test>-->
+    </tests>
+
+    <help>
+
+        .. class:: infomark
+
+        **Homer makeTagDirectory**
+
+        Creates a Homer tag directory from the selected alignment files (SAM or BED).
+
+        More information:
+        http://biowhat.ucsd.edu/homer/ngs/tagDir.html
+
+    </help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pos2bed.xml	Fri Nov 23 00:30:27 2012 -0500
@@ -0,0 +1,34 @@
+<tool id="homer_pos2bed" name="homer_pos2bed" version="0.0.2">
+    <requirements>
+        <requirement type="package">homer</requirement>
+    </requirements>
+    <description></description>
+    <!-- Runs pos2bed.pl on the input: stdout goes to out_bed, stderr to out_log; a failing exit also echoes an error to stderr. -->
+    <command>
+        pos2bed.pl $input_peak 1&gt; $out_bed
+        2&gt; $out_log || echo "Error running pos2bed." >&amp;2
+    </command>
+    <inputs>
+        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
+    </inputs>
+    <outputs>
+        <!-- out_bed holds the script's stdout (declared bed); out_log holds its stderr. -->
+        <!-- Output labels are built from the input dataset name with its extension stripped. -->
+        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- TODO: placeholder test; no inputs or expected outputs are defined yet. -->
+            <!-- TODO: add a small peak position file and its expected BED output as test data. -->
+        </test>
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        **Homer pos2bed.pl**
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+