Galaxy |

Changeset 16:687df269e597 (2012-12-19)

Previous changeset 15:529485c1dda1 (2012-12-19) Next changeset 17:686a6b9f3882 (2012-12-19)

Commit message:
Uploaded

added:
README
annotatePeaks.xml
bed2pos.xml
findPeaks.xml
makeTagDirectory.py
makeTagDirectory.xml
pos2bed.xml
tool_dependencies.xml

diff -r 529485c1dda1 -r 687df269e597 README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README Wed Dec 19 17:28:55 2012 -0500

@@ -0,0 +1,13 @@
+Homer wrapper for Galaxy
+
+Code repo: https://bitbucket.org/gvl/homer
+
+=========================================:
+LICENSE for this wrapper:
+=========================================:
+Kevin Ying
+Garvan Institute: http://www.garvan.org.au
+GVL: https://genome.edu.au/wiki/GVL
+
+http://opensource.org/licenses/mit-license.php
+

diff -r 529485c1dda1 -r 687df269e597 annotatePeaks.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/annotatePeaks.xml Wed Dec 19 17:28:55 2012 -0500

[

b'@@ -0,0 +1,164 @@\n+<tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.4">\n+ <requirements>\n+ <requirement type="package" version="4.1">homer</requirement>\n+ </requirements>\n+ <description></description>\n+ \n+ <command>\n+ annotatePeaks.pl $input_bed $genome_selector 1> $out_annotated\n+ 2> $out_log || echo "Error running annotatePeaks." >&2\n+ </command>\n+ <inputs>\n+ <param format="tabular,bed" name="input_bed" type="data" label="Homer peaks OR BED format"/>\n+ <param name="genome_selector" type="select" label="Genome version">\n+ <option value="hg19" selected="true">hg19</option>\n+ </param>\n+ <param type="text" name="options" label="Extra options" value="" help="See link below for more options">\n+ <sanitizer>\n+ <valid initial="string.printable">\n+ <remove value="'"/>\n+ <remove value="/"/>\n+ </valid>\n+ <mapping initial="none">\n+ <add source="'" target="__sq__"/>\n+ </mapping>\n+ </sanitizer>\n+ </param>\n+ </inputs>\n+ <outputs>\n+ \n+ \n+ <data format="csv" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />\n+ <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />\n+ </outputs>\n+ <tests>\n+ <test>\n+ \n+ \n+ </test>\n+ </tests>\n+\n+ <help>\n+\n+ .. 
class:: infomark\n+\n+ **Homer annoatePeaks**\n+\n+ More information on accepted formats and options\n+\n+ http://biowhat.ucsd.edu/homer/ngs/annotation.html\n+\n+ TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.\n+\n+**Parameter list**\n+\n+Command line options (not all of them are supported)::\n+\n+\tUsage: annotatePeaks.pl <peak file | tss> <genome version> [additional options...]\n+\n+\tAvailable Genomes (required argument): (name,org,directory,default promoter set)\n+\t\t\t-- or --\n+\t\tCustom: provide the path to genome FASTA files (directory or single file)\n+\n+\tUser defined annotation files (default is UCSC refGene annotation):\n+\t\tannotatePeaks.pl accepts GTF (gene transfer formatted) files to annotate positions relative\n+\t\tto custom annotations, such as those from de novo transcript discovery or Gencode.\n+\t\t-gtf <gtf format file> (-gff and -gff3 can work for those files, but GTF is better)\n+\n+\tPeak vs. tss/tts/rna mode (works with custom GTF file):\n+\t\tIf the first argument is "tss" (i.e. annotatePeaks.pl tss hg18 ...) then a TSS centric\n+\t\tanalysis will be carried out. Tag counts and motifs will be found relative to the TSS.\n+\t\t(no position file needed) ["tts" now works too - e.g. 3' end of gene]\n+\t\t["rna" specifies gene bodies, will automaticall set "-size given"]\n+\t\tNOTE: The default TSS peak size is 4000 bp, i.e. +/- 2kb (change with -size option)\n+\t\t-list <gene id list> (subset of genes to perform analysis [unigene, gene id, accession,\n+\t\t\t probe, etc.], default = all promoters)\n+\t\t-cTSS <promoter position file i.e. peak file> (should be centered on TSS)\n+\n+\tPrimary Annotation Options:\n+\t\t-mask (Masked repeats, can also add 'r' to end of genome name)\n+\t\t-m <motif file 1> [motif file 2] ... 
(list of motifs to find in peaks)\n+\t\t\t-mscore (reports the highest log-odds score within the peak)\n+\t\t\t-nmotifs (reports the number of motifs per peak)\n+\t\t\t-mdist (reports di'..b' analysis to these individuals)\n+\t\t-gene <data file> ... (Adds additional data to result based on the closest gene.\n+\t\t\tThis is useful for adding gene expression data. The file must have a header,\n+\t\t\tand the first column must be a GeneID, Accession number, etc. If the peak\n+\t\t\tcannot be mapped to data in the file then the entry will be left empty.\n+\t\t-go <output directory> (perform GO analysis using genes near peaks)\n+\t\t-genomeOntology <output directory> (perform genomeOntology analysis on peaks)\n+\t\t\t-gsize <#> (Genome size for genomeOntology analysis, default: 2e9)\n+\n+\tAnnotation vs. Histogram mode:\n+\t\t-hist <bin size in bp> (i.e 1, 2, 5, 10, 20, 50, 100 etc.)\n+\t\tThe -hist option can be used to generate histograms of position dependent features relative\n+\t\tto the center of peaks. This is primarily meant to be used with -d and -m options to map\n+\t\tdistribution of motifs and ChIP-Seq tags. For ChIP-Seq peaks for a Transcription factor\n+\t\tyou might want to use the -center option (below) to center peaks on the known motif\n+\t\t** If using "-size given", histogram will be scaled to each region (i.e. 
0-100%), with\n+\t\tthe -hist parameter being the number of bins to divide each region into.\n+\t\t\tHistogram Mode specific Options:\n+\t\t\t-nuc (calculated mononucleotide frequencies at each position,\n+\t\t\t\tWill report by default if extracting sequence for other purposes like motifs)\n+\t\t\t-di (calculated dinucleotide frequencies at each position)\n+\t\t\t-histNorm <#> (normalize the total tag count for each region to 1, where <#> is the\n+\t\t\t\tminimum tag total per region - use to avoid tag spikes from low coverage\n+\t\t\t-ghist (outputs profiles for each gene, for peak shape clustering)\n+\t\t\t-rm <#> (remove occurrences of same motif that occur within # bp)\n+\n+\tPeak Centering: (other options are ignored)\n+\t\t-center <motif file> (This will re-center peaks on the specified motif, or remove peak\n+\t\t\tif there is no motif in the peak. ONLY recentering will be performed, and all other\n+\t\t\toptions will be ignored. This will output a new peak file that can then be reanalyzed\n+\t\t\tto reveal fine-grain structure in peaks (It is advised to use -size < 200) with this\n+\t\t\tto keep peaks from moving too far (-mirror flips the position)\n+\t\t-multi (returns genomic positions of all sites instead of just the closest to center)\n+\n+\tAdvanced Options:\n+\t\t-len <#> / -fragLength <#> (Fragment length, default=auto, might want to set to 0 for RNA)\n+\t\t-size <#> (Peak size[from center of peak], default=inferred from peak file)\n+\t\t\t-size #,# (i.e. -size -10,50 count tags from -10 bp to +50 bp from center)\n+\t\t\t-size "given" (count tags etc. 
using the actual regions - for variable length regions)\n+\t\t-log (output tag counts as log2(x+1+rand) values - for scatter plots)\n+\t\t-sqrt (output tag counts as sqrt(x+rand) values - for scatter plots)\n+\t\t-strand <+|-|both> (Count tags on specific strands relative to peak, default: both)\n+\t\t-pc <#> (maximum number of tags to count per bp, default=0 [no maximum])\n+\t\t-cons (Retrieve conservation information for peaks/sites)\n+\t\t-CpG (Calculate CpG/GC content)\n+\t\t-ratio (process tag values as ratios - i.e. chip-seq, or mCpG/CpG)\n+\t\t-nfr (report nuclesome free region scores instead of tag counts, also -nfrSize <#>)\n+\t\t-norevopp (do not search for motifs on the opposite strand [works with -center too])\n+\t\t-noadj (do not adjust the tag counts based on total tags sequenced)\n+\t\t-norm <#> (normalize tags to this tag count, default=1e7, 0=average tag count in all directories)\n+\t\t-pdist (only report distance to nearest peak using -p, not peak name)\n+\t\t-map <mapping file> (mapping between peak IDs and promoter IDs, overrides closest assignment)\n+\t\t-noann, -nogene (skip genome annotation step, skip TSS annotation)\n+\t\t-homer1/-homer2 (by default, the new version of homer [-homer2] is used for finding motifs)\n+\n+\n+ </help>\n+</tool>\n+\n'

diff -r 529485c1dda1 -r 687df269e597 bed2pos.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bed2pos.xml Wed Dec 19 17:28:55 2012 -0500

[

@@ -0,0 +1,37 @@
+<tool id="homer_bed2pos" name="homer_bed2pos" version="0.0.3">
+    <requirements>
+        <requirement type="package" version="4.1">homer</requirement>
+    </requirements>
+    <description></description>
+    
+    <command>
+        bed2pos.pl $input_bed 1> $out_pos
+        2> $out_log || echo "Error running bed2pos." >&2
+    </command>
+    <inputs>
+        <param format="tabular,bed" name="input_bed" type="data" label="BED file" />
+    </inputs>
+    <outputs>
+        
+        
+        <data format="tabular" name="out_pos" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            
+            
+        </test>
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        Converts: BED -(to)-> homer peak positions
+
+        **Homer bed2pos.pl**
+
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+

diff -r 529485c1dda1 -r 687df269e597 findPeaks.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/findPeaks.xml Wed Dec 19 17:28:55 2012 -0500

[

@@ -0,0 +1,122 @@
+<tool id="homer_findPeaks" name="homer_findPeaks" version="0.1.2">
+    <requirements>
+        <requirement type="package" version="4.1">homer</requirement>
+    </requirements>
+    <description>Homer's peakcaller. Requires tag directories (see makeTagDirectory)</description>
+    
+    <command>
+        findPeaks $tagDir.extra_files_path $options -o $outputPeakFile
+
+    #if $control_tagDir:
+        -i $control_tagDir.extra_files_path
+    #end if
+
+        2> $out_log || echo "Error running findPeaks." >&2
+    </command>
+    <inputs>
+        <param format="homerTagDirectory" name="tagDir" type="data" label="tag directory" help="Must be made with homer_makeTagDirectory" />
+        <param format="homerTagDirectory" name="control_tagDir" type="data" optional="True" label="Control tag directory" help="Must be made with homer_makeTagDirectory" />
+        <param type="text" name="options" label="Extra options" value="" help="See link below for more options">
+          <sanitizer>
+            <valid initial="string.printable">
+             <remove value="'"/>
+             <remove value="/"/>
+            </valid>
+            <mapping initial="none">
+              <add source="'" target="__sq__"/>
+            </mapping>
+          </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        
+        
+        <data format="txt" name="outputPeakFile" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.txt" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($tagDir.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            
+            
+        </test>
+    </tests>
+
+    <help>
+
+  .. class:: infomark
+
+  **Homer findPeaks**
+
+  For more options, look under: "Command line options for findPeaks"
+
+  http://biowhat.ucsd.edu/homer/ngs/peaks.html
+
+  TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.
+
+**Parameter list**
+
+Command line options (not all of them are supported)::
+
+ Usage: findPeaks <tag directory> [options]
+
+ Finds peaks in the provided tag directory.  By default, peak list printed to stdout
+
+ General analysis options:
+ -o <filename|auto> (file name to write peaks to, default: stdout)
+ "-o auto" will send output to "<tag directory>/peaks.txt", ".../regions.txt",
+ or ".../transcripts.txt" depending on the "-style" option
+ -style <option> (Specialized options for specific analysis strategies)
+ factor (transcription factor ChIP-Seq, uses -center, output: peaks.txt,  default)
+ histone (histone modification ChIP-Seq, region based, uses -region -size 500 -L 0, regions.txt)
+ groseq (de novo transcript identification from GroSeq data, transcripts.txt)
+ tss (TSS identification from 5' RNA sequencing, tss.txt)
+ dnase (Hypersensitivity [crawford style (nicking)], peaks.txt)
+
+ chipseq/histone options:
+ -i <input tag directory> (Experiment to use as IgG/Input/Control)
+ -size <#> (Peak size, default: auto)
+ -minDist <#> (minimum distance between peaks, default: peak size x2)
+ -gsize <#> (Set effective mappable genome size, default: 2e9)
+ -fragLength <#|auto> (Approximate fragment length, default: auto)
+ -inputFragLength <#|auto> (Approximate fragment length of input tags, default: auto)
+ -tbp <#> (Maximum tags per bp to count, 0 = no limit, default: auto)
+ -inputtbp <#> (Maximum tags per bp to count in input, 0 = no limit, default: auto)
+ -strand <both|separate> (find peaks using tags on both strands or separate, default:both)
+ -norm # (Tag count to normalize to, default 10000000)
+ -region (extends start/stop coordinates to cover full region considered "enriched")
+ -center (Centers peaks on maximum tag overlap and calculates focus ratios)
+ -nfr (Centers peaks on most likely nucleosome free region [works best with mnase data])
+ (-center and -nfr can be performed later with "getPeakTags")
+
+ Peak Filtering options: (set -F/-L/-C to 0 to skip)
+ -F <#> (fold enrichment over input tag count, default: 4.0)
+   -P <#> (poisson p-value threshold relative to input tag count, default: 0.0001)
+ -L <#> (fold enrichment over local tag count, default: 4.0)
+   -LP <#> (poisson p-value threshold relative to local tag count, default: 0.0001)
+ -C <#> (fold enrichment limit of expected unique tag positions, default: 2.0)
+ -localSize <#> (region to check for local tag enrichment, default: 10000)
+ -inputSize <#> (Size of region to search for control tags, default: 2x peak size)
+ -fdr <#> (False discovery rate, default = 0.001)
+ -poisson <#> (Set poisson p-value cutoff, default: uses fdr)
+ -tagThreshold <#> (Set # of tags to define a peak, default: 25)
+ -ntagThreshold <#> (Set # of normalized tags to define a peak, by default uses 1e7 for norm)
+ -minTagThreshold <#> (Absolute minimum tags per peak, default: expected tags per peak)
+
+ GroSeq Options: (Need to specify "-style groseq"):
+ -tssSize <#> (size of region for initiation detection/artifact size, default: 250)
+ -minBodySize <#> (size of region for transcript body detection, default: 1000)
+ -maxBodySize <#> (size of region for transcript body detection, default: 10000)
+ -tssFold <#> (fold enrichment for new initiation detection, default: 4.0)
+ -bodyFold <#> (fold enrichment for new transcript detection, default: 4.0)
+ -endFold <#> (end transcript when levels are this much less than the start, default: 10.0)
+ -fragLength <#> (Approximate fragment length, default: 150)
+ -uniqmap <directory> (directory of binary files specifying uniquely mappable locations)
+ Download from http://biowhat.ucsd.edu/homer/groseq/
+ -confPvalue <#> (confidence p-value: 1.00e-05)
+ -minReadDepth <#> (Minimum initial read depth for transcripts, default: auto)
+ -pseudoCount <#> (Pseudo tag count, default: 2.0)
+ -gtf <filename> (Output de novo transcripts in GTF format)
+ "-o auto" will produce <dir>/transcripts.txt and <dir>/transcripts.gtf
+    </help>
+</tool>
+

diff -r 529485c1dda1 -r 687df269e597 makeTagDirectory.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.py Wed Dec 19 17:28:55 2012 -0500

[

@@ -0,0 +1,94 @@
+"""
+
+
+"""
+import re
+import os
+import sys
+import subprocess
+import optparse
+import shutil
+import tempfile
+
+def getFileString(fpath, outpath):
+    """
+    format a nice file size string
+    """
+    size = ''
+    fp = os.path.join(outpath, fpath)
+    s = '? ?'
+    if os.path.isfile(fp):
+        n = float(os.path.getsize(fp))
+        if n > 2**20:
+            size = ' (%1.1f MB)' % (n/2**20)
+        elif n > 2**10:
+            size = ' (%1.1f KB)' % (n/2**10)
+        elif n > 0:
+            size = ' (%d B)' % (int(n))
+        s = '%s %s' % (fpath, size)
+    return s
+
+class makeTagDirectory():
+    """wrapper
+    """
+
+    def __init__(self,opts=None, args=None):
+        self.opts = opts
+        self.args = args
+
+    def run_makeTagDirectory(self):
+        """
+        makeTagDirectory <Output Directory Name> [options] <alignment file1> [alignment file 2]
+
+        """
+        if self.opts.format != "bam":
+            cl = [self.opts.executable] + args + ["-format" , self.opts.format]
+        else:
+            cl = [self.opts.executable] + args
+        print cl
+        p = subprocess.Popen(cl)
+        retval = p.wait()
+
+
+        html = self.gen_html(args[0])
+        #html = self.gen_html()
+        return html,retval
+
+    def gen_html(self, dr=os.getcwd()):
+        flist = os.listdir(dr)
+        print flist
+        """ add a list of all files in the tagdirectory
+        """
+        res = ['<div class="module"><h2>Files created by makeTagDirectory</h2><table cellspacing="2" cellpadding="2">\n']
+
+        flist.sort()
+        for i,f in enumerate(flist):
+             if not(os.path.isdir(f)):
+                 fn = os.path.split(f)[-1]
+                 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,getFileString(fn, dr)))
+
+        res.append('</table>\n')
+
+        return res
+
+if __name__ == '__main__':
+    op = optparse.OptionParser()
+    op.add_option('-e', '--executable', default='makeTagDirectory')
+    op.add_option('-o', '--htmloutput', default=None)
+    op.add_option('-f', '--format', default="sam")
+    opts, args = op.parse_args()
+    #assert os.path.isfile(opts.executable),'## makeTagDirectory.py error - cannot find executable %s' % opts.executable
+
+    #if not os.path.exists(opts.outputdir):
+        #os.makedirs(opts.outputdir)
+    f = makeTagDirectory(opts, args)
+
+    html,retval = f.run_makeTagDirectory()
+    f = open(opts.htmloutput, 'w')
+    f.write(''.join(html))
+    f.close()
+    if retval <> 0:
+         print >> sys.stderr, serr # indicate failure
+
+
+

diff -r 529485c1dda1 -r 687df269e597 makeTagDirectory.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/makeTagDirectory.xml Wed Dec 19 17:28:55 2012 -0500

[

b'@@ -0,0 +1,146 @@\n+<tool id="homer_makeTagDirectory" name="homer_makeTagDirectory" version="1.0.1">\n+ <requirements>\n+ <requirement type="package" version="4.1">homer</requirement>\n+ </requirements>\n+ <description>Simple wrapper for makeTagDirectory. Used by findPeaks</description>\n+ \n+ <command interpreter="python">makeTagDirectory.py ${tagDir.files_path} \n+ #for $alignF in $alignmentFiles\n+ $alignF.file -f $alignF.file.ext\n+ #end for\n+ -o $tagDir\n+ 2> $out_log || echo "Error running homer_makeTagDirectory." >&2\n+\n+ </command>\n+ <inputs>\n+ <param name="title" label="Name for the output tag directory" type="text" default="Homer TagDirectory" />\n+ <param type="text" name="options" label="Extra options" value="" help="See below for more options">\n+ <sanitizer>\n+ <valid initial="string.printable">\n+ <remove value="'"/>\n+ <remove value="/"/>\n+ </valid>\n+ <mapping initial="none">\n+ <add source="'" target="__sq__"/>\n+ </mapping>\n+ </sanitizer>\n+ </param>\n+ <repeat name="alignmentFiles" title="Alignment Files">\n+ <param name="file" label="Add file" type="data" format="sam,bed" help="Alignments in SAM or BED format" />\n+ </repeat>\n+ </inputs>\n+ <outputs>\n+ \n+ <data format="html" name="tagDir" label="${title} tag directory" />\n+ <data format="txt" name="out_log" label="${title}.log" />\n+ \n+ \n+ </outputs>\n+\n+\n+ <tests>\n+ \n+ \n+ \n+ \n+ </tests>\n+\n+ <help>\n+\n+ .. class:: infomark\n+\n+ **Homer makeTagDirectory**\n+\n+ For more options, look under: "Command line options"\n+\n+ http://biowhat.ucsd.edu/homer/ngs/tagDir.html\n+\n+**Parameter list**\n+\n+Command line options (not all of them are supported)::\n+\n+\tUsage: makeTagDirectory <directory> <alignment file 1> [file 2] ... [options]\n+\n+\tCreates a platform-independent 'tag directory' for later analysis.\n+\tCurrently BED, eland, bowtie, and sam files are accepted. The program will try to\n+\tautomatically detect the alignment format if not specified. 
Program will also\n+\tunzip *.gz, *.bz2, and *.zip files and convert *.bam to sam files on the fly\n+\tExisting tag directories can be added or combined to make a new one using -d/-t\n+\tIf more than one format is needed and the program cannot auto-detect it properly,\n+\tmake separate tag directories by running the program separately, then combine them.\n+\tTo perform QC/manipulations on an existing tag directory, add "-update"\n+\n+\tOptions:\n+\t\t-fragLength <# | given> (Set estimated fragment length - given: use read lengths)\n+\t\t\tBy default treats the sample as a single read ChIP-Seq experiment\n+\t\t-format <X> where X can be: (with column specifications underneath)\n+\t\t\tbed - BED format files:\n+\t\t\t\t(1:chr,2:start,3:end,4:+/- or read name,5:# tags,6:+/-)\n+\t\t\t\t-force5th (5th column of BED file contains # of reads mapping to position)\n+\t\t\tsam - SAM formatted files (use samTools to covert BAMs into SAM if you have BAM)\n+\t\t\t\t-unique (keep if there is a single best alignment based on mapq)\n+\t\t\t\t\t-mapq <#> (Minimum mapq for -unique, default: 10, set negative to use AS:i:/XS:i:)\n+\t\t\t\t-keepOne (keep one of the best alignments even if others exist)\n+\t\t\t\t-keepAll (include all alignments in SAM file)\n+\t\t\t\t-mis (Maximum allowed mismatches, default: no limit, uses MD:Z: tag)\n+\t\t\tbowtie - o'..b'st -k 2 options)\n+\t\t\t\t(1:read name,2:+/-,3:chr,4:position,5:seq,6:quality,7:NA,8:misInfo)\n+\t\t\teland_result - output from basic eland\n+\t\t\t\t(1:read name,2:seq,3:code,4:#zeroMM,5:#oneMM,6:#twoMM,7:chr,\n+\t\t\t\t\t\t\t8:position,9:F/R,10-:mismatches\n+\t\t\teland_export - output from illumina pipeline (22 columns total)\n+\t\t\t\t(1-5:read name info,9:sequence,10:quality,11:chr,13:position,14:strand)\n+\t\t\teland_extended - output from illumina pipeline (4 columns total)\n+\t\t\t\t(1:read name,2:sequence,3:match stats,4:positions[,])\n+\t\t\tmCpGbed - encode style mCpG reporting in extended BED format, no 
auto-detect\n+\t\t\t\t(1:chr,2:start,3:end,4:name,5:,6:+/-,7:,8:,9:,10:#C,11:#mC)\n+\t\t\tallC - Lister style output files detailing the read information about all cytosines\n+\t\t\t\t(1:chr,2:pos,3:strand,4:context,#mC,#totalC,#C\n+\t\t\t\t-minCounts <#> (minimum number of reads to report mC/C ratios, default: 10)\n+\t\t\t\t-mCcontext <CG|CHG|CHH|all> (only use C's in this context, default: CG)\n+\t\t\tHiCsummary - minimal paired-end read mapping information\n+\t\t\t\t(1:readname,2:chr1,3:5'pos1,4:strand1,5:chr2,6:5'pos2,7:strand2)\n+\t\t-force5th (5th column of BED file contains # of reads mapping to position)\n+\t\t-d <tag directory> [tag directory 2] ... (add Tag directory to new tag directory)\n+\t\t-t <tag file> [tag file 2] ... (add tag file i.e. *.tags.tsv to new tag directory)\n+\t\t-single (Create a single tags.tsv file for all "chromosomes" - i.e. if >100 chromosomes)\n+\t\t-update (Use current tag directory for QC/processing, do not parse new alignment files)\n+\t\t-tbp <#> (Maximum tags per bp, default: no maximum)\n+\t\t-precision <1|2|3> (number of decimal places to use for tag totals, default: 1)\n+\n+\t\tGC-bias options:\n+\t\t-genome <genome version> (To see available genomes, use "-genome list")\n+\t\t\t-or- (for custom genomes):\n+\t\t-genome <path-to-FASTA file or directory of FASTA files>\n+\n+\t\t-checkGC (check Sequence bias, requires "-genome")\n+\t\t\t-freqStart <#> (offset to start calculating frequency, default: -50)\n+\t\t\t-freqEnd <#> (distance past fragment length to calculate frequency, default: +50)\n+\t\t\t-oligoStart <#> (oligo bias start)\n+\t\t\t-oligoEnd <#> (oligo bias end)\n+\t\t-normGC <target GC profile file> (i.e. 
tagGCcontent.txt file from control experiment)\n+\t\t\tUse "-normGC default" to match the genomic GC distribution\n+\t\t-normFixedOligo <oligoFreqFile> (normalize 5' end bias, "-normFixedOligo default" ok)\n+\t\t-minNormRatio <#> (Minimum deflation ratio of tag counts, default: 0.25)\n+\t\t-maxNormRatio <#> (Maximum inflation ratio of tag counts, default: 2.0)\n+\t\t-iterNorm <#> (Sets -max/minNormRatio to 1 and 0, iteratively normalizes such that the\n+\t\t\tresulting distrubtion is no more than #% different than target, i.e. 0.1,default: off)\n+\n+\tPaired-end/HiC options\n+\t\t-illuminaPE (when matching PE reads, assumes last character of read name is 0 or 1)\n+\t\t-removePEbg (remove paired end tags within 1.5x fragment length on same chr)\n+\t\t\t-PEbgLength <#> (remove PE reads facing on another within this distance, default: 1.5x fragLen)\n+\t\t-restrictionSite <seq> (i.e. AAGCTT for HindIII, assign data < 1.5x fragment length to sites)\n+\t\t\tMust specify genome sequence directory too. (-rsmis <#> to specify mismatches, def: 0)\n+\t\t\t-both, -one, -onlyOne, -none (Keeps reads near restriction sites, default: keep all)\n+\t\t\t-removeSelfLigation (removes reads linking same restriction fragment)\n+\t\t\t-removeRestrictionEnds (removes reads starting on a restriction fragment)\n+\t\t\t-assignMidPoint (will place reads in the middle of HindIII fragments)\n+\t\t\t-restrictionSiteLength <#> (maximum distance from restriction site, default: 1.5x fragLen)\n+\t\t-removeSpikes <size bp> <#> (remove tags from regions with > than # times\n+\t\t\tthe average tags per size bp, suggest "-removeSpikes 10000 5")\n+\n+\n+ </help>\n+</tool>\n+\n'

diff -r 529485c1dda1 -r 687df269e597 pos2bed.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pos2bed.xml Wed Dec 19 17:28:55 2012 -0500

[

@@ -0,0 +1,37 @@
+<tool id="homer_pos2bed" name="homer_pos2bed" version="0.0.3">
+    <requirements>
+        <requirement type="package" version="4.1" >homer</requirement>
+    </requirements>
+    <description></description>
+    
+    <command>
+        pos2bed.pl $input_peak 1> $out_bed
+        2> $out_log || echo "Error running pos2bed." >&2
+    </command>
+    <inputs>
+        <param format="tabular" name="input_peak" type="data" label="Homer peak positions" />
+    </inputs>
+    <outputs>
+        
+        
+        <data format="bed" name="out_bed" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.bed" />
+        <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_peak.name))[0]#.log" />
+    </outputs>
+    <tests>
+        <test>
+            
+            
+        </test>
+    </tests>
+
+    <help>
+        .. class:: infomark
+
+        Converts: homer peak positions -(to)-> BED format
+
+        **Homer pos2bed.pl**
+
+        http://biowhat.ucsd.edu/homer/ngs/miscellaneous.html
+    </help>
+</tool>
+

diff -r 529485c1dda1 -r 687df269e597 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Wed Dec 19 17:28:55 2012 -0500

@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="homer" version="4.1">
+    <install version="4.1">
+      <actions>
+        <action type="download_by_url">http://biowhat.ucsd.edu/homer/configureHomer.pl</action>
+        <action type="shell_command">perl ./configureHomer.pl -install</action>
+        <action type="shell_command">perl ./configureHomer.pl -install hg19</action>
+        <action type="move_directory_files">
+          <source_directory>./</source_directory>
+          <destination_directory>$INSTALL_DIR</destination_directory>
+        </action>
+        <action type="set_environment">
+          <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+        </action>
+      </actions>
+    </install>
+    <readme>
+      Installs homer
+    </readme>
+  </package>
+</tool_dependency>
+