homer: annotatePeaks.xml annotate

annotate annotatePeaks.xml @ 28:f0b5827b6051 draft default tip

Uploaded

author	kevyin
date	Thu, 20 Dec 2012 18:28:03 -0500
parents
children

rev	line source
28 f0b5827b6051 Uploaded kevyin parents: diff changeset	1 <tool id="homer_annotatePeaks" name="homer_annotatePeaks" version="0.0.5">
f0b5827b6051 Uploaded kevyin parents: diff changeset	2 <requirements> <!-- Declares the external package this tool depends on: homer, version 4.1. -->
f0b5827b6051 Uploaded kevyin parents: diff changeset	3 <requirement type="package" version="4.1">homer</requirement>
f0b5827b6051 Uploaded kevyin parents: diff changeset	4 </requirements>
f0b5827b6051 Uploaded kevyin parents: diff changeset	5 <description></description>
f0b5827b6051 Uploaded kevyin parents: diff changeset	6 <!--<version_command></version_command>-->
f0b5827b6051 Uploaded kevyin parents: diff changeset	7 <!-- Runs annotatePeaks.pl on the input peaks for the selected genome, appending the user's "Extra options" after the genome argument (the position HOMER's usage line expects). Standard output goes to out_annotated and standard error to out_log; if the program exits non-zero, a short message is written to the job's stderr. --> <command>
f0b5827b6051 Uploaded kevyin parents: diff changeset	8 annotatePeaks.pl $input_bed $genome_selector $options 1> $out_annotated
f0b5827b6051 Uploaded kevyin parents: diff changeset	9 2> $out_log || echo "Error running annotatePeaks." >&2
f0b5827b6051 Uploaded kevyin parents: diff changeset	10 </command>
f0b5827b6051 Uploaded kevyin parents: diff changeset	11 <inputs> <!-- input_bed: peak or BED dataset (tabular,bed); genome_selector: genome version (only hg19 is offered); options: free-text extra options. -->
f0b5827b6051 Uploaded kevyin parents: diff changeset	12 <param format="tabular,bed" name="input_bed" type="data" label="Homer peaks OR BED format"/>
f0b5827b6051 Uploaded kevyin parents: diff changeset	13 <param name="genome_selector" type="select" label="Genome version">
f0b5827b6051 Uploaded kevyin parents: diff changeset	14 <option value="hg19" selected="true">hg19</option>
f0b5827b6051 Uploaded kevyin parents: diff changeset	15 </param>
f0b5827b6051 Uploaded kevyin parents: diff changeset	16 <param type="text" name="options" label="Extra options" value="" help="See link below for more options"> <!-- Sanitizer below: starts from string.printable, drops ' and / from the valid set, and maps ' to __sq__. NOTE(review): shell metacharacters such as ; and | remain valid; confirm that is acceptable wherever this value is placed on a command line. -->
f0b5827b6051 Uploaded kevyin parents: diff changeset	17 <sanitizer>
f0b5827b6051 Uploaded kevyin parents: diff changeset	18 <valid initial="string.printable">
f0b5827b6051 Uploaded kevyin parents: diff changeset	19 <remove value="'"/>
f0b5827b6051 Uploaded kevyin parents: diff changeset	20 <remove value="/"/>
f0b5827b6051 Uploaded kevyin parents: diff changeset	21 </valid>
f0b5827b6051 Uploaded kevyin parents: diff changeset	22 <mapping initial="none">
f0b5827b6051 Uploaded kevyin parents: diff changeset	23 <add source="'" target="__sq__"/>
f0b5827b6051 Uploaded kevyin parents: diff changeset	24 </mapping>
f0b5827b6051 Uploaded kevyin parents: diff changeset	25 </sanitizer>
f0b5827b6051 Uploaded kevyin parents: diff changeset	26 </param>
f0b5827b6051 Uploaded kevyin parents: diff changeset	27 </inputs>
f0b5827b6051 Uploaded kevyin parents: diff changeset	28 <outputs> <!-- out_annotated receives annotatePeaks.pl standard output and out_log its standard error (see the command). The annotation table is declared tabular because HOMER writes tab-delimited text, not comma-separated values. Both labels are built from the tool name, the input dataset name without its extension, and the selected genome. -->
f0b5827b6051 Uploaded kevyin parents: diff changeset	29 <!--<data format="html" name="html_outfile" label="index" />-->
f0b5827b6051 Uploaded kevyin parents: diff changeset	30 <!--<data format="html" hidden="True" name="html_outfile" label="index.html" />-->
f0b5827b6051 Uploaded kevyin parents: diff changeset	31 <data format="tabular" name="out_annotated" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}" />
f0b5827b6051 Uploaded kevyin parents: diff changeset	32 <data format="txt" name="out_log" label="${tool.name} on #echo os.path.splitext(str($input_bed.name))[0]#_genome_${genome_selector}.log" />
f0b5827b6051 Uploaded kevyin parents: diff changeset	33 </outputs>
f0b5827b6051 Uploaded kevyin parents: diff changeset	34 <tests> <!-- Placeholder: the single test case below has its param and output declarations commented out, so it specifies no inputs and no expected outputs. -->
f0b5827b6051 Uploaded kevyin parents: diff changeset	35 <test>
f0b5827b6051 Uploaded kevyin parents: diff changeset	36 <!--<param name="input_file" value="extract_genomic_dna.fa" />-->
f0b5827b6051 Uploaded kevyin parents: diff changeset	37 <!--<output name="html_file" file="sample_output.html" ftype="html" />-->
f0b5827b6051 Uploaded kevyin parents: diff changeset	38 </test>
f0b5827b6051 Uploaded kevyin parents: diff changeset	39 </tests>
f0b5827b6051 Uploaded kevyin parents: diff changeset	40
f0b5827b6051 Uploaded kevyin parents: diff changeset	41 <help>
f0b5827b6051 Uploaded kevyin parents: diff changeset	42
f0b5827b6051 Uploaded kevyin parents: diff changeset	43 .. class:: infomark
f0b5827b6051 Uploaded kevyin parents: diff changeset	44
f0b5827b6051 Uploaded kevyin parents: diff changeset	45 Homer annotatePeaks
f0b5827b6051 Uploaded kevyin parents: diff changeset	46
f0b5827b6051 Uploaded kevyin parents: diff changeset	47 More information on accepted formats and options
f0b5827b6051 Uploaded kevyin parents: diff changeset	48
f0b5827b6051 Uploaded kevyin parents: diff changeset	49 http://biowhat.ucsd.edu/homer/ngs/annotation.html
f0b5827b6051 Uploaded kevyin parents: diff changeset	50
f0b5827b6051 Uploaded kevyin parents: diff changeset	51 TIP: use homer_bed2pos and homer_pos2bed to convert between the homer peak positions and the BED format.
f0b5827b6051 Uploaded kevyin parents: diff changeset	52
f0b5827b6051 Uploaded kevyin parents: diff changeset	53 Parameter list
f0b5827b6051 Uploaded kevyin parents: diff changeset	54
f0b5827b6051 Uploaded kevyin parents: diff changeset	55 Command line options (not all of them are supported)::
f0b5827b6051 Uploaded kevyin parents: diff changeset	56
f0b5827b6051 Uploaded kevyin parents: diff changeset	57 Usage: annotatePeaks.pl <peak file | tss> <genome version> [additional options...]
f0b5827b6051 Uploaded kevyin parents: diff changeset	58
f0b5827b6051 Uploaded kevyin parents: diff changeset	59 Available Genomes (required argument): (name,org,directory,default promoter set)
f0b5827b6051 Uploaded kevyin parents: diff changeset	60 -- or --
f0b5827b6051 Uploaded kevyin parents: diff changeset	61 Custom: provide the path to genome FASTA files (directory or single file)
f0b5827b6051 Uploaded kevyin parents: diff changeset	62
f0b5827b6051 Uploaded kevyin parents: diff changeset	63 User defined annotation files (default is UCSC refGene annotation):
f0b5827b6051 Uploaded kevyin parents: diff changeset	64 annotatePeaks.pl accepts GTF (gene transfer formatted) files to annotate positions relative
f0b5827b6051 Uploaded kevyin parents: diff changeset	65 to custom annotations, such as those from de novo transcript discovery or Gencode.
f0b5827b6051 Uploaded kevyin parents: diff changeset	66 -gtf <gtf format file> (-gff and -gff3 can work for those files, but GTF is better)
f0b5827b6051 Uploaded kevyin parents: diff changeset	67
f0b5827b6051 Uploaded kevyin parents: diff changeset	68 Peak vs. tss/tts/rna mode (works with custom GTF file):
f0b5827b6051 Uploaded kevyin parents: diff changeset	69 If the first argument is "tss" (i.e. annotatePeaks.pl tss hg18 ...) then a TSS centric
f0b5827b6051 Uploaded kevyin parents: diff changeset	70 analysis will be carried out. Tag counts and motifs will be found relative to the TSS.
f0b5827b6051 Uploaded kevyin parents: diff changeset	71 (no position file needed) ["tts" now works too - e.g. 3' end of gene]
f0b5827b6051 Uploaded kevyin parents: diff changeset	72 ["rna" specifies gene bodies, will automatically set "-size given"]
f0b5827b6051 Uploaded kevyin parents: diff changeset	73 NOTE: The default TSS peak size is 4000 bp, i.e. +/- 2kb (change with -size option)
f0b5827b6051 Uploaded kevyin parents: diff changeset	74 -list <gene id list> (subset of genes to perform analysis [unigene, gene id, accession,
f0b5827b6051 Uploaded kevyin parents: diff changeset	75 probe, etc.], default = all promoters)
f0b5827b6051 Uploaded kevyin parents: diff changeset	76 -cTSS <promoter position file i.e. peak file> (should be centered on TSS)
f0b5827b6051 Uploaded kevyin parents: diff changeset	77
f0b5827b6051 Uploaded kevyin parents: diff changeset	78 Primary Annotation Options:
f0b5827b6051 Uploaded kevyin parents: diff changeset	79 -mask (Masked repeats, can also add 'r' to end of genome name)
f0b5827b6051 Uploaded kevyin parents: diff changeset	80 -m <motif file 1> [motif file 2] ... (list of motifs to find in peaks)
f0b5827b6051 Uploaded kevyin parents: diff changeset	81 -mscore (reports the highest log-odds score within the peak)
f0b5827b6051 Uploaded kevyin parents: diff changeset	82 -nmotifs (reports the number of motifs per peak)
f0b5827b6051 Uploaded kevyin parents: diff changeset	83 -mdist (reports distance to closest motif)
f0b5827b6051 Uploaded kevyin parents: diff changeset	84 -mfasta <filename> (reports sites in a fasta file - for building new motifs)
f0b5827b6051 Uploaded kevyin parents: diff changeset	85 -fm <motif file 1> [motif file 2] (list of motifs to filter from above)
f0b5827b6051 Uploaded kevyin parents: diff changeset	86 -rmrevopp <#> (only count sites found within <#> on both strands once, i.e. palindromic)
f0b5827b6051 Uploaded kevyin parents: diff changeset	87 -matrix <prefix> (outputs a motif co-occurrence files:
f0b5827b6051 Uploaded kevyin parents: diff changeset	88 prefix.count.matrix.txt - number of peaks with motif co-occurrence
f0b5827b6051 Uploaded kevyin parents: diff changeset	89 prefix.ratio.matrix.txt - ratio of observed vs. expected co-occurrence
f0b5827b6051 Uploaded kevyin parents: diff changeset	90 prefix.logPvalue.matrix.txt - co-occurrence enrichment
f0b5827b6051 Uploaded kevyin parents: diff changeset	91 prefix.stats.txt - table of pair-wise motif co-occurrence statistics
f0b5827b6051 Uploaded kevyin parents: diff changeset	92 additional options:
f0b5827b6051 Uploaded kevyin parents: diff changeset	93 -matrixMinDist <#> (minimum distance between motif pairs - to avoid overlap)
f0b5827b6051 Uploaded kevyin parents: diff changeset	94 -matrixMaxDist <#> (maximum distance between motif pairs)
f0b5827b6051 Uploaded kevyin parents: diff changeset	95 -mbed <filename> (Output motif positions to a BED file to load at UCSC (or -mpeak))
f0b5827b6051 Uploaded kevyin parents: diff changeset	96 -mlogic <filename> (will output stats on common motif orientations)
f0b5827b6051 Uploaded kevyin parents: diff changeset	97 -d <tag directory 1> [tag directory 2] ... (list of experiment directories to show
f0b5827b6051 Uploaded kevyin parents: diff changeset	98 tag counts for) NOTE: -dfile <file> where file is a list of directories in first column
f0b5827b6051 Uploaded kevyin parents: diff changeset	99 -bedGraph <bedGraph file 1> [bedGraph file 2] ... (read coverage counts from bedGraph files)
f0b5827b6051 Uploaded kevyin parents: diff changeset	100 -wig <wiggle file 1> [wiggle file 2] ... (read coverage counts from wiggle files)
f0b5827b6051 Uploaded kevyin parents: diff changeset	101 -p <peak file> [peak file 2] ... (to find nearest peaks)
f0b5827b6051 Uploaded kevyin parents: diff changeset	102 -pdist to report only distance (-pdist2 gives directional distance)
f0b5827b6051 Uploaded kevyin parents: diff changeset	103 -pcount to report number of peaks within region
f0b5827b6051 Uploaded kevyin parents: diff changeset	104 -vcf <VCF file> (annotate peaks with genetic variation information, one col per individual)
f0b5827b6051 Uploaded kevyin parents: diff changeset	105 -editDistance (Computes the # bp changes relative to reference)
f0b5827b6051 Uploaded kevyin parents: diff changeset	106 -individuals <name1> [name2] ... (restrict analysis to these individuals)
f0b5827b6051 Uploaded kevyin parents: diff changeset	107 -gene <data file> ... (Adds additional data to result based on the closest gene.
f0b5827b6051 Uploaded kevyin parents: diff changeset	108 This is useful for adding gene expression data. The file must have a header,
f0b5827b6051 Uploaded kevyin parents: diff changeset	109 and the first column must be a GeneID, Accession number, etc. If the peak
f0b5827b6051 Uploaded kevyin parents: diff changeset	110 cannot be mapped to data in the file then the entry will be left empty.
f0b5827b6051 Uploaded kevyin parents: diff changeset	111 -go <output directory> (perform GO analysis using genes near peaks)
f0b5827b6051 Uploaded kevyin parents: diff changeset	112 -genomeOntology <output directory> (perform genomeOntology analysis on peaks)
f0b5827b6051 Uploaded kevyin parents: diff changeset	113 -gsize <#> (Genome size for genomeOntology analysis, default: 2e9)
f0b5827b6051 Uploaded kevyin parents: diff changeset	114
f0b5827b6051 Uploaded kevyin parents: diff changeset	115 Annotation vs. Histogram mode:
f0b5827b6051 Uploaded kevyin parents: diff changeset	116 -hist <bin size in bp> (i.e 1, 2, 5, 10, 20, 50, 100 etc.)
f0b5827b6051 Uploaded kevyin parents: diff changeset	117 The -hist option can be used to generate histograms of position dependent features relative
f0b5827b6051 Uploaded kevyin parents: diff changeset	118 to the center of peaks. This is primarily meant to be used with -d and -m options to map
f0b5827b6051 Uploaded kevyin parents: diff changeset	119 distribution of motifs and ChIP-Seq tags. For ChIP-Seq peaks for a Transcription factor
f0b5827b6051 Uploaded kevyin parents: diff changeset	120 you might want to use the -center option (below) to center peaks on the known motif
f0b5827b6051 Uploaded kevyin parents: diff changeset	121 ** If using "-size given", histogram will be scaled to each region (i.e. 0-100%), with
f0b5827b6051 Uploaded kevyin parents: diff changeset	122 the -hist parameter being the number of bins to divide each region into.
f0b5827b6051 Uploaded kevyin parents: diff changeset	123 Histogram Mode specific Options:
f0b5827b6051 Uploaded kevyin parents: diff changeset	124 -nuc (calculated mononucleotide frequencies at each position,
f0b5827b6051 Uploaded kevyin parents: diff changeset	125 Will report by default if extracting sequence for other purposes like motifs)
f0b5827b6051 Uploaded kevyin parents: diff changeset	126 -di (calculated dinucleotide frequencies at each position)
f0b5827b6051 Uploaded kevyin parents: diff changeset	127 -histNorm <#> (normalize the total tag count for each region to 1, where <#> is the
f0b5827b6051 Uploaded kevyin parents: diff changeset	128 minimum tag total per region - use to avoid tag spikes from low coverage
f0b5827b6051 Uploaded kevyin parents: diff changeset	129 -ghist (outputs profiles for each gene, for peak shape clustering)
f0b5827b6051 Uploaded kevyin parents: diff changeset	130 -rm <#> (remove occurrences of same motif that occur within # bp)
f0b5827b6051 Uploaded kevyin parents: diff changeset	131
f0b5827b6051 Uploaded kevyin parents: diff changeset	132 Peak Centering: (other options are ignored)
f0b5827b6051 Uploaded kevyin parents: diff changeset	133 -center <motif file> (This will re-center peaks on the specified motif, or remove peak
f0b5827b6051 Uploaded kevyin parents: diff changeset	134 if there is no motif in the peak. ONLY recentering will be performed, and all other
f0b5827b6051 Uploaded kevyin parents: diff changeset	135 options will be ignored. This will output a new peak file that can then be reanalyzed
f0b5827b6051 Uploaded kevyin parents: diff changeset	136 to reveal fine-grain structure in peaks (It is advised to use -size < 200) with this
f0b5827b6051 Uploaded kevyin parents: diff changeset	137 to keep peaks from moving too far (-mirror flips the position)
f0b5827b6051 Uploaded kevyin parents: diff changeset	138 -multi (returns genomic positions of all sites instead of just the closest to center)
f0b5827b6051 Uploaded kevyin parents: diff changeset	139
f0b5827b6051 Uploaded kevyin parents: diff changeset	140 Advanced Options:
f0b5827b6051 Uploaded kevyin parents: diff changeset	141 -len <#> / -fragLength <#> (Fragment length, default=auto, might want to set to 0 for RNA)
f0b5827b6051 Uploaded kevyin parents: diff changeset	142 -size <#> (Peak size[from center of peak], default=inferred from peak file)
f0b5827b6051 Uploaded kevyin parents: diff changeset	143 -size #,# (i.e. -size -10,50 count tags from -10 bp to +50 bp from center)
f0b5827b6051 Uploaded kevyin parents: diff changeset	144 -size "given" (count tags etc. using the actual regions - for variable length regions)
f0b5827b6051 Uploaded kevyin parents: diff changeset	145 -log (output tag counts as log2(x+1+rand) values - for scatter plots)
f0b5827b6051 Uploaded kevyin parents: diff changeset	146 -sqrt (output tag counts as sqrt(x+rand) values - for scatter plots)
f0b5827b6051 Uploaded kevyin parents: diff changeset	147 -strand <+|-|both> (Count tags on specific strands relative to peak, default: both)
f0b5827b6051 Uploaded kevyin parents: diff changeset	148 -pc <#> (maximum number of tags to count per bp, default=0 [no maximum])
f0b5827b6051 Uploaded kevyin parents: diff changeset	149 -cons (Retrieve conservation information for peaks/sites)
f0b5827b6051 Uploaded kevyin parents: diff changeset	150 -CpG (Calculate CpG/GC content)
f0b5827b6051 Uploaded kevyin parents: diff changeset	151 -ratio (process tag values as ratios - i.e. chip-seq, or mCpG/CpG)
f0b5827b6051 Uploaded kevyin parents: diff changeset	152 -nfr (report nucleosome free region scores instead of tag counts, also -nfrSize <#>)
f0b5827b6051 Uploaded kevyin parents: diff changeset	153 -norevopp (do not search for motifs on the opposite strand [works with -center too])
f0b5827b6051 Uploaded kevyin parents: diff changeset	154 -noadj (do not adjust the tag counts based on total tags sequenced)
f0b5827b6051 Uploaded kevyin parents: diff changeset	155 -norm <#> (normalize tags to this tag count, default=1e7, 0=average tag count in all directories)
f0b5827b6051 Uploaded kevyin parents: diff changeset	156 -pdist (only report distance to nearest peak using -p, not peak name)
f0b5827b6051 Uploaded kevyin parents: diff changeset	157 -map <mapping file> (mapping between peak IDs and promoter IDs, overrides closest assignment)
f0b5827b6051 Uploaded kevyin parents: diff changeset	158 -noann, -nogene (skip genome annotation step, skip TSS annotation)
f0b5827b6051 Uploaded kevyin parents: diff changeset	159 -homer1/-homer2 (by default, the new version of homer [-homer2] is used for finding motifs)
f0b5827b6051 Uploaded kevyin parents: diff changeset	160
f0b5827b6051 Uploaded kevyin parents: diff changeset	161
f0b5827b6051 Uploaded kevyin parents: diff changeset	162 </help>
f0b5827b6051 Uploaded kevyin parents: diff changeset	163 </tool>
f0b5827b6051 Uploaded kevyin parents: diff changeset	164

Mercurial > repos > kevyin > homer

annotate annotatePeaks.xml @ 28:f0b5827b6051 draft default tip