Mercurial > repos > lparsons > ea_utils
diff sam-stats.xml @ 1:cf4b5125a835 draft
Uploaded
author | lparsons |
---|---|
date | Tue, 20 Nov 2012 15:54:14 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam-stats.xml Tue Nov 20 15:54:14 2012 -0500 @@ -0,0 +1,83 @@ +<tool id="sam_stats" name="sam-stats" version="0.1"> + <description> - Compute statistics from SAM or BAM files</description> + <requirements> + <requirement type="package" version="1.1.2-484">ea-utils</requirement> + </requirements> + <command> + sam-stats + $trackMultAlign + $reportAllChr + #if $rnaSeqStats: + -R $rnaSeqStatsFile + #end if + #if $input.extension == "bam": + -B + #end if + -S $histBinSize + $input + > $samStats + </command> + <inputs> + <param format="sam, bam" name="input" type="data" label="SAM/BAM File" /> + <param name="trackMultAlign" type="boolean" value="False" truevalue="-D" falsevalue="" label="Keep track of multiple alignments (slower!)" /> + <param name="reportAllChr" type="boolean" value="False" truevalue="-A" falsevalue="" label="Report all chr sigs, even if there are more than 1000" /> + <!-- <param name="numReadsSubsample" type="integer" value="1000000" min="1" max="1000000" label="Number of reads to sample for per-base statistics (max 1,000,000)" /> --> + <param name="histBinSize" type="integer" value="30" min="1" label="Number of bins per chromosome for reads by chromosome "histogram"" /> + <param name="rnaSeqStats" type="boolean" value="False" label="Output RNA-Seq statistics (coverage, 3 prime bias, etc.)" /> + </inputs> + + <outputs> + <data format="tabular" name="samStats" label="${tool.name} on ${on_string}"/> + <data format="tabular" name="rnaSeqStatsFile" label="${tool.name} on ${on_string} (RNA-Seq Stats)"> <filter>rnaSeqStats</filter> + </data> + </outputs> + + <stdio> + <exit_code range="1:" level="fatal" description="Unknown error occurred" /> + </stdio> + + <tests> + <test> + <param name="input" value="test.sam" /> + <output name="samStats" file="testout.txt" /> + </test> + </tests> + + <help> +Overview +-------- +sam-stats computes varius statics on SAM/BAM alignment files. + +Complete Stats:: + + <STATS> : mean, max, stdev, median, Q1 (25 percentile), Q3 + reads : # of entries in the sam file, might not be # reads + phred : phred scale used + bsize : # reads used for qual stats + mapped reads : number of aligned reads (unique probe id sequences) + mapped bases : total of the lengths of the aligned reads + forward : number of forward-aligned reads + reverse : number of reverse-aligned reads + snp rate : mismatched bases / total bases + ins rate : insert bases / total bases + del rate : deleted bases / total bases + pct mismatch : percent of reads that have mismatches + len <STATS> : read length stats, ignored if fixed-length + mapq <STATS> : stats for mapping qualities + insert <STATS> : stats for insert sizes + <CHR> : percentage of mapped bases per chr, followed by a signature + +Subsampled stats (1M reads max):: + + base qual <STATS> : stats for base qualities + A,T,C,G : base percentages + +Meaning of the per-chromosome signature: + + A ascii-histogram of mapped reads by chromosome position. It is only output if the original SAM/BAM has a header. The values are the log2 of the # of mapped reads at each position + ascii '0'. + +See http://code.google.com/p/ea-utils/wiki/SamStatsDetails for more information on each stat, how it's calculated and what it means. + +This tool uses the sam-stats program that is part of the ea-utils suite. See http://code.google.com/p/ea-utils/wiki/SamStats for details. + </help> +</tool>