changeset 29:907d4b021ff6

Uploaded
author nilesh
date Thu, 11 Jul 2013 12:31:33 -0400
parents 6e438a559a10
children b5d2f575ccb6
files RPKM_count.xml RPKM_saturation.xml bam2wig.xml bam_stat.xml clipping_profile.xml geneBody_coverage.xml geneBody_coverage2.xml infer_experiment.xml inner_distance.xml junction_annotation.xml junction_saturation.xml read_GC.xml read_NVC.xml read_distribution.xml read_duplication.xml read_quality.xml samtoolshelper.py tool_dependencies.xml
diffstat 18 files changed, 1300 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/RPKM_count.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,147 @@
+<tool id="RPKM_count" name="RPKM Count">
+	<description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
+	<!-- External packages this tool needs at run time. -->
+	<requirements>
+		<requirement type="package" version="0.1.18">samtools</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!--
+		Invokes RPKM_count.py through samtoolshelper.py and writes results under the prefix "output".
+		Flags appended by the Cheetah directives below:
+		  -d RULE  strand rule, only for the "pair" and "single" strand settings
+		  -u       when skiphits is checked
+		  -e       when onlyexonic is checked
+		A former "#if $nx ... -x" branch was removed: no param named nx is declared
+		in inputs, so the template referenced an undefined variable.
+	-->
+	<command interpreter="python"> samtoolshelper.py RPKM_count.py -i $input -o output -r $refgene
+
+		#if str($strand_type.strand_specific) == "pair"
+			-d
+			#if str($strand_type.pair_type) == "sd"
+				'1++,1--,2+-,2-+'
+			#else
+				'1+-,1-+,2++,2--'
+			#end if
+		#end if
+
+		#if str($strand_type.strand_specific) == "single"
+			-d
+			#if str($strand_type.single_type) == "s"
+				'++,--'
+			#else
+				'+-,-+'
+			#end if
+		#end if
+
+		#if $skiphits
+			-u
+		#end if
+
+		#if $onlyexonic
+			-e
+		#end if
+
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam" label="input bam/sam file" />
+		<param name="refgene" type="data" format="bed" label="Reference gene model" />
+		<!-- strand_specific picks which when-case is shown; the default is spelled like the option value "none". -->
+		<conditional name="strand_type">
+			<param name="strand_specific" type="select" label="Strand-specific?" value="none">
+				<option value="none">None</option>
+				<option value="pair">Pair-End RNA-seq</option>
+				<option value="single">Single-End RNA-seq</option>
+			</param>
+			<when value="pair">
+				<param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd">
+					<option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
+					<option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
+				</param>
+			</when>
+			<when value="single">
+				<param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s">
+					<option value="s">positive --> positive; negative --> negative</option>
+					<option value="d">positive --> negative; negative --> positive</option>
+				</param>
+			</when>
+			<when value="none"></when>
+		</conditional>
+		<param name="skiphits" type="boolean" value="false" label="Skip Multiple Hit Reads" />
+		<param name="onlyexonic" type="boolean" value="false" label="Only use exonic (UTR exons and CDS exons) reads, otherwise use all reads" />
+	</inputs>
+	<!-- The result table is collected from the job working directory. -->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output_read_count.xls"/>
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Strand sequencing type (default=none)
+	See Infer Experiment tool if uncertain.
+
+Options
+++++++++++++++
+
+Skip Multiple Hit Reads
+	If checked, multiple-hit reads are skipped and only uniquely mapped reads are used.
+
+Only use exonic reads
+	If checked, only exonic (UTR exons and CDS exons) reads are used; otherwise all reads are used.
+
+Sample Output
+++++++++++++++
+
+=====	=====		===			=========				=====	===========		=============	=============	========	========
+chrom	start		end			accession				score	gene strand 	tag count (+)	tag count (-)	RPKM (+)	RPKM (-)
+=====	=====		===			=========				=====	===========		=============	=============	========	========
+chr1	29213722	29313959	NM_001166007_intron_1	0		+				431				4329			0.086		0.863
+chr1	29314417	29319841	NM_001166007_intron_2	0		+				31				1				0.114		0.004
+chr1	29320054	29323726	NM_001166007_intron_3	0		+				32				0				0.174		0
+chr1	29323831	29338376	NM_001166007_intron_4	0		+				33				2				0.045		0.003
+chr1	29338419	29342203	NM_001166007_intron_5	0		+				7				0				0.037		0
+chr1	29342279	29344735	NM_001166007_intron_6	0		+				35				4				0.285		0.033
+chr1	29344954	29356911	NM_001166007_intron_7	0		+				34				2				0.057		0.003
+chr1	29356999	29359604	NM_001166007_intron_8	0		+				19				1				0.146		0.008
+chr1	29359757	29362337	NM_001166007_intron_9	0		+				31				0				0.24		0
+chr1	29362435	29365765	NM_001166007_intron_10	0		+				11				1				0.066		0.006
+chr1	29365938	29379615	NM_001166007_intron_11	0		+				63				0				0.092		0
+chr1	29379824	29391493	NM_001166007_intron_12	0		+				383				8				0.656		0.014
+chr1	29391670	29424318	NM_001166007_intron_13	0		+				817				10				0.5			0.006
+chr1	29424447	29435847	NM_001166007_intron_14	0		+				28				0				0.049		0
+chr1	29435949	29438879	NM_001166007_intron_15	0		+				12				0				0.082		0
+chr1	29438960	29442210	NM_001166007_intron_16	0		+				22				2				0.135		0.012
+chr1	29442315	29443330	NM_001166007_intron_17	0		+				9				0				0.177		0
+chr1	29213602	29213722	NM_001166007_exon_1		0		+				164				0				27.321		0
+chr1	29313959	29314417	NM_001166007_exon_2		0		+				1699			4				74.158		0.175
+chr1	29319841	29320054	NM_001166007_exon_3		0		+				528				1				49.554		0.094
+chr1	29323726	29323831	NM_001166007_exon_4		0		+				168				0				31.985		0
+chr1	29338376	29338419	NM_001166007_exon_5		0		+				88				0				40.911		0
+chr1	29342203	29342279	NM_001166007_exon_6		0		+				114				3				29.986		0.789
+chr1	29344735	29344954	NM_001166007_exon_7		0		+				290				10				26.472		0.913
+chr1	29356911	29356999	NM_001166007_exon_8		0		+				146				1				33.166		0.227
+chr1	29359604	29359757	NM_001166007_exon_9		0		+				404				11				52.786		1.437
+chr1	29362337	29362435	NM_001166007_exon_10	0		+				85				7				17.339		1.428
+chr1	29365765	29365938	NM_001166007_exon_11	0		+				198				2				22.88		0.231
+chr1	29379615	29379824	NM_001166007_exon_12	0		+				306				5				29.269		0.478
+chr1	29391493	29391670	NM_001166007_exon_13	0		+				243				7				27.445		0.791
+chr1	29424318	29424447	NM_001166007_exon_14	0		+				298				7				46.18		1.085
+chr1	29435847	29435949	NM_001166007_exon_15	0		+				396				8				77.611		1.568
+chr1	29438879	29438960	NM_001166007_exon_16	0		+				307				0				75.767		0
+chr1	29442210	29442315	NM_001166007_exon_17	0		+				138				0				26.273		0
+chr1	29443330	29446558	NM_001166007_exon_18	0		+				2434			84				15.074		0.52
+chr1	29213602	29446558	NM_001166007_mRNA		0		+				8006			150				27.704		0.519
+=====	=====		===			=========				=====	===========		=============	=============	========	========
+	
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/RPKM_saturation.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,118 @@
+<tool id="RPKM_saturation" name="RPKM Saturation">
+	<description>resamples reads at a series of percentiles and calculates RPKM for each subset to assess saturation</description>
+	<!-- External packages this tool needs at run time. -->
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!--
+		Runs RPKM_saturation.py and writes results under the prefix "output".
+		-d RULE is appended only for the "pair" and "single" strand settings;
+		-l, -u, -s and -c always carry the sampling floor, ceiling, step and RPKM cutoff.
+	-->
+	<command interpreter="python"> RPKM_saturation.py -i $input -o output -r $refgene
+
+		#if str($strand_type.strand_specific) == "pair"
+			-d
+			#if str($strand_type.pair_type) == "sd"
+				'1++,1--,2+-,2-+'
+			#else
+				'1+-,1-+,2++,2--'
+			#end if
+		#end if
+
+		#if str($strand_type.strand_specific) == "single"
+			-d
+			#if str($strand_type.single_type) == "s"
+				'++,--'
+			#else
+				'+-,-+'
+			#end if
+		#end if
+
+		-l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff
+
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam" label="input bam/sam file" />
+		<param name="refgene" type="data" format="bed" label="Reference gene model" />
+		<!-- strand_specific picks which when-case is shown; the default is spelled like the option value "none". -->
+		<conditional name="strand_type">
+			<param name="strand_specific" type="select" label="Strand-specific?" value="none">
+				<option value="none">None</option>
+				<option value="pair">Pair-End RNA-seq</option>
+				<option value="single">Single-End RNA-seq</option>
+			</param>
+			<when value="pair">
+				<param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd">
+					<option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
+					<option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
+				</param>
+			</when>
+			<when value="single">
+				<param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s">
+					<option value="s">positive --> positive; negative --> negative</option>
+					<option value="d">positive --> negative; negative --> positive</option>
+				</param>
+			</when>
+			<when value="none"></when>
+		</conditional>
+		<param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" />
+		<param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" />
+		<param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" />
+		<!-- The cutoff is numeric, so it is typed as float rather than free text that would reach the command line unchecked. -->
+		<param name="rpkmCutoff" type="float" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" />
+	</inputs>
+	<!-- Each dataset is collected from the job working directory. -->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls"/>
+		<data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls"/>
+		<data format="r" name="outputr" from_work_dir="output.saturation.r"/>
+		<data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf"/>
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Strand sequencing type (default=none)
+	See Infer Experiment tool if uncertain.
+
+Options
+++++++++++++++
+
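+Begin sampling from this percentile (default=5)
+	Sampling starts at this percentile.
+
+End sampling at this percentile (default=100)
+	Sampling ends at this percentile.
+
+Sampling step size (default=5)
+	Percentile increment between successive samples.
+
+Ignore transcripts with RPKM smaller than this number (default=0.01)
+	Transcripts whose RPKM is below this cutoff are ignored.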
+
+Output
+++++++++++++++
+
+1. output.eRPKM.xls: RPKM values for each transcript
+2. output.rawCount.xls: Raw count for each transcript
+3. output.saturation.r: R script to generate plot
+4. output.saturation.pdf:
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/saturation.png
+
+- All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups:
+	1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile.
+	2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile.
+	3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile.
+	4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile.
+- BAM/SAM file containing more than 100 million alignments will make module very slow.
+- Follow the example below to visualize a particular transcript (using R console).
+- Output example:
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/saturation_eg.png
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam2wig.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,121 @@
+<tool id="bam2wig" name="BAM to Wiggle">
+	<description>
+		converts all types of RNA-seq data from .bam to .wig
+	</description>
+	<!-- External packages this tool needs at run time. -->
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="0.1.18">samtools</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!--
+		Runs bam2wig.py through samtoolshelper.py and writes results under the prefix "outfile".
+		The script is referenced by bare name, as RPKM_count.xml does, instead of the
+		developer-specific absolute path /home/nilesh/RSeQC-2.3.3/scripts/bam2wig.py,
+		which exists on one machine only and does not match the declared rseqc 2.3.7.
+		Optional flags: -d RULE for strand-specific data, -t for a user-supplied wigsum,
+		-u when skipmultihits is checked.
+	-->
+	<command interpreter="python">
+		samtoolshelper.py bam2wig.py -i $input -s $chromsize -o outfile
+
+		#if str($strand_type.strand_specific) == "pair"
+			-d
+			#if str($strand_type.pair_type) == "sd"
+				'1++,1--,2+-,2-+'
+			#else
+				'1+-,1-+,2++,2--'
+			#end if
+		#end if
+
+		#if str($strand_type.strand_specific) == "single"
+			-d
+			#if str($strand_type.single_type) == "s"
+				'++,--'
+			#else
+				'+-,-+'
+			#end if
+		#end if
+
+		#if $wigsum.wigsum_type
+			-t $wigsum.totalwig
+		#end if
+
+		#if $skipmultihits
+			-u
+		#end if
+	</command>
+	<inputs>
+		<param name="input" type="data" label="Input .bam File" format="bam" />
+		<param name="chromsize" type="data" label="Chromosome size file (tab or space separated)" format="txt,tabular" />
+		<param name="skipmultihits" type="boolean" label="Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads" value="false" />
+		<!-- totalwig is only shown, and -t only passed, when wigsum_type is checked. -->
+		<conditional name="wigsum">
+			<param name="wigsum_type" type="boolean" label="Specify wigsum?" value="false">
+			</param>
+			<when value="true">
+				<param name="totalwig" value="0" type="integer" label="specified wigsum" />
+			</when>
+			<when value="false"></when>
+		</conditional>
+		<conditional name="strand_type">
+			<param name="strand_specific" type="select" label="Strand-specific?" value="none">
+				<option value="none">none</option>
+				<option value="pair">Pair-End RNA-seq</option>
+				<option value="single">Single-End RNA-seq</option>
+			</param>
+			<when value="pair">
+				<param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd">
+					<option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
+					<option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
+				</param>
+			</when>
+			<when value="single">
+				<param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s">
+					<option value="s">positive --> positive; negative --> negative</option>
+					<option value="d">positive --> negative; negative --> positive</option>
+				</param>
+			</when>
+			<when value="none"></when>
+		</conditional>
+	</inputs>
+	<!-- Unstranded runs yield a single wig; stranded runs yield a Forward and a Reverse wig instead. -->
+	<outputs>
+		<data format="wig" name="output" from_work_dir="outfile.wig">
+			<filter>strand_type['strand_specific'] == 'none'</filter>
+		</data>
+		<data format="wig" name="outputfwd" from_work_dir="outfile_Forward.wig">
+			<filter>strand_type['strand_specific'] != 'none'</filter>
+		</data>
+		<data format="wig" name="outputrv" from_work_dir="outfile_Reverse.wig">
+			<filter>strand_type['strand_specific'] != 'none'</filter>
+		</data>
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM file
+	Alignment file in BAM format (SAM is not supported). BAM file will be sorted and indexed using samTools.
+
+Chromosome size file
+	Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. Chromosome names (such as "chr1") should be consistent between this file and BAM file.
+
+Specified wigsum (default=none)
+	Specified wigsum. A wigsum of 100000000 equals the coverage achieved by 1 million 100nt reads. Ignore this option to disable normalization.
+
+Skip multiple Hit reads
+	Skip multiple-hit reads and use only uniquely mapped reads.
+
+Strand-specific (default=none)
+	How read(s) were stranded during sequencing. If you are not sure about the strand rule, run infer_experiment.py
+
+Outputs
+++++++++++++++
+
+If RNA-seq is not strand specific, one wig file will be generated, if RNA-seq
+is strand specific, two wig files corresponding to Forward and Reverse will be generated.
+
+
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_stat.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,49 @@
+<tool id="bam_stat" name="BAM/SAM Mapping Stats">
+	<description>
+		reads mapping statistics for a provided BAM or SAM file.
+	</description>
+	<!-- External package this tool needs at run time. -->
+	<requirements>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- Standard output of bam_stat.py is redirected into the single text dataset. -->
+	<command interpreter="python">
+		bam_stat.py -i $input -q $mapqual > $output
+	</command>
+	<inputs>
+		<param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" />
+		<param label="Minimum mapping quality (default=30)" type="integer" value="30" name="mapqual" />
+	</inputs>
+	<outputs>
+		<data format="txt" name="output" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Minimum mapping quality
+	Minimum mapping quality for an alignment to be called “uniquely mapped” (default=30)
+
+Output
+++++++++++++++
+
+- Total Reads (Total records) = {Multiple mapped reads} + {Uniquely mapped}
+- Uniquely mapped Reads = {read-1} + {read-2} (if paired end)
+- Uniquely mapped Reads = {Reads map to '+'} + {Reads map to '-'}
+- Uniquely mapped Reads = {Splice reads} + {Non-splice reads}
+
+
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clipping_profile.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,45 @@
+<tool id="clipping_profile" name="Clipping Profile">
+	<description>
+	 estimates clipping profile of RNA-seq reads from BAM or SAM file
+	</description>
+	<!-- External packages this tool needs at run time. -->
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- Result files are written under the prefix "output" in the job working directory. -->
+	<command interpreter="python">
+		clipping_profile.py -i $input -o output
+	</command>
+	<inputs>
+		<param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" />
+	</inputs>
+	<!--
+		NOTE(review): the pdf entry assumes the generated R script renders
+		output.clipping_profile.pdf, matching the plot shown in the help; confirm
+		against the installed clipping_profile.py.
+	-->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output.clipping_profile.xls" />
+		<data format="r" name="outputr" from_work_dir="output.clipping_profile.r" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.clipping_profile.pdf" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+
+Sample Output
+++++++++++++++
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/clipping_good.png
+
+
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geneBody_coverage.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,54 @@
+<!-- Galaxy wrapper around RSeQC geneBody_coverage.py for BAM input. -->
+<tool id="geneBody_coverage" name="Gene Body Coverage (BAM)">
+	<description>
+		Read coverage over gene body.
+	</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- Result files are written under the prefix "output" in the job working directory. -->
+	<command interpreter="python">
+		geneBody_coverage.py -i $input -r $refgene -o output
+	</command>
+	<inputs>
+		<param name="input" type="data" label="Input .bam file" format="bam" />
+		<!-- refgene is a BED gene model, so it is labelled as such rather than as a genome. -->
+		<param name="refgene" type="data" label="Reference gene model" format="bed" />
+	</inputs>
+	<outputs>
+		<data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" />
+		<data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" />
+		<data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM file
+	Alignment file in BAM format.
+
+Reference gene model
+	Gene Model in BED format.
+
+
+Outputs
+++++++++++++++
+
+Read coverage over gene body. This module is used to check if reads coverage is uniform and if there is any 5’/3’ bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates a plot illustrating the coverage profile along the gene body. NOTE: this module requires lots of memory for large BAM files, because it loads the entire BAM file into memory. Another script, "geneBody_coverage2.py", was added in v2.3.1; it takes bigwig (instead of BAM) as input. It uses only about 200M of RAM, but users need to convert BAM into WIG, and then WIG into BigWig.
+
+Example output:
+	.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/geneBody_coverage.png
+
+
+
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/geneBody_coverage2.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,54 @@
+<!--
+	Galaxy wrapper around RSeQC geneBody_coverage2.py for bigwig input.
+	The id is geneBody_coverage2 so it no longer collides with the BAM wrapper
+	defined in geneBody_coverage.xml, which uses the id geneBody_coverage.
++-->
+<tool id="geneBody_coverage2" name="Gene Body Coverage (Bigwig)">
+	<description>
+		Read coverage over gene body.
+	</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- Result files are written under the prefix "output" in the job working directory. -->
+	<command interpreter="python">
+		geneBody_coverage2.py -i $input -r $refgene -o output
+	</command>
+	<inputs>
+		<param name="input" type="data" label="Input bigwig file" format="bigwig" />
+		<!-- refgene is a BED gene model, so it is labelled as such rather than as a genome. -->
+		<param name="refgene" type="data" label="Reference gene model" format="bed" />
+	</inputs>
+	<outputs>
+		<data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" />
+		<data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" />
+		<data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input bigwig file
+	Coverage file in BigWig format.
+
+Reference gene model
+	Gene Model in BED format.
+
+
+Outputs
+++++++++++++++
+
+Read coverage over gene body. This module is used to check if reads coverage is uniform and if there is any 5’/3’ bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates a plot illustrating the coverage profile along the gene body. NOTE: the BAM-based module requires lots of memory for large BAM files, because it loads the entire BAM file into memory. This script, "geneBody_coverage2.py", was added in v2.3.1 and takes bigwig (instead of BAM) as input. It uses only about 200M of RAM, but users need to convert BAM into WIG, and then WIG into BigWig.
+
+Example output:
+	.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/geneBody_coverage.png
+
+
+
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/infer_experiment.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,117 @@
+<tool id="infer_experiment" name="Infer Experiment">
+	<description>speculates how RNA-seq were configured</description>
+	<!-- External package this tool needs at run time. -->
+	<requirements>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- -s is passed only when the user opts to override the sample size; standard output becomes the text dataset. -->
+	<command interpreter="python"> infer_experiment.py -i $input -r $refgene 
+	
+		#if $sample_size.boolean
+			-s $sample_size.size
+		#end if
+	
+		> $output
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="Input BAM/SAM file" />
+		<param name="refgene" type="data" format="bed" label="Reference gene model in bed format" />
+		<conditional name="sample_size">
+			<param name="boolean" type="boolean" label="Modify usable sampled reads" value="false" />
+			<when value="true">
+				<param name="size" type="integer" label="Number of usable sampled reads (default = 200000)" value="200000" />
+			</when>
+			<!-- Explicit empty case for the default (unchecked) state, as bam2wig.xml declares for its boolean conditional. -->
+			<when value="false"></when>
+		</conditional>
+	</inputs>
+	<outputs>
+		<data format="txt" name="output" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Number of usable sampled reads (default=200000)
+	Number of usable reads sampled from the SAM/BAM file. More reads give a more accurate estimation, but make the program a little slower.
+
+
+Output
+++++++++++++++
+This program is used to speculate how RNA-seq sequencing were configured, especially how reads were stranded for strand-specific RNA-seq data, through comparing reads' mapping information to the underneath gene model. Generally, strand specific RNA-seq data should be handled differently in both visualization and RPKM calculation.
+
+For pair-end RNA-seq, there are two different ways to strand reads:
+
+1) 1++,1--,2+-,2-+
+	- read1 mapped to '+' strand indicates parental gene on '+' strand
+	- read1 mapped to '-' strand indicates parental gene on '-' strand
+	- read2 mapped to '+' strand indicates parental gene on '-' strand
+	- read2 mapped to '-' strand indicates parental gene on '+' strand
+2) 1+-,1-+,2++,2--
+	- read1 mapped to '+' strand indicates parental gene on '-' strand
+	- read1 mapped to '-' strand indicates parental gene on '+' strand
+	- read2 mapped to '+' strand indicates parental gene on '+' strand
+	- read2 mapped to '-' strand indicates parental gene on '-' strand
+
+For single-end RNA-seq, there are also two different ways to strand reads:
+
+1) ++,--
+	- read mapped to '+' strand indicates parental gene on '+' strand
+	- read mapped to '-' strand indicates parental gene on '-' strand
+2) +-,-+
+	- read mapped to '+' strand indicates parental gene on '-' strand
+	- read mapped to '-' strand indicates parental gene on '+' strand
+
+Example Output
+++++++++++++++
+
+**Example1** ::
+
+	=========================================================
+	This is PairEnd Data
+
+	Fraction of reads explained by "1++,1--,2+-,2-+": 0.4992
+	Fraction of reads explained by "1+-,1-+,2++,2--": 0.5008
+	Fraction of reads explained by other combinations: 0.0000
+	=========================================================
+
+*Conclusion*: We can infer that this is NOT a strand specific because 50% of reads can be explained by "1++,1--,2+-,2-+", while the other 50% can be explained by "1+-,1-+,2++,2--".
+
+**Example2** ::
+
+	============================================================
+	This is PairEnd Data 
+
+	Fraction of reads explained by "1++,1--,2+-,2-+": 0.9644
+	Fraction of reads explained by "1+-,1-+,2++,2--": 0.0356	
+	Fraction of reads explained by other combinations: 0.0000
+	============================================================
+	
+*Conclusion*: We can infer that this is a strand-specific RNA-seq data. strandness of read1 is consistent with that of gene model, while strandness of read2 is opposite to the strand of reference gene model.
+
+**Example3** ::
+
+	=========================================================
+	This is SingleEnd Data
+
+	Fraction of reads explained by "++,--": 0.9840
+	Fraction of reads explained by "+-,-+": 0.0160
+	Fraction of reads explained by other combinations: 0.0000
+	=========================================================
+
+*Conclusion*: This is single-end, strand specific RNA-seq data. Strandness of reads are concordant with strandness of reference gene.
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/inner_distance.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,95 @@
+<tool id="inner_distance" name="Inner Distance">
+	<description>calculate the inner distance (or insert size) between two paired RNA reads</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- inner_distance.py is not shipped with this wrapper; it is expected on the PATH
+	     provided by the rseqc requirement, so it is called directly. With
+	     interpreter="python" Galaxy would look for the script in the tool directory.
+	     Each optional flag is emitted only when its checkbox is ticked. -->
+	<command> inner_distance.py -i $input -o output -r $refgene
+
+		#if $bounds.hasLowerBound
+			-l $bounds.lowerBound
+		#end if
+
+		#if $bounds2.hasUpperBound
+			-u $bounds2.upperBound
+		#end if
+
+		#if $steps.step
+			-s $steps.stepSize
+		#end if
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<param name="refgene" type="data" format="bed" label="reference gene model" />
+		<!-- Boolean defaults are declared with "checked"; every conditional has an explicit
+		     (empty) "false" branch so both states of the checkbox are defined. -->
+		<conditional name="bounds">
+			<param name="hasLowerBound" type="boolean" label="Specify lower bound" checked="false" />
+			<when value="true">
+				<param name="lowerBound" type="integer" value="-250" label="Estimated Lower Bound (bp, default=-250)" />
+			</when>
+			<when value="false" />
+		</conditional>
+		<conditional name="bounds2">
+			<param name="hasUpperBound" type="boolean" label="Specify upper bound" checked="false" />
+			<when value="true">
+				<param name="upperBound" type="integer" value="250" label="Estimated Upper Bound (bp, default=250)" />
+			</when>
+			<when value="false" />
+		</conditional>
+		<conditional name="steps">
+			<param name="step" type="boolean" label="Specify step size" checked="false" />
+			<when value="true">
+				<param name="stepSize" type="integer" value="5" label="Step size (bp, default=5)" />
+			</when>
+			<when value="false" />
+		</conditional>
+	</inputs>
+	<!-- The command uses the fixed prefix "output"; the resulting files are picked up
+	     from the job working directory. -->
+	<outputs>
+		<data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt"/>
+		<data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" />
+		<data format="r" name="outputr" from_work_dir="output.inner_distance_plot.r" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Estimated Upper/Lower Bounds (defaults=250 and -250)
+	Estimated upper/lower bounds of inner distance (bp).
+
+Step size (default=5)
+	Step size of histogram
+
+
+Output
+++++++++++++++
+
+1. output.inner_distance.txt:
+
+   - first column is read ID
+   - second column is inner distance. Could be a negative value if PE reads overlapped or because of a mapping error (e.g. Read1_start &lt; Read2_start, while Read1_end &gt;&gt; Read2_end due to spliced mapping of read1)
+   - third column indicates how paired reads were mapped: PE_within_same_exon, PE_within_diff_exon, PE_reads_overlap
+
+2. output.inner_distance_freq.txt:
+
+   - inner distance starts
+   - inner distance ends
+   - number of read pairs
+   - note the first 2 columns are left side half open interval
+
+3. output.inner_distance_plot.r: R script to generate histogram
+4. output.inner_distance_plot.pdf: histogram plot
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/inner_distance.png
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/junction_annotation.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,73 @@
+<tool id="junction_annotation" name="Junction Annotation">
+	<description>compares detected splice junctions to reference gene model</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- junction_annotation.py is not shipped with this wrapper; it is expected on the PATH
+	     provided by the rseqc requirement, so it is called directly. With
+	     interpreter="python" Galaxy would look for the script in the tool directory. -->
+	<command> junction_annotation.py -i $input -o output -r $refgene
+
+		#if $intron.hasIntron
+			-m $intron.min_Intron
+		#end if
+
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<param name="refgene" type="data" format="bed" label="reference gene model" />
+		<!-- Boolean default is declared with "checked"; the empty "false" branch adds no parameters. -->
+		<conditional name="intron">
+			<param name="hasIntron" type="boolean" label="Specify minimum intron length" checked="false" />
+			<when value="true">
+				<param name="min_Intron" type="integer" value="50" label="Minimum intron length (bp, default=50)" />
+			</when>
+			<when value="false" />
+		</conditional>
+	</inputs>
+	<!-- The command uses the fixed prefix "output"; the resulting files are picked up
+	     from the job working directory. -->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output.junction.xls"/>
+		<data format="r" name="outputr" from_work_dir="output.junction_plot.r" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.splice_events.pdf"/>
+		<data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Minimum intron length (default=50)
+	Minimum intron length (bp).
+
+
+Output
+++++++++++++++
+
+1. output.junction.xls:
+
+   - chrom ID
+   - start position of junction (coordinate is 0 based)
+   - end position of junction (coordinate is 1 based)
+   - number of splice events supporting this junction
+   - 'annotated', 'complete_novel' or 'partial_novel'.
+
+2. output.junction_plot.r: R script to generate pie chart
+3. output.splice_junction.pdf: plot of splice junctions
+4. output.splice_events.pdf: plot of splice events
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/junction.png
+
+
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/junction_saturation.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,74 @@
+<tool id="junction_saturation" name="Junction Saturation">
+	<description>detects splice junctions from each subset and compares them to reference gene model</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- junction_saturation.py is not shipped with this wrapper; it is expected on the PATH
+	     provided by the rseqc requirement, so it is called directly. With
+	     interpreter="python" Galaxy would look for the script in the tool directory.
+	     The sampling options are passed only when the user chooses to specify them. -->
+	<command> junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice
+
+		#if $percentiles.specifyPercentiles
+			-l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep
+		#end if
+
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<param name="refgene" type="data" format="bed" label="reference gene model" />
+		<param name="intronSize" type="integer" label="Minimum intron size (bp, default=50)" value="50"/>
+		<param name="minSplice" type="integer" label="Minimum coverage (default=1)" value="1" />
+		<!-- The sampling bounds are percentiles, not base pairs (see the help section),
+		     so the labels no longer carry the unit "bp". -->
+		<conditional name="percentiles">
+			<param name="specifyPercentiles" type="boolean" label="Specify sampling bounds and frequency" checked="false" />
+			<when value="true">
+				<param name="lowBound" type="integer" value="5" label="Lower bound of sampling percentile (default=5)" />
+				<param name="upBound" type="integer" value="100" label="Upper bound of sampling percentile (default=100)" />
+				<param name="percentileStep" type="integer" value="5" label="Sampling increment (default=5)" />
+			</when>
+			<when value="false" />
+		</conditional>
+	</inputs>
+	<!-- The command uses the fixed prefix "output"; the resulting files are picked up
+	     from the job working directory. -->
+	<outputs>
+		<data format="r" name="outputr" from_work_dir="output.junctionSaturation_plot.r"/>
+		<data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf"/>
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Sampling Percentiles - Upper Bound, Lower Bound, Sampling Increment (defaults= 100, 5, and 5)
+	Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment.
+
+Minimum intron length (default=50)
+	Minimum intron length (bp).
+
+Minimum coverage (default=1)
+	Minimum number of supporting reads to call a junction.
+
+Output
+++++++++++++++
+
+1. output.junctionSaturation_plot.r: R script to generate plot
+2. output.junctionSaturation_plot.pdf
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/junction_saturation.png 
+
+In this example, current sequencing depth is almost saturated for "known junction" (red line) detection because the number of "known junctions" reaches a plateau. In other words, nearly all "known junctions" (expressed in this particular tissue) have already been detected, and continued sequencing will not detect additional "known junctions"; it will only increase junction coverage (i.e. each junction is covered by more reads). However, the current sequencing depth is not saturated for novel junctions (green).
+
+
+	</help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_GC.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,46 @@
+<tool id="read_GC" name="Read GC">
+	<description>determines GC% and read count</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- read_GC.py is not shipped with this wrapper; it is expected on the PATH provided by
+	     the rseqc requirement, so it is called directly. With interpreter="python" Galaxy
+	     would look for the script in the tool directory. -->
+	<command> read_GC.py -i $input -o output
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+	</inputs>
+	<!-- Collect the GC files documented in the help section (output.GC.xls, output.GC_plot.r,
+	     output.GC_plot.pdf). The previous list named the read_duplication DupRate files,
+	     which this tool does not declare anywhere else. -->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output.GC.xls"/>
+		<data format="r" name="outputr" from_work_dir="output.GC_plot.r" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.GC_plot.pdf" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Output
+++++++++++++++
+
+1. output.GC.xls: Two column, plain text file, first column is GC%, second column is read count
+2. output.GC_plot.r: R script to generate pdf file.
+3. output.GC_plot.pdf: graphical output generated from R script. 
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/read_gc.png
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_NVC.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,57 @@
+<tool id="read_NVC" name="Read NVC">
+	<description>to check the nucleotide composition bias</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- read_NVC.py is not shipped with this wrapper; it is expected on the PATH provided by
+	     the rseqc requirement, so it is called directly. With interpreter="python" Galaxy
+	     would look for the script in the tool directory.
+	     The x flag is passed only when the "Include N,X" checkbox is ticked. -->
+	<command> read_NVC.py -i $input -o output
+
+		#if $nx
+			-x
+		#end if
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<!-- Boolean default is declared with "checked". -->
+		<param name="nx" type="boolean" label="Include N,X in NVC plot" checked="false" />
+	</inputs>
+	<!-- The command uses the fixed prefix "output"; the resulting files are picked up
+	     from the job working directory. -->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output.NVC.xls"/>
+		<data format="r" name="outputr" from_work_dir="output.NVC_plot.r" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.NVC_plot.pdf" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Include N,X in NVC plot
+	Plots N and X alongside A, T, C, and G in plot.
+
+Output
+++++++++++++++
+
+This module is used to check the nucleotide composition bias. Due to random priming, certain patterns are over-represented at the beginning (5' end) of reads. This bias can easily be examined with an NVC (Nucleotide versus cycle) plot. The NVC plot is generated by overlaying all reads together, then calculating the nucleotide composition for each position of the read (or each sequencing cycle). In the ideal condition (the genome is random and RNA-seq reads are randomly sampled from the genome), we expect A%=C%=G%=T%=25% at each position of the reads.
+
+
+1. output.NVC.xls: plain text file, each row is position of read (or sequencing cycle), each column is nucleotide (A,C,G,T,N,X)
+2. output.NVC_plot.r: R script to generate NVC plot.
+3. output.NVC_plot.pdf: NVC plot.
+
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/NVC_plot.png
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_distribution.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,69 @@
+<tool id="read_distribution" name="Read Distribution">
+	<description>calculates how mapped reads were distributed over genome feature</description>
+	<requirements>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- read_distribution.py is not shipped with this wrapper; it is expected on the PATH
+	     provided by the rseqc requirement, so it is called directly. With
+	     interpreter="python" Galaxy would look for the script in the tool directory.
+	     The report is printed to stdout and redirected into the single output dataset. -->
+	<command> read_distribution.py -i $input -r $refgene > $output
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<param name="refgene" type="data" format="bed" label="reference gene model" />
+	</inputs>
+	<outputs>
+		<data format="txt" name="output" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Reference gene model
+	Gene model in BED format.
+
+Sample Output
+++++++++++++++
+
+::
+
+	Total Read: 44,826,454
+
+	Total Tags: 50,023,249
+
+	Total Assigned Tags: 36,057,402
+
+	Group	Total_bases	Tag_count	Tags/Kb
+	CDS_Exons	33302033	20022538	601.24
+	5'UTR_Exons	21717577	4414913	203.29
+	3'UTR_Exons	15347845	3641689	237.28
+	Introns	1132597354	6312099	5.57
+	TSS_up_1kb	17957047	215220	11.99
+	TSS_up_5kb	81621382	392192	4.81
+	TSS_up_10kb	149730983	769210	5.14
+	TES_down_1kb	18298543	266157	14.55
+	TES_down_5kb	78900674	730072	9.25
+	TES_down_10kb	140361190	896953	6.39
+
+Note:
+
+- "Total Reads": This does NOT include those QC fail, duplicate and non-primary hit reads
+- "Total Tags": reads spliced once will be counted as 2 tags, reads spliced twice will be counted as 3 tags, etc. And because of this, "Total Tags" >= "Total Reads"
+- "Total Assigned Tags": number of tags that can be unambiguously assigned to the 10 groups (above table).
+- Tags assigned to "TSS_up_1kb" were also assigned to "TSS_up_5kb" and "TSS_up_10kb", tags assigned to "TSS_up_5kb" were also assigned to "TSS_up_10kb". Therefore, "Total Assigned Tags" = CDS_Exons + 5'UTR_Exons + 3'UTR_Exons + Introns + TSS_up_10kb + TES_down_10kb.
+- When assigning tags to genome features, each tag is represented by its middle point.
+- RSeQC cannot assign those reads that: 1) hit intergenic regions beyond the region from TSS upstream 10Kb to TES downstream 10Kb. 2) hit regions covered by both 5'UTR and 3'UTR. This is possible when two head-to-tail transcripts overlap in UTR regions. 3) hit regions covered by both TSS upstream 10Kb and TES downstream 10Kb.
+
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_duplication.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,51 @@
+<tool id="read_duplication" name="Read Duplication">
+	<description>determines reads duplication rate with sequence-based and mapping-based strategies</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- read_duplication.py is not shipped with this wrapper; it is expected on the PATH
+	     provided by the rseqc requirement, so it is called directly. With
+	     interpreter="python" Galaxy would look for the script in the tool directory. -->
+	<command> read_duplication.py -i $input -o output -u $upLimit
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<param name="upLimit" type="integer" label="Upper Limit of Plotted Duplicated Times (default=500)" value="500" />
+	</inputs>
+	<!-- The command uses the fixed prefix "output"; the resulting files are picked up
+	     from the job working directory. -->
+	<outputs>
+		<data format="xls" name="outputxls" from_work_dir="output.dup.pos.DupRate.xls"/>
+		<data format="xls" name="outputseqxls" from_work_dir="output.dup.seq.DupRate.xls"/>
+		<data format="r" name="outputr" from_work_dir="output.DupRate_plot.r" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Upper Limit of Plotted Duplicated Times (default=500)
+	Only used for plotting.
+
+Output
+++++++++++++++
+
+1. output.dup.pos.DupRate.xls: Read duplication rate determined from mapping position of read. First column is "occurrence" or duplication times, second column is number of uniquely mapped reads.
+2. output.dup.seq.DupRate.xls: Read duplication rate determined from sequence of read. First column is "occurrence" or duplication times, second column is number of uniquely mapped reads.
+3. output.DupRate_plot.r: R script to generate pdf file
+4. output.DupRate_plot.pdf: graphical output generated from R script
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/duplicate.png
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/read_quality.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,50 @@
+<tool id="read_quality" name="Read Quality">
+	<description>determines Phred quality score</description>
+	<requirements>
+		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.3.7">rseqc</requirement>
+	</requirements>
+	<!-- read_quality.py is not shipped with this wrapper; it is expected on the PATH provided
+	     by the rseqc requirement, so it is called directly. With interpreter="python" Galaxy
+	     would look for the script in the tool directory. -->
+	<command> read_quality.py -i $input -o output -r $reduce
+	</command>
+	<inputs>
+		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
+		<!-- The threshold is a count of occurrences of a Phred score, not a Phred score itself
+		     (see the help section). -->
+		<param name="reduce" type="integer" label="Ignore Phred scores represented fewer than this many times (only applies to 'boxplot', default=1000)" value="1000" />
+	</inputs>
+	<!-- Output names must be unique. The heatmap and the boxplot used to share the name
+	     "outputpdf"; the boxplot keeps that name and the heatmap gets its own. -->
+	<outputs>
+		<data format="r" name="outputr" from_work_dir="output.qual.r" />
+		<data format="pdf" name="outputheatmappdf" from_work_dir="output.qual.heatmap.pdf" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.qual.boxplot.pdf" />
+	</outputs>
+	<help>
+.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+
+-----
+
+About RSeQC
++++++++++++
+
+The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+Inputs
+++++++++++++++
+
+Input BAM/SAM file
+	Alignment file in BAM/SAM format.
+
+Ignore phred scores less than this number (default=1000)
+	To avoid making a huge vector in R, nucleotides with a certain phred score that is represented fewer times than this number will be ignored. Increasing this number saves memory but reduces precision. This option only applies to the 'boxplot'.
+
+Output
+++++++++++++++
+
+1. output.qual.r
+2. output.qual.boxplot.pdf
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/36mer.qual.plot.png
+
+3. output.qual.heatmap.pdf
+
+.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/36mer.qual.heatmap.png
+
+The heatmap uses different colors to represent nucleotide density ("blue"=low density, "orange"=median density, "red"=high density).
+
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtoolshelper.py	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,20 @@
+import sys
+import subprocess as sp
+import os
+
+# Creates the sorted and indexed bam/bai files that are requried for both bam2wig and RSEQC_count
+def samtools_sorted(bam):
+	sortedbam = bam + ".sorted"
+	indexedbam = ".".join([sortedbam,"bam.bai"])
+	sp.call(['samtools', 'sort', '-m 1000000000', bam, sortedbam])
+	sortedbam = sortedbam + '.bam'
+	sp.call(['samtools', 'index', sortedbam, indexedbam])
+	return sortedbam
+
+def main(args):
+	args[2] = samtools_sorted(args[2])
+	sp.call(args)
+
+
+if __name__ == "__main__":
+	main(sys.argv[1:])
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Jul 11 12:31:33 2013 -0400
@@ -0,0 +1,60 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <!-- Builds R 2.15.1 from source and installs it under $INSTALL_DIR/lib (the configure
+         prefix). "make install" is used instead of moving only the build-tree launcher, and
+         PATH points at the bin directory of that same prefix. -->
+    <package name="R" version="2.15.1">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://CRAN.R-project.org/src/base/R-2/R-2.15.1.tar.gz</action>
+                <action type="shell_command">./configure --prefix=$INSTALL_DIR/lib</action>
+                <action type="shell_command">make</action>
+                <action type="shell_command">make install</action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/lib/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            You need a FORTRAN compiler or perhaps f2c in addition to a C compiler to build R.
+        </readme>
+    </package>
+    <!-- Builds samtools and bcftools from the source tarball and puts both binaries on PATH.
+         The downloaded release matches the declared package version (0.1.18). -->
+    <package name="samtools" version="0.1.18">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://sourceforge.net/projects/samtools/files/samtools/0.1.18/samtools-0.1.18.tar.bz2</action>
+                <action type="shell_command">make</action>
+                <action type="move_file">
+                    <source>samtools</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="move_file">
+                    <source>bcftools/bcftools</source>
+                    <destination>$INSTALL_DIR/bin</destination>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>Both BCFTools and Samtools are installed in this dependency. Compiling SAMtools requires the ncurses and zlib development libraries.</readme>
+    </package>
+    <!-- Installs RSeQC 2.3.7 with setup.py under $INSTALL_DIR/lib/rseqc and exposes its
+         Python modules (PYTHONPATH) and scripts (PATH) to the tools that require it. -->
+    <package name="rseqc" version="2.3.7">
+        <install version="1.0">
+            <actions>
+                <action type="download_by_url">http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz</action>
+                <action type="shell_command">python setup.py install --root $INSTALL_DIR/lib/rseqc</action>
+                <action type="set_environment">
+                    <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/lib/rseqc/usr/local/lib/python2.7/site-packages</environment_variable>
+                </action>
+                <action type="set_environment">
+                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/lib/rseqc/usr/local/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html#.
+        </readme>
+    </package>
+    
+</tool_dependency>