changeset 31:cc5eaa9376d8

Lance's updates
author nilesh
date Wed, 02 Oct 2013 02:20:04 -0400
parents b5d2f575ccb6
children 580ee0c4bc4e
files RPKM_count.xml RPKM_saturation.xml bam2wig.xml bam_stat.xml clipping_profile.xml geneBody_coverage.xml geneBody_coverage2.xml infer_experiment.xml inner_distance.xml junction_annotation.xml junction_saturation.xml read_GC.xml read_NVC.xml read_distribution.xml read_duplication.xml read_quality.xml tool_dependencies.xml
diffstat 17 files changed, 706 insertions(+), 385 deletions(-) [+]
line wrap: on
line diff
--- a/RPKM_count.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/RPKM_count.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,15 +1,14 @@
-<tool id="RPKM_count" name="RPKM Count">
+<tool id="RPKM_count" name="RPKM Count" version="1.1">
 	<description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
 	<requirements>
-		<requirement type="package" version="0.1.18">samtools</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> samtoolshelper.py RPKM_count.py -i $input -o output -r $refgene
+    <command>
+        ln -s "${input}" "local_input.bam" &amp;&amp;
+        ln -s "${input.metadata.bam_index}" "local_input.bam.bai" &amp;&amp;
+        RPKM_count.py -i "local_input.bam" -o output -r $refgene
 
-		#if $nx
-			-x
-		#end if
-		
 		#if str($strand_type.strand_specific) == "pair"
 			-d
 			#if str($strand_type.pair_type) == "sd"
@@ -66,17 +65,19 @@
 	<outputs>
 		<data format="xls" name="outputxls" from_work_dir="output_read_count.xls"/>
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
+RPKM_count.py
++++++++++++++
 
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+Given a BAM file and reference gene model, this program will calculate the raw count and RPKM
+values for transcript at exon, intron and mRNA level. For strand specific RNA-seq data,
+program will assign read to its parental gene according to strand rule, if you don't know the
+strand rule, run infer_experiment.py. Please note that chromosome ID, genome coordinates
+should be concordant between BAM and BED files. 
 
 Inputs
 ++++++++++++++
@@ -102,46 +103,30 @@
 Sample Output
 ++++++++++++++
 
-=====	=====		===			=========				=====	===========		=============	=============	========	========
-chrom	start		end			accession				score	gene strand 	tag count (+)	tag count (-)	RPKM (+)	RPKM (-)
-=====	=====		===			=========				=====	===========		=============	=============	========	========
-chr1	29213722	29313959	NM_001166007_intron_1	0		+				431				4329			0.086		0.863
-chr1	29314417	29319841	NM_001166007_intron_2	0		+				31				1				0.114		0.004
-chr1	29320054	29323726	NM_001166007_intron_3	0		+				32				0				0.174		0
-chr1	29323831	29338376	NM_001166007_intron_4	0		+				33				2				0.045		0.003
-chr1	29338419	29342203	NM_001166007_intron_5	0		+				7				0				0.037		0
-chr1	29342279	29344735	NM_001166007_intron_6	0		+				35				4				0.285		0.033
-chr1	29344954	29356911	NM_001166007_intron_7	0		+				34				2				0.057		0.003
-chr1	29356999	29359604	NM_001166007_intron_8	0		+				19				1				0.146		0.008
-chr1	29359757	29362337	NM_001166007_intron_9	0		+				31				0				0.24		0
-chr1	29362435	29365765	NM_001166007_intron_10	0		+				11				1				0.066		0.006
-chr1	29365938	29379615	NM_001166007_intron_11	0		+				63				0				0.092		0
-chr1	29379824	29391493	NM_001166007_intron_12	0		+				383				8				0.656		0.014
-chr1	29391670	29424318	NM_001166007_intron_13	0		+				817				10				0.5			0.006
-chr1	29424447	29435847	NM_001166007_intron_14	0		+				28				0				0.049		0
-chr1	29435949	29438879	NM_001166007_intron_15	0		+				12				0				0.082		0
-chr1	29438960	29442210	NM_001166007_intron_16	0		+				22				2				0.135		0.012
-chr1	29442315	29443330	NM_001166007_intron_17	0		+				9				0				0.177		0
-chr1	29213602	29213722	NM_001166007_exon_1		0		+				164				0				27.321		0
-chr1	29313959	29314417	NM_001166007_exon_2		0		+				1699			4				74.158		0.175
-chr1	29319841	29320054	NM_001166007_exon_3		0		+				528				1				49.554		0.094
-chr1	29323726	29323831	NM_001166007_exon_4		0		+				168				0				31.985		0
-chr1	29338376	29338419	NM_001166007_exon_5		0		+				88				0				40.911		0
-chr1	29342203	29342279	NM_001166007_exon_6		0		+				114				3				29.986		0.789
-chr1	29344735	29344954	NM_001166007_exon_7		0		+				290				10				26.472		0.913
-chr1	29356911	29356999	NM_001166007_exon_8		0		+				146				1				33.166		0.227
-chr1	29359604	29359757	NM_001166007_exon_9		0		+				404				11				52.786		1.437
-chr1	29362337	29362435	NM_001166007_exon_10	0		+				85				7				17.339		1.428
-chr1	29365765	29365938	NM_001166007_exon_11	0		+				198				2				22.88		0.231
-chr1	29379615	29379824	NM_001166007_exon_12	0		+				306				5				29.269		0.478
-chr1	29391493	29391670	NM_001166007_exon_13	0		+				243				7				27.445		0.791
-chr1	29424318	29424447	NM_001166007_exon_14	0		+				298				7				46.18		1.085
-chr1	29435847	29435949	NM_001166007_exon_15	0		+				396				8				77.611		1.568
-chr1	29438879	29438960	NM_001166007_exon_16	0		+				307				0				75.767		0
-chr1	29442210	29442315	NM_001166007_exon_17	0		+				138				0				26.273		0
-chr1	29443330	29446558	NM_001166007_exon_18	0		+				2434			84				15.074		0.52
-chr1	29213602	29446558	NM_001166007_mRNA		0		+				8006			150				27.704		0.519
-=====	=====		===			=========				=====	===========		=============	=============	========	========
+=====   ========   ========   =====================    =====  ===========   =============   =============   ========  =========
+chrom   start      end        accession                score  gene strand   tag count (+)   tag count (-)   RPKM (+)  RPKM (-)
+=====   ========   ========   =====================    =====  ===========   =============   =============   ========  =========
+chr1    29213722   29313959   NM_001166007_intron_1    0      '+'             431             4329            0.086     0.863
+chr1    29314417   29319841   NM_001166007_intron_2    0      '+'             31              1               0.114     0.004
+chr1    29320054   29323726   NM_001166007_intron_3    0      '+'             32              0               0.174     0.000
+chr1    29213602   29213722   NM_001166007_exon_1      0      '+'             164             0               27.321    0.000
+chr1    29313959   29314417   NM_001166007_exon_2      0      '+'            1699            4               74.158    0.175
+chr1    29319841   29320054   NM_001166007_exon_3      0      '+'             528             1               49.554    0.094
+=====   ========   ========   =====================    =====  ===========   =============   =============   ========  =========
 	
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
+
 	</help>
 </tool>
--- a/RPKM_saturation.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/RPKM_saturation.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,10 +1,11 @@
-<tool id="RPKM_saturation" name="RPKM Saturation">
+<tool id="RPKM_saturation" name="RPKM Saturation" version="1.1">
 	<description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> RPKM_saturation.py -i $input -o output -r $refgene
+	<command> RPKM_saturation.py -i $input -o output -r $refgene
 
 		#if str($strand_type.strand_specific) == "pair"
 			-d
@@ -56,22 +57,37 @@
 		<param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" />
 	</inputs>
 	<outputs>
-		<data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls"/>
-		<data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls"/>
-		<data format="r" name="outputr" from_work_dir="output.saturation.r"/>
-		<data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf"/>
+		<data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/>
+		<data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/>
+		<data format="r" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/>
+		<data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/>
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+RPKM_saturation.py
+++++++++++++++++++
 
------
+The precision of any sample statistics (RPKM) is affected by sample size (sequencing depth);
+\'resampling\' or \'jackknifing\' is a method to estimate the precision of sample statistics by
+using subsets of available data. This module will resample a series of subsets from total RNA
+reads and then calculate RPKM value using each subset. By doing this we are able to check if
+the current sequencing depth was saturated or not (or if the RPKM values were stable or not)
+in terms of genes' expression estimation. If sequencing depth was saturated, the estimated
+RPKM value will be stationary or reproducible. By default, this module will calculate 20
+RPKM values (using 5%, 10%, ... , 95%,100% of total reads) for each transcript. 
 
-About RSeQC
-+++++++++++
+In the output figure, the Y axis is "Percent Relative Error" or "Percent Error", which is used
+to measure how the RPKM estimated from a subset of reads (i.e. RPKMobs) deviates from the real
+expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a
+proxy, we use the RPKM estimated from total reads to approximate RPKMreal.
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+.. image:: http://rseqc.sourceforge.net/_images/RelativeError.png
+   :height: 80 px
+   :width: 400 px
+   :scale: 100 %
 
 Inputs
 ++++++++++++++
@@ -102,7 +118,10 @@
 3. output.saturation.r: R script to generate plot
 4. output.saturation.pdf:
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/saturation.png
+.. image:: http://rseqc.sourceforge.net/_images/saturation.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %     
 
 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups:
 	1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile.
@@ -111,8 +130,31 @@
 	4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile.
 - BAM/SAM file containing more than 100 million alignments will make module very slow.
 - Follow example below to visualize a particular transcript (using R console)::
-- output example
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/saturation_eg.png
+
+    pdf("xxx.pdf")     #starts the graphics device driver for producing PDF graphics
+    x &lt;- seq(5,100,5)  #resampling percentage (5,10,15,...,100)
+    rpkm &lt;- c(32.95,35.43,35.15,36.04,36.41,37.76,38.96,38.62,37.81,38.14,37.97,38.58,38.59,38.54,38.67, 38.67,38.87,38.68,  38.42,  38.23)  #Paste RPKM values calculated from each subsets
+    scatter.smooth(x,100*abs(rpkm-rpkm[length(rpkm)])/(rpkm[length(rpkm)]),type="p",ylab="Percent Relative Error",xlab="Resampling Percentage")
+    dev.off()          #close graphical device
+
+.. image:: http://rseqc.sourceforge.net/_images/saturation_eg.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 % 
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 	</help>
 </tool>
--- a/bam2wig.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/bam2wig.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,14 +1,16 @@
-<tool id="bam2wig" name="BAM to Wiggle">
+<tool id="bam2wig" name="BAM to Wiggle" version="1.1">
 	<description> 
 		converts all types of RNA-seq data from .bam to .wig 
 	</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
-		<requirement type="package" version="0.1.18">samtools</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> 
-		samtoolshelper.py /home/nilesh/RSeQC-2.3.3/scripts/bam2wig.py -i $input -s $chromsize -o outfile
+	<command> 
+        ln -s "${input}" "local_input.bam" &amp;&amp;
+        ln -s "${input.metadata.bam_index}" "local_input.bam.bai" &amp;&amp;
+		bam2wig.py -i "local_input.bam" -s $chromsize -o outfile
 
 		#if str($strand_type.strand_specific) == "pair"
 			-d
@@ -73,24 +75,29 @@
 		<data format="wig" name="output" from_work_dir="outfile.wig">
 			<filter>strand_type['strand_specific'] == 'none'</filter>
 		</data>
-		<data format="wig" name="outputfwd" from_work_dir="outfile_Forward.wig">
+		<data format="wig" name="outputfwd" from_work_dir="outfile_Forward.wig" label="${tool.name} on ${on_string} (Forward Reads)">
 			<filter>strand_type['strand_specific'] != 'none'</filter>
 		</data>
-		<data format="wig" name="outputrv" from_work_dir="outfile_Reverse.wig">
+		<data format="wig" name="outputrv" from_work_dir="outfile_Reverse.wig" label="${tool.name} on ${on_string} (Reverse Reads)">
 			<filter>strand_type['strand_specific'] != 'none'</filter>
 		</data>
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
+bam2wig.py
+++++++++++
 
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+Visualization is the most straightforward and effective way to QC your RNA-seq
+data. For example, change of expression or new splicing can be easily checked
+by visually comparing two RNA-seq tracks using genome browser such as UCSC_,
+IGB_ and IGV_.  `bam2wig.py` converts all types of RNA-seq data from BAM_
+format into wiggle_ format in one-stop.  wiggle_ files can then be easily
+converted into bigwig_.  Bigwig is an indexed, binary format of the wiggle file, and
+it's particularly useful for displaying large, continuous datasets on a genome
+browser.
 
 Inputs
 ++++++++++++++
@@ -116,6 +123,25 @@
 If RNA-seq is not strand specific, one wig file will be generated, if RNA-seq
 is strand specific, two wig files corresponding to Forward and Reverse will be generated.
 
+-----
+
+About RSeQC 
++++++++++++
+
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+.. _UCSC: http://genome.ucsc.edu/index.html
+.. _IGB: http://bioviz.org/igb/
+.. _IGV: http://www.broadinstitute.org/igv/home
+.. _BAM: http://genome.ucsc.edu/goldenPath/help/bam.html
+.. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html
+.. _bigwig: http://genome.ucsc.edu/FAQ/FAQformat.html#format6.1
 
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/bam_stat.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/bam_stat.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,12 +1,13 @@
-<tool id="bam_stat" name="BAM/SAM Mapping Stats">
+<tool id="bam_stat" name="BAM/SAM Mapping Stats" version="1.1">
 	<description>
 		reads mapping statistics for a provided BAM or SAM file.
 	</description>
 	<requirements>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>s
-	<command interpreter="python">
-		bam_stat.py -i $input -q $mapqual > $output
+	<command>
+		bam_stat.py -i $input -q $mapqual 2> $output
 	</command>
 	<inputs>
 		<param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" />
@@ -15,17 +16,19 @@
 	<outputs>
 		<data format="txt" name="output" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
-
-About RSeQC
+bam_stat.py
 +++++++++++
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+This program is used to calculate reads mapping statistics from provided BAM
+file.  This script determines "uniquely mapped reads" from `mapping quality`_,
+which quantifies the probability that a read is misplaced (do NOT confuse this with
+sequence quality, which measures the probability that a base call
+was wrong).
 
 Inputs
 ++++++++++++++
@@ -44,6 +47,19 @@
 - Uniquely mapped Reads = {Reads map to '+'} + {Reads map to '-'}
 - Uniquely mapped Reads = {Splice reads} + {Non-splice reads}
 
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+.. _`mapping quality`: http://genome.sph.umich.edu/wiki/Mapping_Quality_Scores
 
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/clipping_profile.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/clipping_profile.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,12 +1,13 @@
-<tool id="clipping_profile" name="Clipping Profile">
+<tool id="clipping_profile" name="Clipping Profile" version="1.1">
 	<description>
 	 estimates clipping profile of RNA-seq reads from BAM or SAM file
 	</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python">
+	<command>
 		clipping_profile.py -i $input -o output
 	</command>
 	<inputs>
@@ -16,17 +17,17 @@
 		<data format="xls" name="outputxls" from_work_dir="output.clipping_profile.xls" />
 		<data format="r" name="outputr" from_work_dir="output.clipping_profile.r" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
+clipping_profile.py
++++++++++++++++++++
 
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+This program is used to estimate clipping profile of RNA-seq reads from BAM or SAM file.
+Note that to use this function, CIGAR strings within SAM/BAM file should have 'S' operation
+(This means your read aligner should support clipped mapping).
 
 Inputs
 ++++++++++++++
@@ -38,8 +39,23 @@
 Sample Output
 ++++++++++++++
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/clipping_good.png
+.. image:: http://rseqc.sourceforge.net/_images/clipping_good.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %   
+
+-----
 
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
 
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/geneBody_coverage.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/geneBody_coverage.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,12 +1,13 @@
-<tool id="geneBody_coverage" name="Gene Body Converage (BAM)">
+<tool id="geneBody_coverage" name="Gene Body Converage (BAM)" version="1.1">
 	<description>
 		Read coverage over gene body.
 	</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python">
+	<command>
 		geneBody_coverage.py -i $input -r $refgene -o output
 	</command>
 	<inputs>
@@ -14,21 +15,25 @@
 		<param name="refgene" type="data" label="Reference Genome" format="bed" />
 	</inputs>
 	<outputs>
-		<data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" />
-		<data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" />
-		<data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" />
-	</outputs>
+		<data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" />
+		<data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" />
+    </outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+geneBody_coverage.py
+++++++++++++++++++++
 
------
-
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+Read coverage over gene body. This module is used to check if reads coverage is uniform and
+if there is any 5\'/3\' bias. This module scales all transcripts to 100 nt and calculates the
+number of reads covering each nucleotide position. Finally, it generates a plot illustrating
+the coverage profile along the gene body. NOTE: this module requires lots of memory for large
+BAM files, because it loads the entire BAM file into memory. We added another script 
+"geneBody_coverage2.py" in v2.3.1 which takes bigwig (instead of BAM) as input. 
+It only uses 200M RAM, but users need to convert BAM into WIG, and then WIG into BigWig. 
 
 Inputs
 ++++++++++++++
@@ -46,9 +51,26 @@
 Read coverage over gene body. This module is used to check if reads coverage is uniform and if there is any 5’/3’ bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates a plot illustrating the coverage profile along the gene body. NOTE: this module requires lots of memory for large BAM files, because it load the entire BAM file into memory. We add another script "geneBody_coverage2.py" into v2.3.1 which takes bigwig (instead of BAM) as input. It only use 200M RAM, but users need to convert BAM into WIG, and then WIG into BigWig.
 
 Example output:
-	.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/geneBody_coverage.png
+    .. image:: http://rseqc.sourceforge.net/_images/geneBody_coverage.png
+        :height: 600 px
+        :width: 600 px
+        :scale: 80 %                        
+
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
 
 
 
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/geneBody_coverage2.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/geneBody_coverage2.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,12 +1,13 @@
-<tool id="geneBody_coverage" name="Gene Body Converage (Bigwig)">
+<tool id="geneBody_coverage2" name="Gene Body Converage (Bigwig)" version="1.1">
 	<description>
 		Read coverage over gene body.
 	</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python">
+	<command>
 		geneBody_coverage2.py -i $input -r $refgene -o output
 	</command>
 	<inputs>
@@ -14,21 +15,21 @@
 		<param name="refgene" type="data" label="Reference Genome" format="bed" />
 	</inputs>
 	<outputs>
-		<data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" />
-		<data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" />
-		<data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" />
-	</outputs>
+		<data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" />
+		<data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" />
+    </outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+geneBody_coverage2.py
++++++++++++++++++++++
 
------
+Similar to geneBody_coverage.py. This module takes bigwig instead of BAM as input, and thus
+requires much less memory. The BigWig file could be arbitrarily large.
 
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
 
 Inputs
 ++++++++++++++
@@ -46,9 +47,25 @@
 Read coverage over gene body. This module is used to check if reads coverage is uniform and if there is any 5’/3’ bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates a plot illustrating the coverage profile along the gene body. NOTE: this module requires lots of memory for large BAM files, because it load the entire BAM file into memory. We add another script "geneBody_coverage2.py" into v2.3.1 which takes bigwig (instead of BAM) as input. It only use 200M RAM, but users need to convert BAM into WIG, and then WIG into BigWig.
 
 Example output:
-	.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/geneBody_coverage.png
+    .. image:: http://rseqc.sourceforge.net/_images/geneBody_coverage.png
+        :height: 600 px
+        :width: 600 px
+        :scale: 80 %    
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
 
 
 
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/infer_experiment.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/infer_experiment.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,9 +1,10 @@
-<tool id="infer_experiment" name="Infer Experiment">
+<tool id="infer_experiment" name="Infer Experiment" version="1.1">
 	<description>speculates how RNA-seq were configured</description>
 	<requirements>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> infer_experiment.py -i $input -r $refgene 
+	<command> infer_experiment.py -i $input -r $refgene 
 	
 		#if $sample_size.boolean
 			-s $sample_size.size
@@ -24,17 +25,18 @@
 	<outputs>
 		<data format="txt" name="output" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
+infer_experiment.py
++++++++++++++++++++
 
-About RSeQC
-+++++++++++
+This program is used to speculate how RNA-seq sequencing was configured, especially how
+reads were stranded for strand-specific RNA-seq data, by comparing reads' mapping
+information to the underlying gene model.
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
 
 Inputs
 ++++++++++++++
@@ -48,32 +50,38 @@
 Number of usable sampled reads (default=200000)
 	Number of usable reads sampled from SAM/BAM file. More reads will give more accurate estimation, but make program little slower.
 
+Outputs
++++++++
 
-Output
-++++++++++++++
-This program is used to speculate how RNA-seq sequencing were configured, especially how reads were stranded for strand-specific RNA-seq data, through comparing reads' mapping information to the underneath gene model. Generally, strand specific RNA-seq data should be handled differently in both visualization and RPKM calculation.
+For pair-end RNA-seq, there are two different
+ways to strand reads (such as Illumina ScriptSeq protocol):
 
-For pair-end RNA-seq, there are two different ways to strand reads:
+1. 1++,1--,2+-,2-+
 
-1) 1++,1--,2+-,2-+
-	- read1 mapped to '+' strand indicates parental gene on '+' strand
-	- read1 mapped to '-' strand indicates parental gene on '-' strand
-	- read2 mapped to '+' strand indicates parental gene on '-' strand
-	- read2 mapped to '-' strand indicates parental gene on '+' strand
-2) 1+-,1-+,2++,2--
-	- read1 mapped to '+' strand indicates parental gene on '-' strand
-	- read1 mapped to '-' strand indicates parental gene on '+' strand
-	- read2 mapped to '+' strand indicates parental gene on '+' strand
-    - read2 mapped to '-' strand indicates parental gene on '-' strand
+* read1 mapped to '+' strand indicates parental gene on '+' strand
+* read1 mapped to '-' strand indicates parental gene on '-' strand
+* read2 mapped to '+' strand indicates parental gene on '-' strand
+* read2 mapped to '-' strand indicates parental gene on '+' strand
+
+2. 1+-,1-+,2++,2--
+
+* read1 mapped to '+' strand indicates parental gene on '-' strand
+* read1 mapped to '-' strand indicates parental gene on '+' strand
+* read2 mapped to '+' strand indicates parental gene on '+' strand
+* read2 mapped to '-' strand indicates parental gene on '-' strand
 
 For single-end RNA-seq, there are also two different ways to strand reads:
 
-1) ++,--
-	-read mapped to '+' strand indicates parental gene on '+' strand
-	- read mapped to '-' strand indicates parental gene on '-' strand
-2) +-,-+
-	- read mapped to '+' strand indicates parental gene on '-' strand
-	- read mapped to '-' strand indicates parental gene on '+' strand
+1. ++,--
+
+* read mapped to '+' strand indicates parental gene on '+' strand
+* read mapped to '-' strand indicates parental gene on '-' strand
+
+2. +-,-+
+
+* read mapped to '+' strand indicates parental gene on '-' strand
+* read mapped to '-' strand indicates parental gene on '+' strand
+
 
 Example Output
 ++++++++++++++
@@ -113,5 +121,21 @@
 	=========================================================
 
 *Conclusion*: This is single-end, strand specific RNA-seq data. Strandness of reads are concordant with strandness of reference gene.
+
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
+
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/inner_distance.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/inner_distance.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,10 +1,11 @@
-<tool id="inner_distance" name="Inner Distance">
+<tool id="inner_distance" name="Inner Distance" version="1.1">
 	<description>calculate the inner distance (or insert size) between two paired RNA reads</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> inner_distance.py -i $input -o output -r $refgene
+	<command> inner_distance.py -i $input -o output -r $refgene
 
 		#if $bounds.hasLowerBound
 			-l $bounds.lowerBound
@@ -41,22 +42,30 @@
 		</conditional>
 	</inputs>
 	<outputs>
-		<data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt"/>
-		<data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" />
-		<data format="r" name="outputr" from_work_dir="output.inner_distance_plot.r" />
+		<data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (Text)"/>
+		<data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (Freq Text)" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" label="${tool.name} on ${on_string} (PDF)" />
+		<data format="r" name="outputr" from_work_dir="output.inner_distance_plot.r" label="${tool.name} on ${on_string} (R Script)" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+inner_distance.py
++++++++++++++++++
 
------
+This module is used to calculate the inner distance (or insert size) between two paired RNA
+reads. The distance is the mRNA length between two paired fragments. We first determine the
+genomic (DNA) size between two paired reads: D_size = read2_start - read1_end, then
 
-About RSeQC
-+++++++++++
+* if two paired reads map to the same exon: inner distance = D_size
+* if two paired reads map to different exons: inner distance = D_size - intron_size
+* if two paired reads map non-exonic region (such as intron and intergenic region): inner distance = D_size
+* The inner_distance might be a negative value if two fragments were overlapped. 
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+NOTE: Not all read pairs were used to estimate the inner distance distribution. Those low
+quality, PCR duplication, multiple mapped reads were skipped. 
 
 Inputs
 ++++++++++++++
@@ -78,18 +87,36 @@
 ++++++++++++++
 
 1. output.inner_distance.txt:
-- first column is read ID
--second column is inner distance. Could be negative value if PE reads were overlapped or mapping error (e.g. Read1_start < Read2_start, while Read1_end >> Read2_end due to spliced mapping of read1)
-- third column indicates how paired reads were mapped: PE_within_same_exon, PE_within_diff_exon,PE_reads_overlap
+    - first column is read ID
+    - second column is inner distance. Could be negative value if PE reads were overlapped or mapping error (e.g. Read1_start &lt; Read2_start, while Read1_end >> Read2_end due to spliced mapping of read1)
+    - third column indicates how paired reads were mapped: PE_within_same_exon, PE_within_diff_exon,PE_reads_overlap
 2. output..inner_distance_freq.txt:
-- inner distance starts
-- inner distance ends
-- number of read pairs
-- note the first 2 columns are left side half open interval
+    - inner distance starts
+    - inner distance ends
+    - number of read pairs
+    - note the first 2 columns are left side half open interval
 3. output.inner_distance_plot.r: R script to generate histogram
 4. output.inner_distance_plot.pdf: histogram plot
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/inner_distance.png
+.. image:: http://rseqc.sourceforge.net/_images/inner_distance.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %                        
+
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 	</help>
 </tool>
--- a/junction_annotation.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/junction_annotation.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,10 +1,11 @@
-<tool id="junction_annotation" name="Junction Annotation">
+<tool id="junction_annotation" name="Junction Annotation" version="1.1">
 	<description>compares detected splice junctions to reference gene model</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> junction_annotation.py -i $input -o output -r $refgene
+	<command> junction_annotation.py -i $input -o output -r $refgene
 
 		#if $intron.hasIntron
 			-m $intron.min_Intron
@@ -22,22 +23,32 @@
 		</conditional>
 	</inputs>
 	<outputs>
-		<data format="xls" name="outputxls" from_work_dir="output.junction.xls"/>
-		<data format="r" name="outputr" from_work_dir="output.junction_plot.r" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.splice_events.pdf"/>
-		<data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" />
+		<data format="xls" name="outputxls" from_work_dir="output.junction.xls" label="${tool.name} on ${on_string} (XLS)"/>
+		<data format="r" name="outputr" from_work_dir="output.junction_plot.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.splice_events.pdf" label="${tool.name} on ${on_string} (Splice Events PDF)"/>
+		<data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" label="${tool.name} on ${on_string} (Splice Junction PDF)" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+junction_annotation.py
+++++++++++++++++++++++
 
------
+For a given alignment file (-i) in BAM or SAM format and a reference gene model (-r) in BED
+format, this program will compare detected splice junctions to reference gene model. Splicing
+annotation is performed at two levels: splice event level and splice junction level.
+
+* splice event: An RNA read, especially long read, can be spliced 2 or more times, each time is called a splicing event; In this sense, 100 spliced reads can produce >= 100 splicing events. 
+* splice junction: multiple splicing events spanning the same intron can be consolidated into one splicing junction. 
 
-About RSeQC
-+++++++++++
+All detected junctions can be grouped to 3 exclusive categories:
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+1. Annotated: The junction is part of the gene model. Both splice sites, 5' splice site 
+   (5'SS) and 3'splice site (3'SS) can be annotated by reference gene model. 
+2. complete_novel: Complete new junction. Neither of the two splice sites can be annotated by gene model
+3. partial_novel: One of the splice sites (5'SS or 3'SS) is new, while the other splice site is annotated (known)
 
 Inputs
 ++++++++++++++
@@ -56,15 +67,32 @@
 ++++++++++++++
 
 1. output.junc.anno.junction.xls:
-- chrom ID
-- start position of junction (coordinate is 0 based)
-- end position of junction (coordinate is 1 based)
-- number of splice events supporting this junction
-- 'annotated', 'complete_novel' or 'partial_novel'.
+    - chrom ID
+    - start position of junction (coordinate is 0 based)
+    - end position of junction (coordinate is 1 based)
+    - number of splice events supporting this junction
+    - 'annotated', 'complete_novel' or 'partial_novel'.
 2. output.anno.junction_plot.r: R script to generate pie chart
 3. output.splice_junction.pdf: plot of splice junctions
 4. output.splice_events.pdf: plot of splice events
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/junction.png
+
+.. image:: http://rseqc.sourceforge.net/_images/junction.png
+   :height: 400 px
+   :width: 850 px
+   :scale: 80 %      
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
 
 
 
--- a/junction_saturation.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/junction_saturation.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,10 +1,11 @@
-<tool id="junction_saturation" name="Junction Saturation">
+<tool id="junction_saturation" name="Junction Saturation" version="1.1">
 	<description>detects splice junctions from each subset and compares them to reference gene model</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice
+	<command> junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice
 
 		#if $percentiles.specifyPercentiles
 			-l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep
@@ -26,20 +27,26 @@
 		</conditional>
 	</inputs>
 	<outputs>
-		<data format="r" name="outputr" from_work_dir="output.junctionSaturation_plot.r"/>
-		<data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf"/>
+		<data format="r" name="outputr" from_work_dir="output.junctionSaturation_plot.r" label="${tool.name} on ${on_string} (R Script)"/>
+		<data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf" label="${tool.name} on ${on_string} (PDF)"/>
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+junction_saturation.py
+++++++++++++++++++++++
 
------
-
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+It's very important to check if current sequencing depth is deep enough to perform
+alternative splicing analyses. For a well annotated organism, the number of expressed genes
+in particular tissue is almost fixed so the number of splice junctions is also fixed. The fixed
+splice junctions can be predetermined from reference gene model. All (annotated) splice
+junctions should be rediscovered from a saturated RNA-seq data, otherwise, downstream
+alternative splicing analysis is problematic because low abundance splice junctions are
+missing. This module checks for saturation by resampling 5%, 10%, 15%, ..., 95% of total
+alignments from BAM or SAM file, and then detects splice junctions from each subset and
+compares them to reference gene model. 
 
 Inputs
 ++++++++++++++
@@ -65,10 +72,28 @@
 1. output.junctionSaturation_plot.r: R script to generate plot
 2. output.junctionSaturation_plot.pdf
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/junction_saturation.png 
+.. image:: http://rseqc.sourceforge.net/_images/junction_saturation.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %    
 
 In this example, current sequencing depth is almost saturated for "known junction" (red line) detection because the number of "known junction" reaches a plateau. In other words, nearly all "known junctions" (expressed in this particular tissue) have already been detected, and continue sequencing will not detect additional "known junction" and will only increase junction coverage (i.e. junction covered by more reads). While current sequencing depth is not saturated for novel junctions (green).
 
 
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
+
+
 	</help>
-</tool>
\ No newline at end of file
+</tool>
--- a/read_GC.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/read_GC.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,31 +1,28 @@
-<tool id="read_GC" name="Read GC">
+<tool id="read_GC" name="Read GC" version="1.1">
 	<description>determines GC% and read count</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> read_GC.py -i $input -o output
+	<command> read_GC.py -i $input -o output
 	</command>
 	<inputs>
 		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
 	</inputs>
 	<outputs>
-		<data format="xls" name="outputxls" from_work_dir="output.dup.pos.DupRate.xls"/>
-		<data format="xls" name="outputseqxls" from_work_dir="output.dup.seq.DupRate.xls"/>
-		<data format="r" name="outputr" from_work_dir="output.DupRate_plot.r" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" />
+		<data format="xls" name="outputxls" from_work_dir="output.GC.xls" label="${tool.name} on ${on_string} (XLS)"/>
+		<data format="r" name="outputr" from_work_dir="output.GC_plot.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.GC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-		.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+read_GC.py
+++++++++++
 
------
-
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
 
 Inputs
 ++++++++++++++
@@ -40,7 +37,24 @@
 2. output.GC_plot.r: R script to generate pdf file.
 3. output.GC_plot.pdf: graphical output generated from R script. 
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/read_gc.png
+.. image:: http://rseqc.sourceforge.net/_images/read_gc.png 
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %    
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 	</help>
 </tool>
--- a/read_NVC.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/read_NVC.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,10 +1,11 @@
-<tool id="read_NVC" name="Read NVC">
+<tool id="read_NVC" name="Read NVC" version="1.1">
 	<description>to check the nucleotide composition bias</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> read_NVC.py -i $input -o output
+	<command> read_NVC.py -i $input -o output
 
 		#if $nx
 			-x
@@ -15,21 +16,26 @@
 		<param name="nx" type="boolean" label="Include N,X in NVC plot" value="false" />
 	</inputs>
 	<outputs>
-		<data format="xls" name="outputxls" from_work_dir="output.NVC.xls"/>
-		<data format="r" name="outputr" from_work_dir="output.NVC_plot.r" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.NVC_plot.pdf" />
+		<data format="xls" name="outputxls" from_work_dir="output.NVC.xls" label="${tool.name} on ${on_string} (XLS)" />
+		<data format="r" name="outputr" from_work_dir="output.NVC_plot.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.NVC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occurred during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occurred during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
-
-About RSeQC
+read_NVC.py
 +++++++++++
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+This module is used to check the nucleotide composition bias. Due to random priming, certain
+patterns are over represented at the beginning (5'end) of reads. This bias could be easily
+examined by NVC (Nucleotide versus cycle) plot. NVC plot is generated by overlaying all
+reads together, then calculating nucleotide composition for each position of read
+(or each sequencing cycle). In ideal condition (genome is random and RNA-seq reads are
+randomly sampled from genome), we expect A%=C%=G%=T%=25% at each position of reads.
 
-The RSeQC package is licensed under the GNU GPL v3 license.
+NOTE: this program expects a fixed read length
 
 Inputs
 ++++++++++++++
@@ -51,7 +57,24 @@
 3. output.NVC_plot.pdf: NVC plot.
 
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/NVC_plot.png
+.. image:: http://rseqc.sourceforge.net/_images/NVC_plot.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %    
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 	</help>
 </tool>
--- a/read_distribution.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/read_distribution.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,9 +1,10 @@
-<tool id="read_distribution" name="Read Distribution">
+<tool id="read_distribution" name="Read Distribution" version="1.1">
 	<description>calculates how mapped reads were distributed over genome feature</description>
 	<requirements>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> read_distribution.py -i $input -r $refgene > $output
+	<command> read_distribution.py -i $input -r $refgene > $output
 	</command>
 	<inputs>
 		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
@@ -12,17 +13,33 @@
 	<outputs>
 		<data format="txt" name="output" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
-
------
+read_distribution.py
+++++++++++++++++++++
 
-About RSeQC
-+++++++++++
+Provided a BAM/SAM file and a reference gene model, this module will calculate how mapped
+reads are distributed over genome features (such as CDS exons, 5' UTR exons, 3' UTR exons, introns,
+and intergenic regions). When genome features overlap (e.g. a region could be annotated
+as both exon and intron by two different transcripts), they are prioritized as:
+CDS exons > UTR exons > Introns > Intergenic regions. For example, if a read is mapped to
+both a CDS exon and an intron, it will be assigned to CDS exons.
 
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+* "Total Reads": This does NOT include QC-failed, duplicate, or non-primary hit reads.
+* "Total Tags": reads spliced once will be counted as 2 tags, reads spliced twice will be counted as 3 tags, etc. And because of this, "Total Tags" >= "Total Reads"
+* "Total Assigned Tags": number of tags that can be unambiguously assigned to the 10 groups (see the table below).
+* Tags assigned to "TSS_up_1kb" were also assigned to "TSS_up_5kb" and "TSS_up_10kb", tags assigned to "TSS_up_5kb" were also assigned to "TSS_up_10kb". Therefore, "Total Assigned Tags" = CDS_Exons + 5'UTR_Exons + 3'UTR_Exons + Introns + TSS_up_10kb + TES_down_10kb.
+* When assigning tags to genome features, each tag is represented by its middle point.
 
-The RSeQC package is licensed under the GNU GPL v3 license.
+RSeQC cannot assign those reads that:
+
+* hit intergenic regions that lie beyond the region spanning from 10Kb upstream of the TSS to 10Kb downstream of the TES.
+* hit regions covered by both a 5' UTR and a 3' UTR. This is possible when two head-to-tail transcripts overlap in their UTR regions.
+* hit regions covered by both TSS upstream 10Kb and TES downstream 10Kb.
+
 
 Inputs
 ++++++++++++++
@@ -36,33 +53,36 @@
 Sample Output
 ++++++++++++++
 
-::
-
-	Total Read: 44,826,454 ::
-
-	Total Tags: 50,023,249 ::
-
-	Total Assigned Tags: 36,057,402 ::
+Output:
 
-	Group	Total_bases	Tag_count	Tags/Kb
-	CDS_Exons	33302033	20022538	601.24
-	5'UTR_Exons	21717577	4414913	203.29
-	3'UTR_Exons	15347845	3641689	237.28
-	Introns	1132597354	6312099	5.57
-	TSS_up_1kb	17957047	215220	11.99
-	TSS_up_5kb	81621382	392192	4.81
-	TSS_up_10kb	149730983	769210	5.14
-	TES_down_1kb	18298543	266157	14.55
-	TES_down_5kb	78900674	730072	9.25
-	TES_down_10kb	140361190	896953	6.39
+===============     ============        ===========         ===========
+Group               Total_bases         Tag_count           Tags/Kb    
+===============     ============        ===========         ===========
+CDS_Exons           33302033            20002271            600.63     
+5'UTR_Exons         21717577            4408991             203.01     
+3'UTR_Exons         15347845            3643326             237.38     
+Introns             1132597354          6325392             5.58       
+TSS_up_1kb          17957047            215331              11.99      
+TSS_up_5kb          81621382            392296              4.81       
+TSS_up_10kb         149730983           769231              5.14       
+TES_down_1kb        18298543            266161              14.55      
+TES_down_5kb        78900674            729997              9.25       
+TES_down_10kb       140361190           896882              6.39       
+===============     ============        ===========         ===========
 
-Note:
-- "Total Reads": This does NOT include those QC fail,duplicate and non-primary hit reads
-- "Total Tags": reads spliced once will be counted as 2 tags, reads spliced twice will be counted as 3 tags, etc. And because of this, "Total Fragments" >= "Total Reads"
-- "Total Assigned Tags": number of tags that can be unambiguously assigned the 10 groups (above table).
-- Tags assigned to "TSS_up_1kb" were also assigned to "TSS_up_5kb" and "TSS_up_10kb", tags assigned to "TSS_up_5kb" were also assigned to "TSS_up_10kb". Therefore, "Total Assigned Tags" = CDS_Exons + 5'UTR_Exons + 3'UTR_Exons + Introns + TSS_up_10kb + TES_down_10kb.
-- When assigning tags to genome features, each tag is represented by its middle point.
-- RSeQC cannot assign those reads that: 1) hit to intergenic regions that beyond region starting from TSS upstream 10Kb to TES downstream 10Kb. 2) hit to regions covered by both 5'UTR and 3' UTR. This is possible when two head-to-tail transcripts are overlapped in UTR regions. 3) hit to regions covered by both TSS upstream 10Kb and TES downstream 10Kb.
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 
 	</help>
--- a/read_duplication.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/read_duplication.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,32 +1,34 @@
-<tool id="read_duplication" name="Read Duplication">
+<tool id="read_duplication" name="Read Duplication" version="1.1">
 	<description>determines reads duplication rate with sequence-based and mapping-based strategies</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> read_duplication.py -i $input -o output -u $upLimit
+	<command> read_duplication.py -i $input -o output -u $upLimit
 	</command>
 	<inputs>
 		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
 		<param name="upLimit" type="integer" label="Upper Limit of Plotted Duplicated Times (default=500)" value="500" />
 	</inputs>
 	<outputs>
-		<data format="xls" name="outputxls" from_work_dir="output.dup.pos.DupRate.xls"/>
-		<data format="xls" name="outputseqxls" from_work_dir="output.dup.seq.DupRate.xls"/>
-		<data format="r" name="outputr" from_work_dir="output.DupRate_plot.r" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" />
+		<data format="xls" name="outputxls" from_work_dir="output.dup.pos.DupRate.xls" label="${tool.name} on ${on_string} (Position XLS)"/>
+		<data format="xls" name="outputseqxls" from_work_dir="output.dup.seq.DupRate.xls" label="${tool.name} on ${on_string} (Sequence XLS)"/>
+		<data format="r" name="outputr" from_work_dir="output.DupRate_plot.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" label="${tool.name} on ${on_string} (PDF)" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+read_duplication.py
++++++++++++++++++++
 
------
+Two strategies are used to determine the read duplication rate:
 
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+* Sequence based: reads with exactly the same sequence content are regarded as duplicated reads. 
+* Mapping based: reads mapped to the same genomic location are regarded as duplicated reads. For spliced reads, reads mapped to the same starting position and spliced the same way are regarded as duplicated reads.
 
 Inputs
 ++++++++++++++
@@ -45,7 +47,24 @@
 3. output.DupRate_plot.r: R script to generate pdf file
 4. output.DupRate_plot.pdf: graphical output generated from R script
 
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/duplicate.png
+.. image:: http://rseqc.sourceforge.net/_images/duplicate.png
+   :height: 600 px
+   :width: 600 px
+   :scale: 80 %    
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 	</help>
 </tool>
--- a/read_quality.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/read_quality.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,31 +1,37 @@
-<tool id="read_quality" name="Read Quality">
+<tool id="read_quality" name="Read Quality" version="1.1">
 	<description>determines Phred quality score</description>
 	<requirements>
-		<requirement type="package" version="2.15.1">R</requirement>
+		<requirement type="package" version="2.11.0">R</requirement>
+		<requirement type="package" version="1.7.1">numpy</requirement>
 		<requirement type="package" version="2.3.7">rseqc</requirement>
 	</requirements>
-	<command interpreter="python"> read_quality.py -i $input -o output -r $reduce
+	<command> read_quality.py -i $input -o output -r $reduce
 	</command>
 	<inputs>
 		<param name="input" type="data" format="bam,sam" label="input bam/sam file" />
 		<param name="reduce" type="integer" label="Ignore Phred scores less than this amount (only applies to 'boxplot', default=1000)" value="1000" />
 	</inputs>
 	<outputs>
-		<data format="r" name="outputr" from_work_dir="output.qual.r" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.qual.heatmap.pdf" />
-		<data format="pdf" name="outputpdf" from_work_dir="output.qual.boxplot.pdf" />
+		<data format="r" name="outputr" from_work_dir="output.qual.r" label="${tool.name} on ${on_string} (R Script)" />
+		<data format="pdf" name="outputheatpdf" from_work_dir="output.qual.heatmap.pdf" label="${tool.name} on ${on_string} (Heatmap PDF)" />
+		<data format="pdf" name="outputpdf" from_work_dir="output.qual.boxplot.pdf" label="${tool.name} on ${on_string} (Boxplot PDF)" />
 	</outputs>
+    <stdio>
+        <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
+        <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
+    </stdio>
 	<help>
-.. image:: https://code.google.com/p/rseqc/logo?cct=1336721062
+read_quality.py
++++++++++++++++
 
------
-
-About RSeQC
-+++++++++++
-
-The RSeQC package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. “Basic modules” quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while “RNA-seq specific modules” investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
-
-The RSeQC package is licensed under the GNU GPL v3 license.
+According to the SAM specification, if Q is the character representing "base calling quality"
+in a SAM file, then Phred Quality Score = ord(Q) - 33. Here ord() is the Python function that
+returns an integer representing the Unicode code point of the character when the argument
+is a unicode object; for example, ord('a') returns 97. The Phred quality score is widely used
+to measure the "reliability" of base-calling: for example, a Phred quality score of 20 means
+there is a 1/100 chance that the base-calling is wrong, and a Phred quality score of 30 means there
+is a 1/1000 chance that the base-calling is wrong. In general: Phred quality score = -10 * log10(P),
+where P is the probability that the base-calling is wrong.
 
 Inputs
 ++++++++++++++
@@ -41,10 +47,31 @@
 
 1. output.qual.r
 2. output.qual.boxplot.pdf
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/36mer.qual.plot.png
+    .. image:: http://rseqc.sourceforge.net/_images/36mer.qual.plot.png
+        :height: 600 px
+        :width: 600 px
+        :scale: 80 %    
 3. output.qual.heatmap.pdf
-.. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/36mer.qual.heatmap.png
-use different color to represent nucleotide density ("blue"=low density,"orange"=median density,"red"=high density")
+    .. image:: http://rseqc.sourceforge.net/_images/36mer.qual.heatmap.png
+        :height: 600 px
+        :width: 600 px
+        :scale: 80 %    
+
+Heatmap: different colors are used to represent nucleotide density ("blue"=low density, "orange"=median density, "red"=high density).
+
+-----
+
+About RSeQC 
++++++++++++
+
+The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
+
+The RSeQC package is licensed under the GNU GPL v3 license.
+
+.. image:: http://rseqc.sourceforge.net/_static/logo.png
+
+.. _RSeQC: http://rseqc.sourceforge.net/
+
 
 	</help>
 </tool>
--- a/tool_dependencies.xml	Thu Jul 11 12:33:27 2013 -0400
+++ b/tool_dependencies.xml	Wed Oct 02 02:20:04 2013 -0400
@@ -1,14 +1,40 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="R" version="2.15.1">
+    <package name="R" version="2.11.0">
         <install version="1.0">
             <actions>
-                <action type="download_by_url">http://CRAN.R-project.org/src/base/R-2/R-2.15.1.tar.gz</action>
-                <action type="shell_command">./configure --prefix=$INSTALL_DIR/lib</action>
-                <action type="shell_command">make</action>
-                <action type="move_file">
-                    <source>bin/R</source>
-                    <destination>$INSTALL_DIR/lib/bin</destination>
+                <action type="download_by_url">http://cran.rstudio.com/src/base/R-2/R-2.11.0.tar.gz</action>
+                <action type="shell_command">
+                    ./configure --enable-R-shlib \
+                    --with-readline=no \
+                    --with-x=no \
+                    --prefix=$INSTALL_DIR \
+                    --libdir=$INSTALL_DIR/lib \
+                    --disable-R-framework
+                </action>
+                <action type="shell_command">make &amp;&amp; make install</action>
+                <action type="set_environment">
+                    <environment_variable action="set_to" name="R_HOME">$INSTALL_DIR/lib/R</environment_variable>
+                    <environment_variable action="set_to" name="R_LIBS">$INSTALL_DIR/lib/R/library</environment_variable>
+                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/lib/R/bin</environment_variable>
+                </action>
+            </actions>
+        </install>
+        <readme>
+            R is a free software environment for statistical computing and graphics.
+            NOTE: See custom compilation options above 
+        </readme>
+    </package>
+    <package name="numpy" version="1.7.1">
+        <repository toolshed="http://toolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="iuc" changeset_revision="74c21f9bdc39" />
+    </package>
+    <package name="rseqc" version="2.3.7">
+        <install version = "1.0">
+            <actions>
+                <action type="download_by_url">http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz</action>
+                <action type="shell_command">python setup.py install --root $INSTALL_DIR --prefix . --install-lib lib</action>
+                <action type="set_environment">
+                    <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/lib</environment_variable>
                 </action>
                 <action type="set_environment">
                     <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
@@ -16,45 +42,9 @@
             </actions>
         </install>
         <readme>
-            You need a FORTRAN compiler or perhaps f2c in addition to a C compiler to build R.
+            RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html#.
+            Requires gcc, python, numpy, and R
         </readme>
     </package>
-    <package name="samtools" version="0.1.18">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">http://sourceforge.net/projects/samtools/files/samtools/0.1.19/samtools-0.1.19.tar.bz2</action>
-                <action type="shell_command">make</action>
-                <action type="move_file">
-                    <source>samtools</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="move_file">
-                    <source>bcftools/bcftools</source>
-                    <destination>$INSTALL_DIR/bin</destination>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>Both BCFTools and Samtools installed in this dependency.Compiling SAMtools requires the ncurses and zlib development libraries.</readme>
-    </package>
-    <package name="rseqc" version="2.3.7">
-        <install version = "1.0">
-            <actions>
-                <action type="download_by_url">http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz</action>
-                <action type="shell_command">python setup.py install --root $INSTALL_DIR/lib/rseqc</action>
-                <action type="set_environment">
-                    <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/lib/rseqc/usr/local/lib/python2.7/site-packages</environment_variable>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/lib/rseqc/usr/local/bin</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-        	RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html#.
-        </readme>
-    </package>
-    
+
 </tool_dependency>