comparison RPKM_saturation.xml @ 49:6b33e31bda10 draft

Uploaded tar based on https://github.com/lparsons/galaxy_tools/tree/master/tools/rseqc 1a3c419bc0ded7c40cb2bc3e7c87bfb01ddfeba2
author lparsons
date Thu, 16 Jul 2015 17:43:43 -0400
parents eb339c5849bb
children f242ee103277
comparison
equal deleted inserted replaced
48:2e6190c29c54 49:6b33e31bda10
1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="2.4"> 1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="2.4galaxy1">
2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> 2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
3
4 <macros>
5 <import>rseqc_macros.xml</import>
6 </macros>
7
3 <requirements> 8 <requirements>
4 <requirement type="package" version="3.0.3">R</requirement> 9 <expand macro="requirement_package_r" />
5 <requirement type="package" version="1.7.1">numpy</requirement> 10 <expand macro="requirement_package_numpy" />
6 <requirement type="package" version="2.4">rseqc</requirement> 11 <expand macro="requirement_package_rseqc" />
7 </requirements> 12 </requirements>
8 <command> RPKM_saturation.py -i $input -o output -r $refgene 13
14 <expand macro="stdio" />
15
16 <version_command><![CDATA[RPKM_saturation.py --version]]></version_command>
17
18 <command><![CDATA[
19 RPKM_saturation.py -i $input -o output -r $refgene
9 20
10 #if str($strand_type.strand_specific) == "pair" 21 #if str($strand_type.strand_specific) == "pair"
11 -d 22 -d
12 #if str($strand_type.pair_type) == "sd" 23 #if str($strand_type.pair_type) == "sd"
13 '1++,1--,2+-,2-+' 24 '1++,1--,2+-,2-+'
24 '+-,-+' 35 '+-,-+'
25 #end if 36 #end if
26 #end if 37 #end if
27 38
28 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff 39 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff
40 ]]>
41 </command>
29 42
30 </command>
31 <stdio>
32 <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
33 <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
34 </stdio>
35 <inputs> 43 <inputs>
36 <param name="input" type="data" format="bam" label="input bam/sam file" /> 44 <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/>
37 <param name="refgene" type="data" format="bed" label="Reference gene model" /> 45 <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/>
38 <conditional name="strand_type"> 46 <conditional name="strand_type">
39 <param name="strand_specific" type="select" label="Strand-specific?" value="None"> 47 <param name="strand_specific" type="select" label="Strand-specific?" value="None">
40 <option value="none">None</option> 48 <option value="none">None</option>
41 <option value="pair">Pair-End RNA-seq</option> 49 <option value="pair">Pair-End RNA-seq</option>
42 <option value="single">Single-End RNA-seq</option> 50 <option value="single">Single-End RNA-seq</option>
43 </param> 51 </param>
44 <when value="pair"> 52 <when value="pair">
45 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> 53 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd" help="(--strand)">
46 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> 54 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
47 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> 55 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
48 </param> 56 </param>
49 </when> 57 </when>
50 <when value="single"> 58 <when value="single">
51 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> 59 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s" help="(--strand)">
52 <option value="s">positive --> positive; negative --> negative</option> 60 <option value="s">positive --> positive; negative --> negative</option>
53 <option value="d">positive --> negative; negative --> positive</option> 61 <option value="d">positive --> negative; negative --> positive</option>
54 </param> 62 </param>
55 </when> 63 </when>
56 <when value="none"></when> 64 <when value="none"></when>
57 </conditional> 65 </conditional>
58 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" /> 66 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" help="(--percentile-floor)"/>
59 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" /> 67 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" help="(--percentile-ceiling)" />
60 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" /> 68 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" help="(--percentile-step)" />
61 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" /> 69 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" help="(--rpkm-cutoff)" />
70 <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquely mapped'" help="(--mapq)" />
62 </inputs> 71 </inputs>
72
63 <outputs> 73 <outputs>
64 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/> 74 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/>
65 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/> 75 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/>
66 <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/> 76 <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/>
67 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/> 77 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/>
68 </outputs> 78 </outputs>
69 <help> 79
80 <!-- Unable to successfully run this script with test data
81 <tests>
82 <test>
83 <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/>
84 <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/>
85 <output name="outputxls" file="output.eRPKM.xls"/>
86 <output name="outputrawxls" file="output.rawCount.xls"/>
87 <output name="outputr" file="output.saturation.r"/>
88 </test>
89 </tests>
90 -->
91
92 <help><![CDATA[
70 RPKM_saturation.py 93 RPKM_saturation.py
71 ++++++++++++++++++ 94 ++++++++++++++++++
72 95
73 The precision of any sample statistics (RPKM) is affected by sample size (sequencing depth); 96 The precision of any sample statistics (RPKM) is affected by sample size (sequencing depth);
74 \'resampling\' or \'jackknifing\' is a method to estimate the precision of sample statistics by 97 \'resampling\' or \'jackknifing\' is a method to estimate the precision of sample statistics by
75 using subsets of available data. This module will resample a series of subsets from total RNA 98 using subsets of available data. This module will resample a series of subsets from total RNA
76 reads and then calculate RPKM value using each subset. By doing this we are able to check if 99 reads and then calculate RPKM value using each subset. By doing this we are able to check if
77 the current sequencing depth was saturated or not (or if the RPKM values were stable or not) 100 the current sequencing depth was saturated or not (or if the RPKM values were stable or not)
78 in terms of genes' expression estimation. If sequencing depth was saturated, the estimated 101 in terms of genes' expression estimation. If sequencing depth was saturated, the estimated
79 RPKM value will be stationary or reproducible. By default, this module will calculate 20 102 RPKM value will be stationary or reproducible. By default, this module will calculate 20
80 RPKM values (using 5%, 10%, ... , 95%, 100% of total reads) for each transcript. 103 RPKM values (using 5%, 10%, ... , 95%, 100% of total reads) for each transcript.
81 104
82 In the output figure, Y axis is "Percent Relative Error" or "Percent Error" which is used 105 In the output figure, Y axis is "Percent Relative Error" or "Percent Error" which is used
83 to measure how the RPKM estimated from a subset of reads (i.e. RPKMobs) deviates from real 106 to measure how the RPKM estimated from a subset of reads (i.e. RPKMobs) deviates from real
84 expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a 107 expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a
85 proxy, we use the RPKM estimated from total reads to approximate RPKMreal. 108 proxy, we use the RPKM estimated from total reads to approximate RPKMreal.
105 ++++++++++++++ 128 ++++++++++++++
106 129
107 Skip Multiple Hit Reads 130 Skip Multiple Hit Reads
108 Use Multiple hit reads or use only uniquely mapped reads. 131 Use Multiple hit reads or use only uniquely mapped reads.
109 132
110 Only use exonic reads 133 Only use exonic reads
111 Makes the program use only exonic (UTR exons and CDS exons) reads; otherwise all reads are used. 134 Makes the program use only exonic (UTR exons and CDS exons) reads; otherwise all reads are used.
112 135
113 Output 136 Output
114 ++++++++++++++ 137 ++++++++++++++
115 138
119 4. output.saturation.pdf: 142 4. output.saturation.pdf:
120 143
121 .. image:: http://rseqc.sourceforge.net/_images/saturation.png 144 .. image:: http://rseqc.sourceforge.net/_images/saturation.png
122 :height: 600 px 145 :height: 600 px
123 :width: 600 px 146 :width: 600 px
124 :scale: 80 % 147 :scale: 80 %
125 148
126 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: 149 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups:
127 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile. 150 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile.
128 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile. 151 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile.
129 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile. 152 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile.
138 dev.off() #close graphical device 161 dev.off() #close graphical device
139 162
140 .. image:: http://rseqc.sourceforge.net/_images/saturation_eg.png 163 .. image:: http://rseqc.sourceforge.net/_images/saturation_eg.png
141 :height: 600 px 164 :height: 600 px
142 :width: 600 px 165 :width: 600 px
143 :scale: 80 % 166 :scale: 80 %
144 167
145 ----- 168 -----
146 169
147 About RSeQC 170 About RSeQC
148 +++++++++++ 171 +++++++++++
149 172
150 The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. 173 The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
151 174
152 The RSeQC package is licensed under the GNU GPL v3 license. 175 The RSeQC package is licensed under the GNU GPL v3 license.
153 176
154 .. image:: http://rseqc.sourceforge.net/_static/logo.png 177 .. image:: http://rseqc.sourceforge.net/_static/logo.png
155 178
156 .. _RSeQC: http://rseqc.sourceforge.net/ 179 .. _RSeQC: http://rseqc.sourceforge.net/
180 ]]>
181 </help>
157 182
183 <expand macro="citations" />
158 184
159 </help>
160 </tool> 185 </tool>