comparison RPKM_saturation.xml @ 51:09846d5169fa draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rseqc commit 37fb1988971807c6a072e1afd98eeea02329ee83
author iuc
date Tue, 14 Mar 2017 10:23:21 -0400
parents f242ee103277
children 5873cd7afb67
comparison
equal deleted inserted replaced
50:f242ee103277 51:09846d5169fa
1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="2.4galaxy1"> 1 <tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="@WRAPPER_VERSION@">
2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> 2 <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description>
3 3
4 <macros> 4 <macros>
5 <import>rseqc_macros.xml</import> 5 <import>rseqc_macros.xml</import>
6 </macros> 6 </macros>
7 7
8 <requirements> 8 <expand macro="requirements" />
9 <expand macro="requirement_package_r" />
10 <expand macro="requirement_package_numpy" />
11 <expand macro="requirement_package_rseqc" />
12 </requirements>
13 9
14 <expand macro="stdio" /> 10 <expand macro="stdio" />
15 11
16 <version_command><![CDATA[RPKM_saturation.py --version]]></version_command> 12 <version_command><![CDATA[RPKM_saturation.py --version]]></version_command>
17 13
18 <command><![CDATA[ 14 <command><![CDATA[
19 RPKM_saturation.py -i $input -o output -r $refgene 15 RPKM_saturation.py -i '${input}' -o output -r '${refgene}'
20 16
21 #if str($strand_type.strand_specific) == "pair" 17 #if str($strand_type.strand_specific) == "pair"
22 -d 18 -d
23 #if str($strand_type.pair_type) == "sd" 19 #if str($strand_type.pair_type) == "sd"
24 '1++,1--,2+-,2-+' 20 '1++,1--,2+-,2-+'
34 #else 30 #else
35 '+-,-+' 31 '+-,-+'
36 #end if 32 #end if
37 #end if 33 #end if
38 34
39 -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff 35 -l ${percentileFloor} -u ${percentileCeiling} -s ${percentileStep} -c ${rpkmCutoff}
40 ]]> 36 ]]>
41 </command> 37 </command>
42 38
43 <inputs> 39 <inputs>
44 <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> 40 <expand macro="bam_param" />
45 <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> 41 <expand macro="refgene_param" />
46 <conditional name="strand_type"> 42 <expand macro="strand_type_param" />
47 <param name="strand_specific" type="select" label="Strand-specific?" value="None">
48 <option value="none">None</option>
49 <option value="pair">Pair-End RNA-seq</option>
50 <option value="single">Single-End RNA-seq</option>
51 </param>
52 <when value="pair">
53 <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd" help="(--strand)">
54 <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option>
55 <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option>
56 </param>
57 </when>
58 <when value="single">
59 <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s" help="(--strand)">
60 <option value="s">positive --> positive; negative --> negative</option>
61 <option value="d">positive --> negative; negative --> positive</option>
62 </param>
63 </when>
64 <when value="none"></when>
65 </conditional>
66 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" help="(--percentile-floor)"/> 43 <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" help="(--percentile-floor)"/>
67 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" help="(--percentile-ceiling)" /> 44 <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" help="(--percentile-ceiling)" />
68 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" help="(--percentile-step)" /> 45 <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" help="(--percentile-step)" />
69 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" help="(--rpkm-cutoff)" /> 46 <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" help="(--rpkm-cutoff)" />
70 <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquly mapped'" help="(--mapq)" /> 47 <expand macro="mapq_param" />
48 <expand macro="rscript_output_param" />
71 </inputs> 49 </inputs>
72 50
73 <outputs> 51 <outputs>
74 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/> 52 <expand macro="pdf_output_data" filename="output.saturation.pdf" />
75 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/> 53 <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM xls)"/>
76 <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/> 54 <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count xls)"/>
77 <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/> 55 <expand macro="rscript_output_data" filename="output.saturation.r" />
78 </outputs> 56 </outputs>
79 57
80 <tests> 58 <tests>
81 <test> 59 <test>
82 <param name="input" value="pairend_strandspecific_51mer_hg19_random.bam"/> 60 <param name="input" value="pairend_strandspecific_51mer_hg19_random.bam"/>
83 <param name="refgene" value="hg19.HouseKeepingGenes_30.bed"/> 61 <param name="refgene" value="hg19.HouseKeepingGenes_30.bed"/>
62 <param name="rscript_output" value="true" />
84 <output name="outputxls"> 63 <output name="outputxls">
85 <assert_contents> 64 <assert_contents>
86 <has_n_columns n="26" /> 65 <has_n_columns n="26" />
87 <has_line_matching expression="chr1\t16174358\t16266950\tNM_015001.*" /> 66 <has_line_matching expression="chr1\t16174358\t16266950\tNM_015001.*" />
88 </assert_contents> 67 </assert_contents>
97 <assert_contents> 76 <assert_contents>
98 <has_text text="pdf('output.saturation.pdf')" /> 77 <has_text text="pdf('output.saturation.pdf')" />
99 <has_line_matching expression="S5=c\(\d+\.\d+\)" /> 78 <has_line_matching expression="S5=c\(\d+\.\d+\)" />
100 </assert_contents> 79 </assert_contents>
101 </output> 80 </output>
81 <output name="outputpdf" file="output.saturation.pdf" compare="sim_size" />
102 </test> 82 </test>
103 </tests> 83 </tests>
104 84
105 <help><![CDATA[ 85 <help><![CDATA[
106 RPKM_saturation.py 86 RPKM_saturation.py
118 In the output figure, Y axis is "Percent Relative Error" or "Percent Error" which is used 98 In the output figure, Y axis is "Percent Relative Error" or "Percent Error" which is used
119 to measure how the RPKM estimated from subset of reads (i.e. RPKMobs) deviates from real 99 to measure how the RPKM estimated from subset of reads (i.e. RPKMobs) deviates from real
120 expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a 100 expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a
121 proxy, we use the RPKM estimated from total reads to approximate RPKMreal. 101 proxy, we use the RPKM estimated from total reads to approximate RPKMreal.
122 102
123 .. image:: http://rseqc.sourceforge.net/_images/RelativeError.png 103 .. image:: $PATH_TO_IMAGES/RelativeError.png
124 :height: 80 px 104 :height: 80 px
125 :width: 400 px 105 :width: 400 px
126 :scale: 100 % 106 :scale: 100 %
127 107
128 Inputs 108 Inputs
152 1. output.eRPKM.xls: RPKM values for each transcript 132 1. output.eRPKM.xls: RPKM values for each transcript
153 2. output.rawCount.xls: Raw count for each transcript 133 2. output.rawCount.xls: Raw count for each transcript
154 3. output.saturation.r: R script to generate plot 134 3. output.saturation.r: R script to generate plot
155 4. output.saturation.pdf: 135 4. output.saturation.pdf:
156 136
157 .. image:: http://rseqc.sourceforge.net/_images/saturation.png 137 .. image:: $PATH_TO_IMAGES/saturation.png
158 :height: 600 px 138 :height: 600 px
159 :width: 600 px 139 :width: 600 px
160 :scale: 80 % 140 :scale: 80 %
161 141
162 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: 142 - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups:
171 x &lt;- seq(5,100,5) #resampling percentage (5,10,15,...,100) 151 x &lt;- seq(5,100,5) #resampling percentage (5,10,15,...,100)
172 rpkm &lt;- c(32.95,35.43,35.15,36.04,36.41,37.76,38.96,38.62,37.81,38.14,37.97,38.58,38.59,38.54,38.67, 38.67,38.87,38.68, 38.42, 38.23) #Paste RPKM values calculated from each subsets 152 rpkm &lt;- c(32.95,35.43,35.15,36.04,36.41,37.76,38.96,38.62,37.81,38.14,37.97,38.58,38.59,38.54,38.67, 38.67,38.87,38.68, 38.42, 38.23) #Paste RPKM values calculated from each subsets
173 scatter.smooth(x,100*abs(rpkm-rpkm[length(rpkm)])/(rpkm[length(rpkm)]),type="p",ylab="Precent Relative Error",xlab="Resampling Percentage") 153 scatter.smooth(x,100*abs(rpkm-rpkm[length(rpkm)])/(rpkm[length(rpkm)]),type="p",ylab="Precent Relative Error",xlab="Resampling Percentage")
174 dev.off() #close graphical device 154 dev.off() #close graphical device
175 155
176 .. image:: http://rseqc.sourceforge.net/_images/saturation_eg.png 156 .. image:: $PATH_TO_IMAGES/saturation_eg.png
177 :height: 600 px 157 :height: 600 px
178 :width: 600 px 158 :width: 600 px
179 :scale: 80 % 159 :scale: 80 %
180 160
181 ----- 161 @ABOUT@
182 162
183 About RSeQC
184 +++++++++++
185
186 The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
187
188 The RSeQC package is licensed under the GNU GPL v3 license.
189
190 .. image:: http://rseqc.sourceforge.net/_static/logo.png
191
192 .. _RSeQC: http://rseqc.sourceforge.net/
193 ]]> 163 ]]>
194 </help> 164 </help>
195 165
196 <expand macro="citations" /> 166 <expand macro="citations" />
197 167