comparison inner_distance.xml @ 49:6b33e31bda10 draft

Uploaded tar based on https://github.com/lparsons/galaxy_tools/tree/master/tools/rseqc 1a3c419bc0ded7c40cb2bc3e7c87bfb01ddfeba2
author lparsons
date Thu, 16 Jul 2015 17:43:43 -0400
parents eb339c5849bb
children 09846d5169fa
comparison
equal deleted inserted replaced
48:2e6190c29c54 49:6b33e31bda10
1 <tool id="rseqc_inner_distance" name="Inner Distance" version="2.4"> 1 <tool id="rseqc_inner_distance" name="Inner Distance" version="2.4galaxy1">
2 <description>calculate the inner distance (or insert size) between two paired RNA reads</description> 2 <description>calculate the inner distance (or insert size) between two paired RNA reads</description>
3
4 <macros>
5 <import>rseqc_macros.xml</import>
6 </macros>
7
3 <requirements> 8 <requirements>
4 <requirement type="package" version="3.0.3">R</requirement> 9 <expand macro="requirement_package_r" />
5 <requirement type="package" version="1.7.1">numpy</requirement> 10 <expand macro="requirement_package_numpy" />
6 <requirement type="package" version="2.4">rseqc</requirement> 11 <expand macro="requirement_package_rseqc" />
7 </requirements> 12 </requirements>
8 <command> 13
14 <expand macro="stdio" />
15
16 <version_command><![CDATA[inner_distance.py --version]]></version_command>
17
18 <command><![CDATA[
9 inner_distance.py -i $input -o output -r $refgene 19 inner_distance.py -i $input -o output -r $refgene
20 --sample-size $sample_size
21 --lower-bound $lowerBound
22 --upper-bound $upperBound
23 --step $step
24 --mapq $mapq
25 ]]>
26 </command>
10 27
11 #if $bounds.hasLowerBound 28 <inputs>
12 -l $bounds.lowerBound 29 <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)" />
13 #end if 30 <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)" />
31 <param name="sample_size" type="integer" label="Number of read-pairs used to estimate inner distance (default = 1000000)" value="1000000" help="(--sample-size)"/>
32 <param name="lowerBound" type="integer" value="-250" label="Lower bound (bp, default=-250)" help="Used for plotting histogram (--lower-bound)"/>
33 <param name="upperBound" type="integer" value="250" label="Upper bound (bp, default=250)" help="Used for plotting histogram (--upper-bound)"/>
34 <param name="step" type="integer" value="5" label="Step size of histogram (bp, default=5)" help="(--step)"/>
35 <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" />
36 </inputs>
14 37
15 #if $bounds2.hasUpperBound
16 -u $bounds2.upperBound
17 #end if
18
19 #if $steps.step
20 -s $steps.stepSize
21 #end if
22 </command>
23 <stdio>
24 <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" />
25 <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" />
26 </stdio>
27 <inputs>
28 <param name="input" type="data" format="bam,sam" label="input bam/sam file" />
29 <param name="refgene" type="data" format="bed" label="reference gene model" />
30 <conditional name="bounds">
31 <param name="hasLowerBound" type="boolean" label="Specify lower bound" value="false"/>
32 <when value="true">
33 <param name="lowerBound" type="integer" value="-250" label="Estimated Lower Bound (bp, default=-250)" />
34 </when>
35 </conditional>
36 <conditional name="bounds2">
37 <param name="hasUpperBound" type="boolean" label="Specify upper bound" value="false" />
38 <when value="true">
39 <param name="upperBound" type="integer" value="250" label="Estimated Upper Bound (bp, default=250)" />
40 </when>
41 </conditional>
42 <conditional name="steps">
43 <param name="step" type="boolean" label="Specify step size" value="false" />
44 <when value="true">
45 <param name="stepSize" type="integer" value="5" label="Step size (bp, default=5)" />
46 </when>
47 </conditional>
48 </inputs>
49 <outputs> 38 <outputs>
50 <data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (Text)"/> 39 <data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (Text)"/>
51 <data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (Freq Text)" /> 40 <data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (Freq Text)" />
52 <data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> 41 <data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" label="${tool.name} on ${on_string} (PDF)" />
53 <data format="txt" name="outputr" from_work_dir="output.inner_distance_plot.r" label="${tool.name} on ${on_string} (R Script)" /> 42 <data format="txt" name="outputr" from_work_dir="output.inner_distance_plot.r" label="${tool.name} on ${on_string} (R Script)" />
54 </outputs> 43 </outputs>
55 <help> 44
45 <tests>
46 <test>
47 <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/>
48 <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/>
49 <output name="outputtxt" file="output.inner_distance.txt"/>
50 <output name="outputfreqtxt" file="output.inner_distance_freq.txt"/>
51 <output name="outputpdf" file="output.inner_distance_plot.pdf"/>
52 <output name="outputr" file="output.inner_distance_plot.r"/>
53 </test>
54 </tests>
55
56 <help><![CDATA[
56 inner_distance.py 57 inner_distance.py
57 +++++++++++++++++ 58 +++++++++++++++++
58 59
59 This module is used to calculate the inner distance (or insert size) between two paired RNA 60 This module is used to calculate the inner distance (or insert size) between two paired RNA
60 reads. The distance is the mRNA length between two paired fragments. We first determine the 61 reads. The distance is the mRNA length between two paired fragments. We first determine the
61 genomic (DNA) size between two paired reads: D_size = read2_start - read1_end, then 62 genomic (DNA) size between two paired reads: D_size = read2_start - read1_end, then
62 63
63 * if two paired reads map to the same exon: inner distance = D_size 64 * if two paired reads map to the same exon: inner distance = D_size
64 * if two paired reads map to different exons: inner distance = D_size - intron_size 65 * if two paired reads map to different exons: inner distance = D_size - intron_size
65 * if two paired reads map to a non-exonic region (such as an intronic or intergenic region): inner distance = D_size 66 * if two paired reads map to a non-exonic region (such as an intronic or intergenic region): inner distance = D_size
66 * The inner distance may be negative if the two fragments overlap. 67 * The inner distance may be negative if the two fragments overlap.
67 68
68 NOTE: Not all read pairs are used to estimate the inner distance distribution. Low-quality, 69 NOTE: Not all read pairs are used to estimate the inner distance distribution. Low-quality,
69 PCR-duplicate, and multi-mapped reads are skipped. 70 PCR-duplicate, and multi-mapped reads are skipped.
70 71
71 Inputs 72 Inputs
72 ++++++++++++++ 73 ++++++++++++++
73 74
74 Input BAM/SAM file 75 Input BAM/SAM file
100 4. output.inner_distance_plot.pdf: histogram plot 101 4. output.inner_distance_plot.pdf: histogram plot
101 102
102 .. image:: http://rseqc.sourceforge.net/_images/inner_distance.png 103 .. image:: http://rseqc.sourceforge.net/_images/inner_distance.png
103 :height: 600 px 104 :height: 600 px
104 :width: 600 px 105 :width: 600 px
105 :scale: 80 % 106 :scale: 80 %
106 107
107 108
108 ----- 109 -----
109 110
110 About RSeQC 111 About RSeQC
111 +++++++++++ 112 +++++++++++
112 113
113 The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high-throughput sequence data, especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. 114 The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high-throughput sequence data, especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation.
114 115
115 The RSeQC package is licensed under the GNU GPL v3 license. 116 The RSeQC package is licensed under the GNU GPL v3 license.
116 117
117 .. image:: http://rseqc.sourceforge.net/_static/logo.png 118 .. image:: http://rseqc.sourceforge.net/_static/logo.png
118 119
119 .. _RSeQC: http://rseqc.sourceforge.net/ 120 .. _RSeQC: http://rseqc.sourceforge.net/
120 121
122 ]]>
123 </help>
121 124
122 </help> 125 <expand macro="citations" />
126
123 </tool> 127 </tool>