comparison rsem_prepare_reference.xml @ 0:ca988deacfd1

Uploaded
author jjohnson
date Fri, 07 Feb 2014 08:07:29 -0500
parents
children 59459de65740
comparison
equal deleted inserted replaced
-1:000000000000 0:ca988deacfd1
<tool id="rsem_prepare_reference" name="RSEM prepare reference" version="1.1.17">
  <!--
    Galaxy wrapper for rsem-prepare-reference.
    The command runs inside the extra_files_path directory of the single
    "reference_file" output dataset (format rsem_ref), using the user-supplied
    reference name as the last positional argument.
  -->
  <description></description>

  <requirements>
    <requirement type="package" version="1.1.17">rsem</requirement>
    <requirement type="package" version="1.0.0">bowtie</requirement>
  </requirements>

  <!--
    Cheetah command template. Argument order:
      1. poly(A) options, chosen by the "polya" conditional
      2. the optional no-ntog flag (expands to an empty string when unchecked)
      3. the optional transcript-to-gene map
      4. either the transcript fasta, or the gtf option followed by the genome fasta
      5. the reference name
    "mkdir -p" is used so that an already existing extra_files_path directory
    does not abort the "&&" chain.
    Every optional dataset is guarded with "#if" so that an unset value is
    never expanded into the command line.
  -->
  <command>
    mkdir -p $reference_file.extra_files_path &amp;&amp;
    cd $reference_file.extra_files_path &amp;&amp;
    rsem-prepare-reference
    #if $polya.polya_use == 'add':
      #if $polya.polya_length:
        --polyA-length $polya.polya_length
      #end if
    #elif $polya.polya_use == 'subset':
      #if $polya.no_polya_subset:
        --no-polyA-subset $polya.no_polya_subset
      #end if
      #if $polya.polya_length:
        --polyA-length $polya.polya_length
      #end if
    #elif $polya.polya_use == 'none':
      --no-polyA
    #end if
    $ntog
    #if $transcript_to_gene_map:
      --transcript-to-gene-map $transcript_to_gene_map
    #end if
    #if $reference.ref_type == 'transcripts':
      $reference.reference_fasta_file
    #else:
      --gtf $reference.gtf
      $reference.reference_fasta_file
    #end if
    $reference_name
  </command>

  <inputs>
    <!-- Reference source: a transcript fasta alone, or a genome fasta plus a GTF annotation. -->
    <conditional name="reference">
      <param name="ref_type" type="select" label="Reference transcript source">
        <option value="transcripts">transcript fasta</option>
        <option value="genomic">reference genome and gtf</option>
      </param>
      <when value="transcripts">
        <param name="reference_fasta_file"
               type="data"
               format="fasta"
               label="reference fasta file"
               help="The files should contain the sequences of transcripts."/>
      </when>
      <when value="genomic">
        <param name="reference_fasta_file"
               type="data"
               format="fasta"
               label="reference fasta file"
               help="The file should contain the sequence of an entire genome."/>
        <param name="gtf"
               type="data"
               format="gtf"
               label="gtf"
               help="extract transcript reference sequences using the gene annotations specified in this GTF"/>
      </when>
    </conditional>

    <!-- Optional; the command template only passes it on when a dataset is selected. -->
    <param name="transcript_to_gene_map"
           type="data"
           format="tabular"
           optional="true"
           label="Map of gene ids to transcript (isoform) ids">
      <help>
        Each line should be of the form: gene_id transcript_id ( with the two fields separated by a tab character )
        The map can be obtained from the UCSC table browser
        group: Genes and Gene Prediction Tracks
        table: knownIsoforms
        Without a map:
        If a reference genome and gtf is used, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file.
        Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene.
      </help>
    </param>

    <!-- Restricted by the regex validator to word characters; also used in the output label. -->
    <param name="reference_name" type="text" value="rsem_ref_name" label="reference name">
      <help>A one-word name for this RSEM reference containing only letters, digits, and underscore characters</help>
      <validator type="regex" message="Use only letters, digits, and underscore characters">^\w+$</validator>
    </param>

    <!--
      poly(A) handling. The "add" and "subset" branches both define polya_length,
      which the command template reads as $polya.polya_length in either branch.
    -->
    <conditional name="polya">
      <param name="polya_use" type="select" label="PolyA ">
        <option value="add" selected="true">Add poly(A) tails to all transcripts</option>
        <option value="subset">Exclude poly(A) tails from selected transcripts</option>
        <option value="none">Do not add poly(A) tails to any transcripts</option>
      </param>
      <when value="add">
        <param name="polya_length"
               type="integer"
               value="125"
               optional="true"
               label="The length of the poly(A) tails to be added. (Default: 125)">
          <validator type="in_range" message="must be positive " min="1"/>
        </param>
      </when>
      <when value="subset">
        <param name="no_polya_subset"
               type="data"
               format="tabular"
               optional="true"
               label="List of transcript IDs (one per line) that should not have polyA tails added."/>
        <param name="polya_length"
               type="integer"
               value="125"
               optional="true"
               label="The length of the poly(A) tails to be added. (Default: 125)">
          <validator type="in_range" message="must be positive " min="1"/>
        </param>
      </when>
      <when value="none"/>
    </conditional>

    <!-- Checked: expands to the no-ntog flag; unchecked: expands to an empty string. -->
    <param name="ntog"
           type="boolean"
           truevalue="--no-ntog"
           falsevalue=""
           checked="false"
           label="Disable the conversion of 'N' characters to 'G' characters in the reference sequences"
           help="Bowtie uses the automatic N to G conversion to align against all positions in the reference."/>
  </inputs>

  <!-- Any exit code of 1 or greater is reported as a fatal error. -->
  <stdio>
    <exit_code range="1:" level="fatal" description="Error Running RSEM"/>
  </stdio>

  <outputs>
    <data format="rsem_ref" name="reference_file" label="RSEM ${reference_name} reference"/>
  </outputs>

  <tests>
    <test>
      <param name="ref_type" value="genomic"/>
      <param name="reference_fasta_file" value="ref.fasta" ftype="fasta"/>
      <param name="gtf" value="ref.gtf" ftype="gtf"/>
      <param name="reference_name" value="ref"/>
      <!-- The output is referenced by its declared name (reference_file), not by its format. -->
      <output name="reference_file">
        <assert_contents>
          <has_text text="ref.grp"/>
        </assert_contents>
      </output>
    </test>
  </tests>

  <help>

RSEM HOME PAGE - http://deweylab.biostat.wisc.edu/rsem/

NAME
    rsem-prepare-reference

SYNOPSIS
    rsem-prepare-reference [options] reference_fasta_file(s) reference_name

DESCRIPTION
    The rsem-prepare-reference program extracts/preprocesses the reference sequences and builds Bowtie indices using default parameters.
    This program is used in conjunction with the 'rsem-calculate-expression' program.

INPUTS
    A fasta file of transcripts
    or
    A genome sequence fasta file and a GTF gene annotation file. (When using UCSC data, include the related knownIsoforms.txt)

  </help>
</tool>