Mercurial > repos > swebb > pycrac
comparison pyCRAC/pyCalculateFDRs.xml @ 0:19b20927172d draft
Uploaded
| author | swebb |
|---|---|
| date | Tue, 18 Jun 2013 09:11:00 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:19b20927172d |
|---|---|
| 1 <tool id ="pyCalculateFDRs" name="pyCalculateFDRs"> | |
| 2 <!-- Declares the single package dependency of this tool: pyCRAC. --> <requirements> | |
| 3 <requirement type="package">pyCRAC</requirement> | |
| 4 </requirements> | |
| 5 <!-- Cheetah template that builds the pyCalculateFDRs.py command line. The annotation argument is omitted when "all" is selected, and also when the default GTF is used with the "auto" choice while the scan switch is still on "wait", because the scan.annotation parameter only exists in the "scanning" branch. The arguments from the addOpt conditional are emitted only when its selector is "edit". NOTE(review): the -s option is passed here but does not appear in the parameter list of the help section; confirm the script accepts it. --> <command interpreter="python"> | |
| 6 /usr/local/bin/pyCalculateFDRs.py | |
| 7 -f $ftype.input | |
| 8 --file_type $ftype.file_type | |
| 9 --gtf=$addGTF.gtf | |
| 10 | |
| 11 #if $addGTF.annotate.annotations != "all": | |
| 12 #if $addGTF.gtfFile == "other" or $addGTF.annotate.annotations == "manual": | |
| 13 --annotation $addGTF.annotate.annotation | |
| 14 #elif $addGTF.annotate.scan.annotations == "scanning": | |
| 15 --annotation $addGTF.annotate.scan.annotation | |
| 16 #end if# | |
| 17 #end if# | |
| 18 --chromfile=$addChr.chr | |
| 19 #if $addOpt.options == "edit" | |
| 20 -s $addOpt.sequence | |
| 21 --min $addOpt.min | |
| 22 --minfdr $addOpt.minfdr | |
| 23 --iterations=$addOpt.iterations | |
| 24 --range $addOpt.range | |
| 25 #end if# | |
| 26 -o $output | |
| 27 | |
| 28 </command> | |
| 29 <!-- Invokes the installed script with its version flag. --> <version_command>/usr/local/bin/pyCalculateFDRs.py --version</version_command> | |
| 30 <inputs> | |
| 31 <!-- Input selection: the file_type select (gff, bed or gtf) decides which dataset format the "input" parameter accepts. The command template reads both values as $ftype.file_type and $ftype.input. --> <conditional name="ftype"> | |
| 32 <param name="file_type" type="select" label="Input File Type --file_type" help="Use bed6, gff or gtf input files containing read/cDNA co-ordinates"> | |
| 33 <option value="gff" selected="true">GFF</option> | |
| 34 <option value="bed">Bed6</option> | |
| 35 <option value="gtf">GTF</option> | |
| 36 </param> | |
| 37 <when value="gff"> | |
| 38 <param format="gff" name="input" type="data" label="Input File --readdatafile" help="GFF format containing read/cDNA co-ordinates" /> | |
| 39 </when> | |
| 40 <when value="gtf"> | |
| 41 <param format="gtf" name="input" type="data" label="Input File --readdatafile" help="GTF format containing read/cDNA co-ordinates" /> | |
| 42 </when> | |
| 43 <when value="bed"> | |
| 44 <param format="bed6" name="input" type="data" label="Input File --readdatafile" help="Bed 6 column format containing read/cDNA co-ordinates" /> | |
| 45 </when> | |
| 46 </conditional> | |
| 47 | |
| 48 <!-- Chromosome length file: taken either from the pycrac_chr data table ("default") or from a tabular history dataset ("other"). Both branches name the parameter "chr", which the command template reads as $addChr.chr. --> <conditional name="addChr"> | |
| 49 <param name="chrfile" type="select" label="Choose Chromosome length file from"> | |
| 50 <option value="default" selected="true">Defaults</option> | |
| 51 <option value="other">History</option> | |
| 52 </param> | |
| 53 <when value="default"> | |
| 54 <param name="chr" type="select" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes"> | |
| 55 <options from_data_table="pycrac_chr"/> | |
| 56 </param> | |
| 57 </when> | |
| 58 <when value="other"> | |
| 59 <param format="tabular" name="chr" type="data" label="Chromosome length file -c" help="This file should have two columns: first column is the names of the chromosomes, second column is length of the chromosomes. Use pyCrac utility pyCalculateChromosomeLengths to create."/> | |
| 60 </when> | |
| 61 </conditional> | |
| 62 | |
| 63 <!-- GTF annotation source: the "gtf" parameter comes from the pycrac_gtf data table ("default") or from a GTF history dataset ("other"). Each branch nests an "annotate" conditional offering all annotations, a free-text annotation, or a select list. With the default GTF the list is read from column index 0 of a separate tabular dataset, behind a wait/go "scan" switch; with a history GTF it is read from column index 1 of the selected GTF, filtered to unique values. --> <conditional name="addGTF"> | |
| 64 <param name="gtfFile" type="select" label="Choose GTF File from"> | |
| 65 <option value="default" selected="true">Defaults</option> | |
| 66 <option value="other">History</option> | |
| 67 </param> | |
| 68 <when value="default"> | |
| 69 <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"> | |
| 70 <options from_data_table="pycrac_gtf"/> | |
| 71 </param> | |
| 72 <conditional name="annotate"> | |
| 73 <param name="annotations" type="select" label="Select annotation"> | |
| 74 <option value="all" selected="true">All</option> | |
| 75 <option value="manual">Enter in text box</option> | |
| 76 <option value="auto">Scan pyGetGTFSources file</option> | |
| 77 </param> | |
| 78 <when value="all"> | |
| 79 <param name="annotation" type="hidden" format="txt" size="10" value="all"/> | |
| 80 </when> | |
| 81 <when value="manual"> | |
| 82 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | |
| 83 <validator type="empty_field" message="Please enter a value"/> | |
| 84 </param> | |
| 85 </when> | |
| 86 <when value="auto"> | |
| 87 <param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/> | |
| 88 <!-- The "annotation" select exists only in the "scanning" branch; the "wait" branch defines no parameters. --> <conditional name="scan"> | |
| 89 <param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO"> | |
| 90 <option value="wait" selected="true">Waiting</option> | |
| 91 <option value="scanning">Go</option> | |
| 92 </param> | |
| 93 <when value="wait"> | |
| 94 </when> | |
| 95 <when value="scanning"> | |
| 96 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | |
| 97 <options from_dataset="gtf_annotation"> | |
| 98 <column name="name" index="0"/> | |
| 99 <column name="value" index="0"/> | |
| 100 </options> | |
| 101 </param> | |
| 102 </when> | |
| 103 </conditional> | |
| 104 </when> | |
| 105 </conditional> | |
| 106 </when> | |
| 107 <when value="other"> | |
| 108 <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> | |
| 109 <conditional name="annotate"> | |
| 110 <param name="annotations" type="select" label="Select annotation"> | |
| 111 <option value="all" selected="true">All</option> | |
| 112 <option value="manual">Enter in text box</option> | |
| 113 <option value="auto">Scan selected file</option> | |
| 114 </param> | |
| 115 <when value="all"> | |
| 116 <param name="annotation" type="hidden" format="txt" size="10" value="all"/> | |
| 117 </when> | |
| 118 <when value="manual"> | |
| 119 <param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool"> | |
| 120 <validator type="empty_field" message="Please enter a value"/> | |
| 121 </param> | |
| 122 </when> | |
| 123 <when value="auto"> | |
| 124 <param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation"> | |
| 125 <options from_dataset="gtf"> | |
| 126 <column name="name" index="1"/> | |
| 127 <column name="value" index="1"/> | |
| 128 <filter type="unique_value" name="unique" column="1"/> | |
| 129 </options> | |
| 130 </param> | |
| 131 </when> | |
| 132 </conditional> | |
| 133 </when> | |
| 134 </conditional> | |
| 135 <!-- Optional settings. With "default" selected no extra parameters are defined; with "edit" the sequence, min, minfdr, iterations and range parameters appear, and the command template passes them to the script only in that case. --> <conditional name="addOpt"> | |
| 136 <param name="options" type="select" label="Standard options"> | |
| 137 <option value="default" selected="true">Default</option> | |
| 138 <option value="edit">Edit</option> | |
| 139 </param> | |
| 140 <when value="edit"> | |
| 141 <param name="sequence" type="select" label="Align reads to --sequence"> | |
| 142 <option value="genomic" selected="true">Genomic Sequence</option> | |
| 143 <option value="coding">Coding Sequence</option> | |
| 144 </param> | |
| 145 <param format="integer" name="min" type="integer" label="Minimum read coverage --min " value="1" size="10" help="Set the minimal read coverage for a region"> | |
| 146 <validator type="in_range" min="1" message="Please enter a value >= 1"/> | |
| 147 </param> | |
| 148 <param name="minfdr" type="float" label="Minimum FDR threshold --minfdr" value="0.05" size="6" help="Set a minimal FDR threshold for filtering interval data"> | |
| 149 <validator type="in_range" min="0" max="1" message="Please enter a value between 0 and 1"/> | |
| 150 </param> | |
| 151 <param format="integer" name="iterations" type="integer" label="Number of iterations --iterations" value="100" size="6" help="The number of iterations for randomization of read coordinates"> | |
| 152 <validator type="in_range" min="0" message="Please enter a value >= 0"/> | |
| 153 </param> | |
| 154 <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000"> | |
| 155 <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/> | |
| 156 </param> | |
| 157 </when> | |
| 158 <when value="default"> | |
| 159 </when> | |
| 160 </conditional> | |
| 161 <!-- Free-text label; within this file it is used only to build the label of the output dataset and is not passed on the command line. --> <param name="label" type="text" format="txt" size="30" value="pyCalculateFDRs" label="Enter output file label -o" /> | |
| 162 </inputs> | |
| 163 <!-- Single GTF output dataset; the command template passes it to the script as $output after the -o flag, and its label is the "label" parameter value followed by ".gtf". --> <outputs> | |
| 164 <data format="gtf" name="output" label="${label.value}.gtf"/> | |
| 165 </outputs> | |
| 166 <help> | |
| 167 | |
| 168 .. class:: infomark | |
| 169 | |
| 170 **pyCalculateFDRs** | |
| 171 | |
| 172 By default the FDR value is set to 0.05, meaning that there is a 5% chance that the interval is not significantly enriched. | |
| 173 The tool reports significant intervals in the GTF format and reports overlapping genomic features. | |
| 174 Mutation frequencies are not included but these can be added using the pyCalculateMutationFrequencies tool | |
| 175 | |
| 176 **NOTE!** By default it calls each significant interval an "exon" but this has no meaning! It may overlap with an intron. | |
| 177 Use bedtools to extract those intervals that overlap with introns or other features | |
| 178 | |
| 179 Example of an output file:: | |
| 180 | |
| 181 ##gff-version 2 | |
| 182 # generated by pyCalculateFDRs version 0.0.3, Sat Jun 1 21:16:23 2013 | |
| 183 # pyCalculateFDRs.py -f test_count_output_reads.gtf -r 200 -o test_count_output_FDRs_005.gtf -v -m 0.05 | |
| 184 # chromosome feature source start end minimal_coverage strand . attributes | |
| 185 chrI protein_coding exon 140846 140860 5 - . gene_id "YAL005C"; gene_name "SSA1"; | |
| 186 chrI intergenic_region exon 223118 223164 4 - . gene_id "INT_0_179"; gene_name "INT_0_179"; | |
| 187 chrI intergenic_region exon 71889 71922 3 + . gene_id "INT_0_94"; gene_name "INT_0_94"; | |
| 188 chrII intergenic_region exon 296127 296158 3 - . gene_id "INT_0_365"; gene_name "INT_0_365"; | |
| 189 chrII intergenic_region exon 680697 680722 4 - . gene_id "INT_0_626"; gene_name "INT_0_626"; | |
| 190 chrII intergenic_region exon 680827 680846 4 - . gene_id "INT_0_626"; gene_name "INT_0_626"; | |
| 191 chrII snRNA exon 680827 680838 5 - . gene_id "LSR1"; gene_name "LSR1"; | |
| 192 chrII snRNA exon 680951 681001 5 - . gene_id "LSR1"; gene_name "LSR1"; | |
| 193 chrII intergenic_region exon 577985 577996 3 - . gene_id "INT_0_556"; gene_name "INT_0_556"; | |
| 194 chrII protein_coding exon 203838 203887 3 + . gene_id "YBL011W"; gene_name "SCT1"; | |
| 195 chrII protein_coding exon 296127 296158 3 - . gene_id "YBR028C"; gene_name "YBR028C"; | |
| 196 | |
| 197 | |
| 198 pyCalculateFDRs is part of the pyCRAC_ package. Takes interval information in GTF, GFF or bed format and calculates False Discovery Rates (FDRs). | |
| 199 | |
| 200 | |
| 201 .. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html | |
| 202 | |
| 203 ------ | |
| 204 | |
| 205 **Parameter list** | |
| 206 | |
| 207 Options:: | |
| 208 | |
| 209 -f read_file, --readdatafile=read_file | |
| 210 Name of the bed/gff/gtf file containing the read/cDNA | |
| 211 coordinates | |
| 212 --file_type=FILE_TYPE | |
| 213 this tool supports bed6, gtf and gff input files. | |
| 214 Please select from 'bed','gtf' or 'gff'. Default=gtf | |
| 215 -o outfile.gtf, --outfile=outfile.gtf | |
| 216 Optional. Provide the name of the output file. Default | |
| 217 is 'selected_intervals.gtf' | |
| 218 -r 100, --range=100 | |
| 219 allows you to set the length of the UTR regions. If | |
| 220 you set '-r 50' or '--range=50', then the program will | |
| 221 set a fixed length (50 bp) regardless of whether the | |
| 222 GTF file has genes with annotated UTRs. | |
| 223 -a protein_coding, --annotation=protein_coding | |
| 224 select which annotation (i.e. protein_coding, ncRNA, | |
| 225 sRNA, rRNA,snoRNA,snRNA, depending on the source of | |
| 226 your GTF file) you would like to focus your analysis | |
| 227 on. Default = all annotations | |
| 228 -c yeast.txt, --chromfile=yeast.txt | |
| 229 Location of the chromosome info file. This file should | |
| 230 have two columns: first column is the names of the | |
| 231 chromosomes, second column is length of the | |
| 232 chromosomes. Default is yeast | |
| 233 --gtf=yeast.gtf | |
| 234 Name of the annotation file. Default is /usr/local/pyC | |
| 235 RAC/db/Saccharomyces_cerevisiae.EF2.59.1.2.gtf | |
| 236 -m MINFDR, --minfdr=MINFDR | |
| 237 To set a minimal FDR threshold for filtering interval | |
| 238 data. Default is 0.05 | |
| 239 --min=MIN | |
| 240 to set a minimal read coverage for a region. Regions | |
| 241 with coverage less than the minimum will be ignored or have an | |
| 242 FDR of zero | |
| 243 --iterations=ITERATIONS | |
| 244 to set the number of iterations for randomization of | |
| 245 read coordinates. Default=100 | |
| 246 </help> | |
| 247 </tool> |
