comparison proteomics_rnaseq_reduced_db_workflow_v2.ga @ 1:20d9fb1ba210 default tip

Replace several tabular manipulations with regex_replace tool
author Jim Johnson <jj@umn.edu>
date Thu, 20 Mar 2014 21:50:05 -0500
parents
children
comparison
equal deleted inserted replaced
0:9d5e59373c84 1:20d9fb1ba210
1 {
2 "a_galaxy_workflow": "true",
3 "annotation": "Filter out proteins that have a transcript expression level, as quantified by RNA-Seq data, below a certain threshold.",
4 "format-version": "0.1",
5 "name": "Proteomics Reduced DB v2",
6 "steps": {
7 "0": {
8 "annotation": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz",
9 "id": 0,
10 "input_connections": {},
11 "inputs": [
12 {
13 "description": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz",
14 "name": "Ensembl Protein FASTA (reference proteome)"
15 }
16 ],
17 "name": "Input dataset",
18 "outputs": [],
19 "position": {
20 "left": 208,
21 "top": 200
22 },
23 "tool_errors": null,
24 "tool_id": null,
25 "tool_state": "{\"name\": \"Ensembl Protein FASTA (reference proteome)\"}",
26 "tool_version": null,
27 "type": "data_input",
28 "user_outputs": []
29 },
30 "1": {
31 "annotation": "Ensembl reference fasta with only chromosome assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz",
32 "id": 1,
33 "input_connections": {},
34 "inputs": [
35 {
36 "description": "Ensembl reference fasta with only chromosome assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz",
37 "name": "Ensembl Genome Reference Fasta"
38 }
39 ],
40 "name": "Input dataset",
41 "outputs": [],
42 "position": {
43 "left": 209,
44 "top": 292
45 },
46 "tool_errors": null,
47 "tool_id": null,
48 "tool_state": "{\"name\": \"Ensembl Genome Reference Fasta\"}",
49 "tool_version": null,
50 "type": "data_input",
51 "user_outputs": []
52 },
53 "2": {
54 "annotation": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz",
55 "id": 2,
56 "input_connections": {},
57 "inputs": [
58 {
59 "description": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz",
60 "name": "Ensembl GTF File (gene models)"
61 }
62 ],
63 "name": "Input dataset",
64 "outputs": [],
65 "position": {
66 "left": 213,
67 "top": 456
68 },
69 "tool_errors": null,
70 "tool_id": null,
71 "tool_state": "{\"name\": \"Ensembl GTF File (gene models)\"}",
72 "tool_version": null,
73 "type": "data_input",
74 "user_outputs": []
75 },
76 "3": {
77 "annotation": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)",
78 "id": 3,
79 "input_connections": {},
80 "inputs": [
81 {
82 "description": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)",
83 "name": "RNA-Seq left paired-end fastq"
84 }
85 ],
86 "name": "Input dataset",
87 "outputs": [],
88 "position": {
89 "left": 220,
90 "top": 563
91 },
92 "tool_errors": null,
93 "tool_id": null,
94 "tool_state": "{\"name\": \"RNA-Seq left paired-end fastq\"}",
95 "tool_version": null,
96 "type": "data_input",
97 "user_outputs": []
98 },
99 "4": {
100 "annotation": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)",
101 "id": 4,
102 "input_connections": {},
103 "inputs": [
104 {
105 "description": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)",
106 "name": "RNA-Seq right paired-end fastq"
107 }
108 ],
109 "name": "Input dataset",
110 "outputs": [],
111 "position": {
112 "left": 221,
113 "top": 673
114 },
115 "tool_errors": null,
116 "tool_id": null,
117 "tool_state": "{\"name\": \"RNA-Seq right paired-end fastq\"}",
118 "tool_version": null,
119 "type": "data_input",
120 "user_outputs": []
121 },
122 "5": {
123 "annotation": "Convert peptide fasta to a 2-column tabular file. Keep all the header info.",
124 "id": 5,
125 "input_connections": {
126 "input": {
127 "id": 0,
128 "output_name": "output"
129 }
130 },
131 "inputs": [],
132 "name": "FASTA-to-Tabular",
133 "outputs": [
134 {
135 "name": "output",
136 "type": "tabular"
137 }
138 ],
139 "position": {
140 "left": 538,
141 "top": 267
142 },
143 "post_job_actions": {},
144 "tool_errors": null,
145 "tool_id": "fasta2tab",
146 "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"__rerun_remap_job_id__\": null}",
147 "tool_version": "1.1.0",
148 "type": "tool",
149 "user_outputs": []
150 },
151 "6": {
152 "annotation": "Given a GTF file and the reference genome, this tool constructs a synthetic transcriptome that will be used for isoform quantification during \"-calculate expression\".",
153 "id": 6,
154 "input_connections": {
155 "reference|gtf": {
156 "id": 2,
157 "output_name": "output"
158 },
159 "reference|reference_fasta_file": {
160 "id": 1,
161 "output_name": "output"
162 }
163 },
164 "inputs": [],
165 "name": "RSEM prepare reference",
166 "outputs": [
167 {
168 "name": "reference_file",
169 "type": "rsem_ref"
170 }
171 ],
172 "position": {
173 "left": 419,
174 "top": 388
175 },
176 "post_job_actions": {},
177 "tool_errors": null,
178 "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_prepare_reference/1.1.17",
179 "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"ref_type\\\": \\\"genomic\\\", \\\"gtf\\\": null, \\\"reference_fasta_file\\\": null, \\\"__current_case__\\\": 1}\", \"reference_name\": \"\\\"primaryEnsemblGtfRef\\\"\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"polya\": \"{\\\"polya_use\\\": \\\"add\\\", \\\"polya_length\\\": \\\"125\\\", \\\"__current_case__\\\": 0}\", \"transcript_to_gene_map\": \"null\", \"ntog\": \"\\\"False\\\"\"}",
180 "tool_version": "1.1.17",
181 "type": "tool",
182 "user_outputs": []
183 },
184 "7": {
185 "annotation": "",
186 "id": 7,
187 "input_connections": {
188 "infile": {
189 "id": 5,
190 "output_name": "output"
191 }
192 },
193 "inputs": [],
194 "name": "Regex Replace",
195 "outputs": [
196 {
197 "name": "outfile",
198 "type": "txt"
199 }
200 ],
201 "position": {
202 "left": 802,
203 "top": 281
204 },
205 "post_job_actions": {
206 "ChangeDatatypeActionoutfile": {
207 "action_arguments": {
208 "newtype": "tabular"
209 },
210 "action_type": "ChangeDatatypeAction",
211 "output_name": "outfile"
212 }
213 },
214 "tool_errors": null,
215 "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0",
216 "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.* transcript:)(ENST\\\\\\\\d+)(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1\\\\\\\\2\\\\\\\\3\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}",
217 "tool_version": "1.0.0",
218 "type": "tool",
219 "user_outputs": []
220 },
221 "8": {
222 "annotation": "Given the RNA-Seq reads (fastq) and synthetic transcriptome (from \"-prepare reference\"), this tool quantifies the abundances of each mRNA transcript within the GTF file.",
223 "id": 8,
224 "input_connections": {
225 "input|fastq|fastq1": {
226 "id": 3,
227 "output_name": "output"
228 },
229 "input|fastq|fastq2": {
230 "id": 4,
231 "output_name": "output"
232 },
233 "reference|rsem_ref": {
234 "id": 6,
235 "output_name": "reference_file"
236 }
237 },
238 "inputs": [],
239 "name": "RSEM calculate expression",
240 "outputs": [
241 {
242 "name": "gene_abundances",
243 "type": "tabular"
244 },
245 {
246 "name": "isoform_abundances",
247 "type": "tabular"
248 },
249 {
250 "name": "transcript_bam",
251 "type": "bam"
252 },
253 {
254 "name": "transcript_sorted_bam",
255 "type": "bam"
256 },
257 {
258 "name": "genome_bam",
259 "type": "bam"
260 },
261 {
262 "name": "genome_sorted_bam",
263 "type": "bam"
264 },
265 {
266 "name": "log",
267 "type": "txt"
268 }
269 ],
270 "position": {
271 "left": 719,
272 "top": 523
273 },
274 "post_job_actions": {},
275 "tool_errors": null,
276 "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_calculate_expression/1.1.17",
277 "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"rsem_ref\\\": null, \\\"refSrc\\\": \\\"history\\\", \\\"__current_case__\\\": 1}\", \"rsem_options\": \"{\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}\", \"rsem_outputs\": \"{\\\"result_bams\\\": \\\"none\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"seedlength\": \"\\\"25\\\"\", \"sample\": \"\\\"rsem_sample\\\"\", \"forward_prob\": \"\\\"0.5\\\"\", \"input\": \"{\\\"fastq\\\": {\\\"fastq2\\\": null, \\\"fastq1\\\": null, \\\"matepair\\\": \\\"paired\\\", \\\"__current_case__\\\": 1}, \\\"bowtie_options\\\": {\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}, \\\"fastq_select\\\": \\\"--phred33-quals\\\", \\\"__current_case__\\\": 0, \\\"format\\\": \\\"fastq\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
278 "tool_version": "1.1.17",
279 "type": "tool",
280 "user_outputs": []
281 },
282 "9": {
283 "annotation": "Selection of lower threshold of transcriptional abundance in TPM required for inclusion of the corresponding protein in the reduced database.",
284 "id": 9,
285 "input_connections": {
286 "input": {
287 "id": 8,
288 "output_name": "isoform_abundances"
289 }
290 },
291 "inputs": [],
292 "name": "Filter",
293 "outputs": [
294 {
295 "name": "out_file1",
296 "type": "input"
297 }
298 ],
299 "position": {
300 "left": 991,
301 "top": 591
302 },
303 "post_job_actions": {},
304 "tool_errors": null,
305 "tool_id": "Filter1",
306 "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3>0.000001\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
307 "tool_version": "1.1.0",
308 "type": "tool",
309 "user_outputs": []
310 },
311 "10": {
312 "annotation": "Add a column with the RSEM TPM times a million.",
313 "id": 10,
314 "input_connections": {
315 "input": {
316 "id": 9,
317 "output_name": "out_file1"
318 }
319 },
320 "inputs": [],
321 "name": "Compute",
322 "outputs": [
323 {
324 "name": "out_file1",
325 "type": "input"
326 }
327 ],
328 "position": {
329 "left": 1199,
330 "top": 574
331 },
332 "post_job_actions": {},
333 "tool_errors": null,
334 "tool_id": "Add_a_column1",
335 "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3*1000000\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"round\": \"\\\"no\\\"\"}",
336 "tool_version": "1.1.0",
337 "type": "tool",
338 "user_outputs": []
339 },
340 "11": {
341 "annotation": "",
342 "id": 11,
343 "input_connections": {
344 "input1": {
345 "id": 7,
346 "output_name": "outfile"
347 },
348 "input2": {
349 "id": 10,
350 "output_name": "out_file1"
351 }
352 },
353 "inputs": [],
354 "name": "Join two Datasets",
355 "outputs": [
356 {
357 "name": "out_file1",
358 "type": "input"
359 }
360 ],
361 "position": {
362 "left": 1350,
363 "top": 419
364 },
365 "post_job_actions": {},
366 "tool_errors": null,
367 "tool_id": "join1",
368 "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"3\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
369 "tool_version": "2.0.2",
370 "type": "tool",
371 "user_outputs": []
372 },
373 "12": {
374 "annotation": "",
375 "id": 12,
376 "input_connections": {
377 "infile": {
378 "id": 11,
379 "output_name": "out_file1"
380 }
381 },
382 "inputs": [],
383 "name": "Regex Replace",
384 "outputs": [
385 {
386 "name": "outfile",
387 "type": "txt"
388 }
389 ],
390 "position": {
391 "left": 1545,
392 "top": 546
393 },
394 "post_job_actions": {
395 "ChangeDatatypeActionoutfile": {
396 "action_arguments": {
397 "newtype": "tabular"
398 },
399 "action_type": "ChangeDatatypeAction",
400 "output_name": "outfile"
401 }
402 },
403 "tool_errors": null,
404 "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0",
405 "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1 tmp:\\\\\\\\8\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}",
406 "tool_version": "1.0.0",
407 "type": "tool",
408 "user_outputs": []
409 },
410 "13": {
411 "annotation": "Final reduced database after application of a TPM cut-off.",
412 "id": 13,
413 "input_connections": {
414 "input": {
415 "id": 12,
416 "output_name": "outfile"
417 }
418 },
419 "inputs": [],
420 "name": "Tabular-to-FASTA",
421 "outputs": [
422 {
423 "name": "output",
424 "type": "fasta"
425 }
426 ],
427 "position": {
428 "left": 1743,
429 "top": 484
430 },
431 "post_job_actions": {},
432 "tool_errors": null,
433 "tool_id": "tab2fasta",
434 "tool_state": "{\"title_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\", \"__page__\": 0, \"seq_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"__rerun_remap_job_id__\": null, \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
435 "tool_version": "1.1.0",
436 "type": "tool",
437 "user_outputs": []
438 },
439 "14": {
440 "annotation": "Format FASTA to desired width.",
441 "id": 14,
442 "input_connections": {
443 "input": {
444 "id": 13,
445 "output_name": "output"
446 }
447 },
448 "inputs": [],
449 "name": "FASTA Width",
450 "outputs": [
451 {
452 "name": "output",
453 "type": "input"
454 }
455 ],
456 "position": {
457 "left": 1939,
458 "top": 569
459 },
460 "post_job_actions": {},
461 "tool_errors": null,
462 "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_formatter/cshl_fasta_formatter/1.0.0",
463 "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"width\": \"\\\"80\\\"\"}",
464 "tool_version": "1.0.0",
465 "type": "tool",
466 "user_outputs": []
467 }
468 }
469 }