Mercurial > repos > galaxyp > proteomics_rnaseq_reduced_db_workflow
comparison proteomics_rnaseq_reduced_db_workflow_v2.ga @ 1:20d9fb1ba210 default tip
Replace several tabular manipulations with regex_replace tool
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 20 Mar 2014 21:50:05 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:9d5e59373c84 | 1:20d9fb1ba210 |
---|---|
1 { | |
2 "a_galaxy_workflow": "true", | |
3 "annotation": "Filter out proteins that have a transcript expression level, as quantified by RNA-Seq data, below a certain threshold.", | |
4 "format-version": "0.1", | |
5 "name": "Proteomics Reduced DB v2", | |
6 "steps": { | |
7 "0": { | |
8 "annotation": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz", | |
9 "id": 0, | |
10 "input_connections": {}, | |
11 "inputs": [ | |
12 { | |
13 "description": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz", | |
14 "name": "Ensembl Protein FASTA (reference proteome)" | |
15 } | |
16 ], | |
17 "name": "Input dataset", | |
18 "outputs": [], | |
19 "position": { | |
20 "left": 208, | |
21 "top": 200 | |
22 }, | |
23 "tool_errors": null, | |
24 "tool_id": null, | |
25 "tool_state": "{\"name\": \"Ensembl Protein FASTA (reference proteome)\"}", | |
26 "tool_version": null, | |
27 "type": "data_input", | |
28 "user_outputs": [] | |
29 }, | |
30 "1": { | |
31 "annotation": "Ensembl reference fasta with only chromosome assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz", | |
32 "id": 1, | |
33 "input_connections": {}, | |
34 "inputs": [ | |
35 { | |
36 "description": "Ensembl reference fasta with only chromosome assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz", | |
37 "name": "Ensembl Genome Reference Fasta" | |
38 } | |
39 ], | |
40 "name": "Input dataset", | |
41 "outputs": [], | |
42 "position": { | |
43 "left": 209, | |
44 "top": 292 | |
45 }, | |
46 "tool_errors": null, | |
47 "tool_id": null, | |
48 "tool_state": "{\"name\": \"Ensembl Genome Reference Fasta\"}", | |
49 "tool_version": null, | |
50 "type": "data_input", | |
51 "user_outputs": [] | |
52 }, | |
53 "2": { | |
54 "annotation": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz", | |
55 "id": 2, | |
56 "input_connections": {}, | |
57 "inputs": [ | |
58 { | |
59 "description": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz", | |
60 "name": "Ensembl GTF File (gene models)" | |
61 } | |
62 ], | |
63 "name": "Input dataset", | |
64 "outputs": [], | |
65 "position": { | |
66 "left": 213, | |
67 "top": 456 | |
68 }, | |
69 "tool_errors": null, | |
70 "tool_id": null, | |
71 "tool_state": "{\"name\": \"Ensembl GTF File (gene models)\"}", | |
72 "tool_version": null, | |
73 "type": "data_input", | |
74 "user_outputs": [] | |
75 }, | |
76 "3": { | |
77 "annotation": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", | |
78 "id": 3, | |
79 "input_connections": {}, | |
80 "inputs": [ | |
81 { | |
82 "description": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", | |
83 "name": "RNA-Seq left paired-end fastq" | |
84 } | |
85 ], | |
86 "name": "Input dataset", | |
87 "outputs": [], | |
88 "position": { | |
89 "left": 220, | |
90 "top": 563 | |
91 }, | |
92 "tool_errors": null, | |
93 "tool_id": null, | |
94 "tool_state": "{\"name\": \"RNA-Seq left paired-end fastq\"}", | |
95 "tool_version": null, | |
96 "type": "data_input", | |
97 "user_outputs": [] | |
98 }, | |
99 "4": { | |
100 "annotation": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", | |
101 "id": 4, | |
102 "input_connections": {}, | |
103 "inputs": [ | |
104 { | |
105 "description": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", | |
106 "name": "RNA-Seq right paired-end fastq" | |
107 } | |
108 ], | |
109 "name": "Input dataset", | |
110 "outputs": [], | |
111 "position": { | |
112 "left": 221, | |
113 "top": 673 | |
114 }, | |
115 "tool_errors": null, | |
116 "tool_id": null, | |
117 "tool_state": "{\"name\": \"RNA-Seq right paired-end fastq\"}", | |
118 "tool_version": null, | |
119 "type": "data_input", | |
120 "user_outputs": [] | |
121 }, | |
122 "5": { | |
123 "annotation": "Convert peptide fasta to a 2-column tabular file. Keep all the header info.", | |
124 "id": 5, | |
125 "input_connections": { | |
126 "input": { | |
127 "id": 0, | |
128 "output_name": "output" | |
129 } | |
130 }, | |
131 "inputs": [], | |
132 "name": "FASTA-to-Tabular", | |
133 "outputs": [ | |
134 { | |
135 "name": "output", | |
136 "type": "tabular" | |
137 } | |
138 ], | |
139 "position": { | |
140 "left": 538, | |
141 "top": 267 | |
142 }, | |
143 "post_job_actions": {}, | |
144 "tool_errors": null, | |
145 "tool_id": "fasta2tab", | |
146 "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"__rerun_remap_job_id__\": null}", | |
147 "tool_version": "1.1.0", | |
148 "type": "tool", | |
149 "user_outputs": [] | |
150 }, | |
151 "6": { | |
152 "annotation": "Given a GTF file and the reference genome, this tool constructs a synthetic transcriptome that will be used for isoform quantification during \"-calculate expression\".", | |
153 "id": 6, | |
154 "input_connections": { | |
155 "reference|gtf": { | |
156 "id": 2, | |
157 "output_name": "output" | |
158 }, | |
159 "reference|reference_fasta_file": { | |
160 "id": 1, | |
161 "output_name": "output" | |
162 } | |
163 }, | |
164 "inputs": [], | |
165 "name": "RSEM prepare reference", | |
166 "outputs": [ | |
167 { | |
168 "name": "reference_file", | |
169 "type": "rsem_ref" | |
170 } | |
171 ], | |
172 "position": { | |
173 "left": 419, | |
174 "top": 388 | |
175 }, | |
176 "post_job_actions": {}, | |
177 "tool_errors": null, | |
178 "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_prepare_reference/1.1.17", | |
179 "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"ref_type\\\": \\\"genomic\\\", \\\"gtf\\\": null, \\\"reference_fasta_file\\\": null, \\\"__current_case__\\\": 1}\", \"reference_name\": \"\\\"primaryEnsemblGtfRef\\\"\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"polya\": \"{\\\"polya_use\\\": \\\"add\\\", \\\"polya_length\\\": \\\"125\\\", \\\"__current_case__\\\": 0}\", \"transcript_to_gene_map\": \"null\", \"ntog\": \"\\\"False\\\"\"}", | |
180 "tool_version": "1.1.17", | |
181 "type": "tool", | |
182 "user_outputs": [] | |
183 }, | |
184 "7": { | |
185 "annotation": "", | |
186 "id": 7, | |
187 "input_connections": { | |
188 "infile": { | |
189 "id": 5, | |
190 "output_name": "output" | |
191 } | |
192 }, | |
193 "inputs": [], | |
194 "name": "Regex Replace", | |
195 "outputs": [ | |
196 { | |
197 "name": "outfile", | |
198 "type": "txt" | |
199 } | |
200 ], | |
201 "position": { | |
202 "left": 802, | |
203 "top": 281 | |
204 }, | |
205 "post_job_actions": { | |
206 "ChangeDatatypeActionoutfile": { | |
207 "action_arguments": { | |
208 "newtype": "tabular" | |
209 }, | |
210 "action_type": "ChangeDatatypeAction", | |
211 "output_name": "outfile" | |
212 } | |
213 }, | |
214 "tool_errors": null, | |
215 "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0", | |
216 "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.* transcript:)(ENST\\\\\\\\d+)(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1\\\\\\\\2\\\\\\\\3\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}", | |
217 "tool_version": "1.0.0", | |
218 "type": "tool", | |
219 "user_outputs": [] | |
220 }, | |
221 "8": { | |
222 "annotation": "Given the RNA-Seq reads (fastq) and synthetic transcriptome (from \"-prepare reference\"), this tool quantifies the abundances of each mRNA transcript within the GTF file.", | |
223 "id": 8, | |
224 "input_connections": { | |
225 "input|fastq|fastq1": { | |
226 "id": 3, | |
227 "output_name": "output" | |
228 }, | |
229 "input|fastq|fastq2": { | |
230 "id": 4, | |
231 "output_name": "output" | |
232 }, | |
233 "reference|rsem_ref": { | |
234 "id": 6, | |
235 "output_name": "reference_file" | |
236 } | |
237 }, | |
238 "inputs": [], | |
239 "name": "RSEM calculate expression", | |
240 "outputs": [ | |
241 { | |
242 "name": "gene_abundances", | |
243 "type": "tabular" | |
244 }, | |
245 { | |
246 "name": "isoform_abundances", | |
247 "type": "tabular" | |
248 }, | |
249 { | |
250 "name": "transcript_bam", | |
251 "type": "bam" | |
252 }, | |
253 { | |
254 "name": "transcript_sorted_bam", | |
255 "type": "bam" | |
256 }, | |
257 { | |
258 "name": "genome_bam", | |
259 "type": "bam" | |
260 }, | |
261 { | |
262 "name": "genome_sorted_bam", | |
263 "type": "bam" | |
264 }, | |
265 { | |
266 "name": "log", | |
267 "type": "txt" | |
268 } | |
269 ], | |
270 "position": { | |
271 "left": 719, | |
272 "top": 523 | |
273 }, | |
274 "post_job_actions": {}, | |
275 "tool_errors": null, | |
276 "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_calculate_expression/1.1.17", | |
277 "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"rsem_ref\\\": null, \\\"refSrc\\\": \\\"history\\\", \\\"__current_case__\\\": 1}\", \"rsem_options\": \"{\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}\", \"rsem_outputs\": \"{\\\"result_bams\\\": \\\"none\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"seedlength\": \"\\\"25\\\"\", \"sample\": \"\\\"rsem_sample\\\"\", \"forward_prob\": \"\\\"0.5\\\"\", \"input\": \"{\\\"fastq\\\": {\\\"fastq2\\\": null, \\\"fastq1\\\": null, \\\"matepair\\\": \\\"paired\\\", \\\"__current_case__\\\": 1}, \\\"bowtie_options\\\": {\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}, \\\"fastq_select\\\": \\\"--phred33-quals\\\", \\\"__current_case__\\\": 0, \\\"format\\\": \\\"fastq\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", | |
278 "tool_version": "1.1.17", | |
279 "type": "tool", | |
280 "user_outputs": [] | |
281 }, | |
282 "9": { | |
283 "annotation": "Selection of lower threshold of transcriptional abundance in TPM required for inclusion of the corresponding protein in the reduced database.", | |
284 "id": 9, | |
285 "input_connections": { | |
286 "input": { | |
287 "id": 8, | |
288 "output_name": "isoform_abundances" | |
289 } | |
290 }, | |
291 "inputs": [], | |
292 "name": "Filter", | |
293 "outputs": [ | |
294 { | |
295 "name": "out_file1", | |
296 "type": "input" | |
297 } | |
298 ], | |
299 "position": { | |
300 "left": 991, | |
301 "top": 591 | |
302 }, | |
303 "post_job_actions": {}, | |
304 "tool_errors": null, | |
305 "tool_id": "Filter1", | |
306 "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3>0.000001\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", | |
307 "tool_version": "1.1.0", | |
308 "type": "tool", | |
309 "user_outputs": [] | |
310 }, | |
311 "10": { | |
312 "annotation": "Add a column with the RSEM TPM times a million.", | |
313 "id": 10, | |
314 "input_connections": { | |
315 "input": { | |
316 "id": 9, | |
317 "output_name": "out_file1" | |
318 } | |
319 }, | |
320 "inputs": [], | |
321 "name": "Compute", | |
322 "outputs": [ | |
323 { | |
324 "name": "out_file1", | |
325 "type": "input" | |
326 } | |
327 ], | |
328 "position": { | |
329 "left": 1199, | |
330 "top": 574 | |
331 }, | |
332 "post_job_actions": {}, | |
333 "tool_errors": null, | |
334 "tool_id": "Add_a_column1", | |
335 "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3*1000000\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"round\": \"\\\"no\\\"\"}", | |
336 "tool_version": "1.1.0", | |
337 "type": "tool", | |
338 "user_outputs": [] | |
339 }, | |
340 "11": { | |
341 "annotation": "", | |
342 "id": 11, | |
343 "input_connections": { | |
344 "input1": { | |
345 "id": 7, | |
346 "output_name": "outfile" | |
347 }, | |
348 "input2": { | |
349 "id": 10, | |
350 "output_name": "out_file1" | |
351 } | |
352 }, | |
353 "inputs": [], | |
354 "name": "Join two Datasets", | |
355 "outputs": [ | |
356 { | |
357 "name": "out_file1", | |
358 "type": "input" | |
359 } | |
360 ], | |
361 "position": { | |
362 "left": 1350, | |
363 "top": 419 | |
364 }, | |
365 "post_job_actions": {}, | |
366 "tool_errors": null, | |
367 "tool_id": "join1", | |
368 "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"3\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", | |
369 "tool_version": "2.0.2", | |
370 "type": "tool", | |
371 "user_outputs": [] | |
372 }, | |
373 "12": { | |
374 "annotation": "", | |
375 "id": 12, | |
376 "input_connections": { | |
377 "infile": { | |
378 "id": 11, | |
379 "output_name": "out_file1" | |
380 } | |
381 }, | |
382 "inputs": [], | |
383 "name": "Regex Replace", | |
384 "outputs": [ | |
385 { | |
386 "name": "outfile", | |
387 "type": "txt" | |
388 } | |
389 ], | |
390 "position": { | |
391 "left": 1545, | |
392 "top": 546 | |
393 }, | |
394 "post_job_actions": { | |
395 "ChangeDatatypeActionoutfile": { | |
396 "action_arguments": { | |
397 "newtype": "tabular" | |
398 }, | |
399 "action_type": "ChangeDatatypeAction", | |
400 "output_name": "outfile" | |
401 } | |
402 }, | |
403 "tool_errors": null, | |
404 "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0", | |
405 "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1 tmp:\\\\\\\\8\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}", | |
406 "tool_version": "1.0.0", | |
407 "type": "tool", | |
408 "user_outputs": [] | |
409 }, | |
410 "13": { | |
411 "annotation": "Final reduced database after application of a TPM cut-off.", | |
412 "id": 13, | |
413 "input_connections": { | |
414 "input": { | |
415 "id": 12, | |
416 "output_name": "outfile" | |
417 } | |
418 }, | |
419 "inputs": [], | |
420 "name": "Tabular-to-FASTA", | |
421 "outputs": [ | |
422 { | |
423 "name": "output", | |
424 "type": "fasta" | |
425 } | |
426 ], | |
427 "position": { | |
428 "left": 1743, | |
429 "top": 484 | |
430 }, | |
431 "post_job_actions": {}, | |
432 "tool_errors": null, | |
433 "tool_id": "tab2fasta", | |
434 "tool_state": "{\"title_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\", \"__page__\": 0, \"seq_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"__rerun_remap_job_id__\": null, \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", | |
435 "tool_version": "1.1.0", | |
436 "type": "tool", | |
437 "user_outputs": [] | |
438 }, | |
439 "14": { | |
440 "annotation": "Format FASTA to desired width.", | |
441 "id": 14, | |
442 "input_connections": { | |
443 "input": { | |
444 "id": 13, | |
445 "output_name": "output" | |
446 } | |
447 }, | |
448 "inputs": [], | |
449 "name": "FASTA Width", | |
450 "outputs": [ | |
451 { | |
452 "name": "output", | |
453 "type": "input" | |
454 } | |
455 ], | |
456 "position": { | |
457 "left": 1939, | |
458 "top": 569 | |
459 }, | |
460 "post_job_actions": {}, | |
461 "tool_errors": null, | |
462 "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_formatter/cshl_fasta_formatter/1.0.0", | |
463 "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"width\": \"\\\"80\\\"\"}", | |
464 "tool_version": "1.0.0", | |
465 "type": "tool", | |
466 "user_outputs": [] | |
467 } | |
468 } | |
469 } |