Mercurial > repos > galaxyp > proteomics_rnaseq_reduced_db_workflow
view proteomics_rnaseq_reduced_db_workflow_v2.ga @ 1:20d9fb1ba210 default tip
Replace several tabular manipulations with regex_replace tool
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Thu, 20 Mar 2014 21:50:05 -0500 |
parents | |
children |
line wrap: on
line source
{ "a_galaxy_workflow": "true", "annotation": "Filter out proteins that have a transcript expression level, as quantified by RNA-Seq data, below a certain threshold.", "format-version": "0.1", "name": "Proteomics Reduced DB v2", "steps": { "0": { "annotation": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz", "id": 0, "input_connections": {}, "inputs": [ { "description": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz", "name": "Ensembl Protein FASTA (reference proteome)" } ], "name": "Input dataset", "outputs": [], "position": { "left": 208, "top": 200 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Ensembl Protein FASTA (reference proteome)\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "1": { "annotation": "Ensembl reference fasta with only chromosome assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz", "id": 1, "input_connections": {}, "inputs": [ { "description": "Ensembl reference fasta with only chromosome assigned sequences. 
For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz", "name": "Ensembl Genome Reference Fasta" } ], "name": "Input dataset", "outputs": [], "position": { "left": 209, "top": 292 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Ensembl Genome Reference Fasta\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "2": { "annotation": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz", "id": 2, "input_connections": {}, "inputs": [ { "description": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz", "name": "Ensembl GTF File (gene models)" } ], "name": "Input dataset", "outputs": [], "position": { "left": 213, "top": 456 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Ensembl GTF File (gene models)\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "3": { "annotation": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", "id": 3, "input_connections": {}, "inputs": [ { "description": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", "name": "RNA-Seq left paired-end fastq" } ], "name": "Input dataset", "outputs": [], "position": { "left": 220, "top": 563 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"RNA-Seq left paired-end fastq\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "4": { "annotation": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", "id": 4, "input_connections": {}, "inputs": [ { "description": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. 
If not, convert with \"Fastq Groomer\" tool.)", "name": "RNA-Seq right paired-end fastq" } ], "name": "Input dataset", "outputs": [], "position": { "left": 221, "top": 673 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"RNA-Seq right paired-end fastq\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "5": { "annotation": "Convert peptide fasta to a 2-column tabular file. Keep all the header info.", "id": 5, "input_connections": { "input": { "id": 0, "output_name": "output" } }, "inputs": [], "name": "FASTA-to-Tabular", "outputs": [ { "name": "output", "type": "tabular" } ], "position": { "left": 538, "top": 267 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "fasta2tab", "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"__rerun_remap_job_id__\": null}", "tool_version": "1.1.0", "type": "tool", "user_outputs": [] }, "6": { "annotation": "Given a GTF file and the reference genome, this tool constructs a synthetic transcriptome that will be used for isoform quantification during \"-calculate expression\".", "id": 6, "input_connections": { "reference|gtf": { "id": 2, "output_name": "output" }, "reference|reference_fasta_file": { "id": 1, "output_name": "output" } }, "inputs": [], "name": "RSEM prepare reference", "outputs": [ { "name": "reference_file", "type": "rsem_ref" } ], "position": { "left": 419, "top": 388 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_prepare_reference/1.1.17", "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"ref_type\\\": \\\"genomic\\\", \\\"gtf\\\": null, \\\"reference_fasta_file\\\": null, \\\"__current_case__\\\": 1}\", \"reference_name\": \"\\\"primaryEnsemblGtfRef\\\"\", \"__rerun_remap_job_id__\": null, \"chromInfo\": 
\"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"polya\": \"{\\\"polya_use\\\": \\\"add\\\", \\\"polya_length\\\": \\\"125\\\", \\\"__current_case__\\\": 0}\", \"transcript_to_gene_map\": \"null\", \"ntog\": \"\\\"False\\\"\"}", "tool_version": "1.1.17", "type": "tool", "user_outputs": [] }, "7": { "annotation": "", "id": 7, "input_connections": { "infile": { "id": 5, "output_name": "output" } }, "inputs": [], "name": "Regex Replace", "outputs": [ { "name": "outfile", "type": "txt" } ], "position": { "left": 802, "top": 281 }, "post_job_actions": { "ChangeDatatypeActionoutfile": { "action_arguments": { "newtype": "tabular" }, "action_type": "ChangeDatatypeAction", "output_name": "outfile" } }, "tool_errors": null, "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0", "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.* transcript:)(ENST\\\\\\\\d+)(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1\\\\\\\\2\\\\\\\\3\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] }, "8": { "annotation": "Given the RNA-Seq reads (fastq) and synthetic transcriptome (from \"-prepare reference\"), this tool quantifies the abundances of each mRNA transcript within the GTF file.", "id": 8, "input_connections": { "input|fastq|fastq1": { "id": 3, "output_name": "output" }, "input|fastq|fastq2": { "id": 4, "output_name": "output" }, "reference|rsem_ref": { "id": 6, "output_name": "reference_file" } }, "inputs": [], "name": "RSEM calculate expression", "outputs": [ { "name": "gene_abundances", "type": "tabular" }, { "name": "isoform_abundances", "type": "tabular" }, { "name": "transcript_bam", "type": "bam" }, { "name": "transcript_sorted_bam", "type": "bam" }, { "name": 
"genome_bam", "type": "bam" }, { "name": "genome_sorted_bam", "type": "bam" }, { "name": "log", "type": "txt" } ], "position": { "left": 719, "top": 523 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_calculate_expression/1.1.17", "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"rsem_ref\\\": null, \\\"refSrc\\\": \\\"history\\\", \\\"__current_case__\\\": 1}\", \"rsem_options\": \"{\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}\", \"rsem_outputs\": \"{\\\"result_bams\\\": \\\"none\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"seedlength\": \"\\\"25\\\"\", \"sample\": \"\\\"rsem_sample\\\"\", \"forward_prob\": \"\\\"0.5\\\"\", \"input\": \"{\\\"fastq\\\": {\\\"fastq2\\\": null, \\\"fastq1\\\": null, \\\"matepair\\\": \\\"paired\\\", \\\"__current_case__\\\": 1}, \\\"bowtie_options\\\": {\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}, \\\"fastq_select\\\": \\\"--phred33-quals\\\", \\\"__current_case__\\\": 0, \\\"format\\\": \\\"fastq\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", "tool_version": "1.1.17", "type": "tool", "user_outputs": [] }, "9": { "annotation": "Selection of lower threshold of transcriptional abundance in TPM required for inclusion of the corresponding protein in the reduced database.", "id": 9, "input_connections": { "input": { "id": 8, "output_name": "isoform_abundances" } }, "inputs": [], "name": "Filter", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 991, "top": 591 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "Filter1", "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3>0.000001\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", 
"tool_version": "1.1.0", "type": "tool", "user_outputs": [] }, "10": { "annotation": "Add a column with the RSEM TPM times a million.", "id": 10, "input_connections": { "input": { "id": 9, "output_name": "out_file1" } }, "inputs": [], "name": "Compute", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 1199, "top": 574 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "Add_a_column1", "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3*1000000\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"round\": \"\\\"no\\\"\"}", "tool_version": "1.1.0", "type": "tool", "user_outputs": [] }, "11": { "annotation": "", "id": 11, "input_connections": { "input1": { "id": 7, "output_name": "outfile" }, "input2": { "id": 10, "output_name": "out_file1" } }, "inputs": [], "name": "Join two Datasets", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 1350, "top": 419 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "join1", "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"3\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", "tool_version": "2.0.2", "type": "tool", "user_outputs": [] }, "12": { "annotation": "", "id": 12, "input_connections": { "infile": { "id": 11, "output_name": "out_file1" } }, "inputs": [], "name": "Regex Replace", "outputs": [ { "name": "outfile", "type": "txt" } ], "position": { "left": 1545, "top": 546 
}, "post_job_actions": { "ChangeDatatypeActionoutfile": { "action_arguments": { "newtype": "tabular" }, "action_type": "ChangeDatatypeAction", "output_name": "outfile" } }, "tool_errors": null, "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0", "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1 tmp:\\\\\\\\8\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] }, "13": { "annotation": "Final reduced database after application of a TPM cut-off.", "id": 13, "input_connections": { "input": { "id": 12, "output_name": "outfile" } }, "inputs": [], "name": "Tabular-to-FASTA", "outputs": [ { "name": "output", "type": "fasta" } ], "position": { "left": 1743, "top": 484 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "tab2fasta", "tool_state": "{\"title_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\", \"__page__\": 0, \"seq_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"__rerun_remap_job_id__\": null, \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", "tool_version": "1.1.0", "type": "tool", "user_outputs": [] }, "14": { "annotation": "Format FASTA to desired width.", "id": 14, "input_connections": { "input": { "id": 13, "output_name": "output" } }, "inputs": [], "name": "FASTA Width", "outputs": [ { "name": "output", "type": "input" } ], "position": { "left": 1939, "top": 569 }, "post_job_actions": {}, "tool_errors": null, "tool_id": 
"toolshed.g2.bx.psu.edu/repos/devteam/fasta_formatter/cshl_fasta_formatter/1.0.0", "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"width\": \"\\\"80\\\"\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] } } }