view proteomics_rnaseq_reduced_db_workflow_v2.ga @ 1:20d9fb1ba210 default tip

Replace several tabular manipulations with regex_replace tool
author Jim Johnson <jj@umn.edu>
date Thu, 20 Mar 2014 21:50:05 -0500
parents
children
line wrap: on
line source

{
    "a_galaxy_workflow": "true", 
    "annotation": "Filter out proteins that have a transcript expression level, as quantified by RNA-Seq data, below a certain threshold.", 
    "format-version": "0.1", 
    "name": "Proteomics Reduced DB v2", 
    "steps": {
        "0": {
            "annotation": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz", 
            "id": 0, 
            "input_connections": {}, 
            "inputs": [
                {
                    "description": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz", 
                    "name": "Ensembl Protein FASTA (reference proteome)"
                }
            ], 
            "name": "Input dataset", 
            "outputs": [], 
            "position": {
                "left": 208, 
                "top": 200
            }, 
            "tool_errors": null, 
            "tool_id": null, 
            "tool_state": "{\"name\": \"Ensembl Protein FASTA (reference proteome)\"}", 
            "tool_version": null, 
            "type": "data_input", 
            "user_outputs": []
        }, 
        "1": {
            "annotation": "Ensembl reference fasta with only chromosome assigned sequences.   For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz", 
            "id": 1, 
            "input_connections": {}, 
            "inputs": [
                {
                    "description": "Ensembl reference fasta with only chromosome assigned sequences.   For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz", 
                    "name": "Ensembl Genome Reference Fasta"
                }
            ], 
            "name": "Input dataset", 
            "outputs": [], 
            "position": {
                "left": 209, 
                "top": 292
            }, 
            "tool_errors": null, 
            "tool_id": null, 
            "tool_state": "{\"name\": \"Ensembl Genome Reference Fasta\"}", 
            "tool_version": null, 
            "type": "data_input", 
            "user_outputs": []
        }, 
        "2": {
            "annotation": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz", 
            "id": 2, 
            "input_connections": {}, 
            "inputs": [
                {
                    "description": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz", 
                    "name": "Ensembl GTF File (gene models)"
                }
            ], 
            "name": "Input dataset", 
            "outputs": [], 
            "position": {
                "left": 213, 
                "top": 456
            }, 
            "tool_errors": null, 
            "tool_id": null, 
            "tool_state": "{\"name\": \"Ensembl GTF File (gene models)\"}", 
            "tool_version": null, 
            "type": "data_input", 
            "user_outputs": []
        }, 
        "3": {
            "annotation": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", 
            "id": 3, 
            "input_connections": {}, 
            "inputs": [
                {
                    "description": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", 
                    "name": "RNA-Seq left paired-end fastq"
                }
            ], 
            "name": "Input dataset", 
            "outputs": [], 
            "position": {
                "left": 220, 
                "top": 563
            }, 
            "tool_errors": null, 
            "tool_id": null, 
            "tool_state": "{\"name\": \"RNA-Seq left paired-end fastq\"}", 
            "tool_version": null, 
            "type": "data_input", 
            "user_outputs": []
        }, 
        "4": {
            "annotation": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", 
            "id": 4, 
            "input_connections": {}, 
            "inputs": [
                {
                    "description": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with \"Fastq Groomer\" tool.)", 
                    "name": "RNA-Seq right paired-end fastq"
                }
            ], 
            "name": "Input dataset", 
            "outputs": [], 
            "position": {
                "left": 221, 
                "top": 673
            }, 
            "tool_errors": null, 
            "tool_id": null, 
            "tool_state": "{\"name\": \"RNA-Seq right paired-end fastq\"}", 
            "tool_version": null, 
            "type": "data_input", 
            "user_outputs": []
        }, 
        "5": {
            "annotation": "Convert peptide fasta to a 2-column tabular file.  Keep all the head info.", 
            "id": 5, 
            "input_connections": {
                "input": {
                    "id": 0, 
                    "output_name": "output"
                }
            }, 
            "inputs": [], 
            "name": "FASTA-to-Tabular", 
            "outputs": [
                {
                    "name": "output", 
                    "type": "tabular"
                }
            ], 
            "position": {
                "left": 538, 
                "top": 267
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "fasta2tab", 
            "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"__rerun_remap_job_id__\": null}", 
            "tool_version": "1.1.0", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "6": {
            "annotation": "Given a GTF file and the reference genome, this tool constructs a synthetic transcriptome that will be used for isoform quantification during \"-calculate expression\".", 
            "id": 6, 
            "input_connections": {
                "reference|gtf": {
                    "id": 2, 
                    "output_name": "output"
                }, 
                "reference|reference_fasta_file": {
                    "id": 1, 
                    "output_name": "output"
                }
            }, 
            "inputs": [], 
            "name": "RSEM prepare reference", 
            "outputs": [
                {
                    "name": "reference_file", 
                    "type": "rsem_ref"
                }
            ], 
            "position": {
                "left": 419, 
                "top": 388
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_prepare_reference/1.1.17", 
            "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"ref_type\\\": \\\"genomic\\\", \\\"gtf\\\": null, \\\"reference_fasta_file\\\": null, \\\"__current_case__\\\": 1}\", \"reference_name\": \"\\\"primaryEnsemblGtfRef\\\"\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"polya\": \"{\\\"polya_use\\\": \\\"add\\\", \\\"polya_length\\\": \\\"125\\\", \\\"__current_case__\\\": 0}\", \"transcript_to_gene_map\": \"null\", \"ntog\": \"\\\"False\\\"\"}", 
            "tool_version": "1.1.17", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "7": {
            "annotation": "", 
            "id": 7, 
            "input_connections": {
                "infile": {
                    "id": 5, 
                    "output_name": "output"
                }
            }, 
            "inputs": [], 
            "name": "Regex Replace", 
            "outputs": [
                {
                    "name": "outfile", 
                    "type": "txt"
                }
            ], 
            "position": {
                "left": 802, 
                "top": 281
            }, 
            "post_job_actions": {
                "ChangeDatatypeActionoutfile": {
                    "action_arguments": {
                        "newtype": "tabular"
                    }, 
                    "action_type": "ChangeDatatypeAction", 
                    "output_name": "outfile"
                }
            }, 
            "tool_errors": null, 
            "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0", 
            "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.* transcript:)(ENST\\\\\\\\d+)(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1\\\\\\\\2\\\\\\\\3\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}", 
            "tool_version": "1.0.0", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "8": {
            "annotation": "Given then RNA-Seq reads (fastq) and synthetic transcriptome (from \"-prepare reference\"), this tool quantifies the abundances of each mRNA transcript within the GTF file.", 
            "id": 8, 
            "input_connections": {
                "input|fastq|fastq1": {
                    "id": 3, 
                    "output_name": "output"
                }, 
                "input|fastq|fastq2": {
                    "id": 4, 
                    "output_name": "output"
                }, 
                "reference|rsem_ref": {
                    "id": 6, 
                    "output_name": "reference_file"
                }
            }, 
            "inputs": [], 
            "name": "RSEM calculate expression", 
            "outputs": [
                {
                    "name": "gene_abundances", 
                    "type": "tabular"
                }, 
                {
                    "name": "isoform_abundances", 
                    "type": "tabular"
                }, 
                {
                    "name": "transcript_bam", 
                    "type": "bam"
                }, 
                {
                    "name": "transcript_sorted_bam", 
                    "type": "bam"
                }, 
                {
                    "name": "genome_bam", 
                    "type": "bam"
                }, 
                {
                    "name": "genome_sorted_bam", 
                    "type": "bam"
                }, 
                {
                    "name": "log", 
                    "type": "txt"
                }
            ], 
            "position": {
                "left": 719, 
                "top": 523
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_calculate_expression/1.1.17", 
            "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"rsem_ref\\\": null, \\\"refSrc\\\": \\\"history\\\", \\\"__current_case__\\\": 1}\", \"rsem_options\": \"{\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}\", \"rsem_outputs\": \"{\\\"result_bams\\\": \\\"none\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"seedlength\": \"\\\"25\\\"\", \"sample\": \"\\\"rsem_sample\\\"\", \"forward_prob\": \"\\\"0.5\\\"\", \"input\": \"{\\\"fastq\\\": {\\\"fastq2\\\": null, \\\"fastq1\\\": null, \\\"matepair\\\": \\\"paired\\\", \\\"__current_case__\\\": 1}, \\\"bowtie_options\\\": {\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}, \\\"fastq_select\\\": \\\"--phred33-quals\\\", \\\"__current_case__\\\": 0, \\\"format\\\": \\\"fastq\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", 
            "tool_version": "1.1.17", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "9": {
            "annotation": "Selection of lower threshold of transcriptional abundance in TPM required for inclusion of the corresponding protein in the reduced database.", 
            "id": 9, 
            "input_connections": {
                "input": {
                    "id": 8, 
                    "output_name": "isoform_abundances"
                }
            }, 
            "inputs": [], 
            "name": "Filter", 
            "outputs": [
                {
                    "name": "out_file1", 
                    "type": "input"
                }
            ], 
            "position": {
                "left": 991, 
                "top": 591
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "Filter1", 
            "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3>0.000001\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", 
            "tool_version": "1.1.0", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "10": {
            "annotation": "Add a column with the  RSEM TPM times a million.", 
            "id": 10, 
            "input_connections": {
                "input": {
                    "id": 9, 
                    "output_name": "out_file1"
                }
            }, 
            "inputs": [], 
            "name": "Compute", 
            "outputs": [
                {
                    "name": "out_file1", 
                    "type": "input"
                }
            ], 
            "position": {
                "left": 1199, 
                "top": 574
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "Add_a_column1", 
            "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3*1000000\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"round\": \"\\\"no\\\"\"}", 
            "tool_version": "1.1.0", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "11": {
            "annotation": "", 
            "id": 11, 
            "input_connections": {
                "input1": {
                    "id": 7, 
                    "output_name": "outfile"
                }, 
                "input2": {
                    "id": 10, 
                    "output_name": "out_file1"
                }
            }, 
            "inputs": [], 
            "name": "Join two Datasets", 
            "outputs": [
                {
                    "name": "out_file1", 
                    "type": "input"
                }
            ], 
            "position": {
                "left": 1350, 
                "top": 419
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "join1", 
            "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"3\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", 
            "tool_version": "2.0.2", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "12": {
            "annotation": "", 
            "id": 12, 
            "input_connections": {
                "infile": {
                    "id": 11, 
                    "output_name": "out_file1"
                }
            }, 
            "inputs": [], 
            "name": "Regex Replace", 
            "outputs": [
                {
                    "name": "outfile", 
                    "type": "txt"
                }
            ], 
            "position": {
                "left": 1545, 
                "top": 546
            }, 
            "post_job_actions": {
                "ChangeDatatypeActionoutfile": {
                    "action_arguments": {
                        "newtype": "tabular"
                    }, 
                    "action_type": "ChangeDatatypeAction", 
                    "output_name": "outfile"
                }
            }, 
            "tool_errors": null, 
            "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0", 
            "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1 tmp:\\\\\\\\8\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}", 
            "tool_version": "1.0.0", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "13": {
            "annotation": "Final reduced database after application of a TPM cut-off.", 
            "id": 13, 
            "input_connections": {
                "input": {
                    "id": 12, 
                    "output_name": "outfile"
                }
            }, 
            "inputs": [], 
            "name": "Tabular-to-FASTA", 
            "outputs": [
                {
                    "name": "output", 
                    "type": "fasta"
                }
            ], 
            "position": {
                "left": 1743, 
                "top": 484
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "tab2fasta", 
            "tool_state": "{\"title_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\", \"__page__\": 0, \"seq_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"__rerun_remap_job_id__\": null, \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}", 
            "tool_version": "1.1.0", 
            "type": "tool", 
            "user_outputs": []
        }, 
        "14": {
            "annotation": "Format FASTA to desired width.", 
            "id": 14, 
            "input_connections": {
                "input": {
                    "id": 13, 
                    "output_name": "output"
                }
            }, 
            "inputs": [], 
            "name": "FASTA Width", 
            "outputs": [
                {
                    "name": "output", 
                    "type": "input"
                }
            ], 
            "position": {
                "left": 1939, 
                "top": 569
            }, 
            "post_job_actions": {}, 
            "tool_errors": null, 
            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_formatter/cshl_fasta_formatter/1.0.0", 
            "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"width\": \"\\\"80\\\"\"}", 
            "tool_version": "1.0.0", 
            "type": "tool", 
            "user_outputs": []
        }
    }
}