Mercurial > repos > galaxyp > proteomics_rnaseq_reduced_db_workflow

{
    "a_galaxy_workflow": "true",
    "annotation": "Filter out proteins that have a transcript expression level, as quantified by RNA-Seq data, below a certain threshold.",
    "format-version": "0.1",
    "name": "Proteomics Reduced DB v2",
    "steps": {
        "0": {
            "annotation": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz",
            "id": 0,
            "input_connections": {},
            "inputs": [
                {
                    "description": "ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/pep/Homo_sapiens.GRCh37.73.pep.all.fa.gz",
                    "name": "Ensembl Protein FASTA (reference proteome)"
                }
            ],
            "name": "Input dataset",
            "outputs": [],
            "position": {
                "left": 208,
                "top": 200
            },
            "tool_errors": null,
            "tool_id": null,
            "tool_state": "{\"name\": \"Ensembl Protein FASTA (reference proteome)\"}",
            "tool_version": null,
            "type": "data_input",
            "user_outputs": []
        },
        "1": {
            "annotation": "Ensembl reference FASTA containing only chromosome-assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz",
            "id": 1,
            "input_connections": {},
            "inputs": [
                {
                    "description": "Ensembl reference FASTA containing only chromosome-assigned sequences. For example: ftp://ftp.ensembl.org/pub/release-73/fasta/homo_sapiens/dna/Homo_sapiens.GRCh37.73.dna.toplevel.fa.gz",
                    "name": "Ensembl Genome Reference Fasta"
                }
            ],
            "name": "Input dataset",
            "outputs": [],
            "position": {
                "left": 209,
                "top": 292
            },
            "tool_errors": null,
            "tool_id": null,
            "tool_state": "{\"name\": \"Ensembl Genome Reference Fasta\"}",
            "tool_version": null,
            "type": "data_input",
            "user_outputs": []
        },
        "2": {
            "annotation": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz",
            "id": 2,
            "input_connections": {},
            "inputs": [
                {
                    "description": "For example: \nftp://ftp.ensembl.org/pub/release-73/gtf/homo_sapiens/Homo_sapiens.GRCh37.73.gtf.gz",
                    "name": "Ensembl GTF File (gene models)"
                }
            ],
            "name": "Input dataset",
            "outputs": [],
            "position": {
                "left": 213,
                "top": 456
            },
            "tool_errors": null,
            "tool_id": null,
            "tool_state": "{\"name\": \"Ensembl GTF File (gene models)\"}",
            "tool_version": null,
            "type": "data_input",
            "user_outputs": []
        },
        "3": {
            "annotation": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with the \"Fastq Groomer\" tool.)",
            "id": 3,
            "input_connections": {},
            "inputs": [
                {
                    "description": "RNA-Seq left mate pair fastq (These should be in fastqsanger format. If not, convert with the \"Fastq Groomer\" tool.)",
                    "name": "RNA-Seq left paired-end fastq"
                }
            ],
            "name": "Input dataset",
            "outputs": [],
            "position": {
                "left": 220,
                "top": 563
            },
            "tool_errors": null,
            "tool_id": null,
            "tool_state": "{\"name\": \"RNA-Seq left paired-end fastq\"}",
            "tool_version": null,
            "type": "data_input",
            "user_outputs": []
        },
        "4": {
            "annotation": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with the \"Fastq Groomer\" tool.)",
            "id": 4,
            "input_connections": {},
            "inputs": [
                {
                    "description": "RNA-Seq right mate pair fastq (These should be in fastqsanger format. If not, convert with the \"Fastq Groomer\" tool.)",
                    "name": "RNA-Seq right paired-end fastq"
                }
            ],
            "name": "Input dataset",
            "outputs": [],
            "position": {
                "left": 221,
                "top": 673
            },
            "tool_errors": null,
            "tool_id": null,
            "tool_state": "{\"name\": \"RNA-Seq right paired-end fastq\"}",
            "tool_version": null,
            "type": "data_input",
            "user_outputs": []
        },
        "5": {
            "annotation": "Convert peptide FASTA to a 2-column tabular file. Keep all the header info.",
            "id": 5,
            "input_connections": {
                "input": {
                    "id": 0,
                    "output_name": "output"
                }
            },
            "inputs": [],
            "name": "FASTA-to-Tabular",
            "outputs": [
                {
                    "name": "output",
                    "type": "tabular"
                }
            ],
            "position": {
                "left": 538,
                "top": 267
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "fasta2tab",
            "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"__rerun_remap_job_id__\": null}",
            "tool_version": "1.1.0",
            "type": "tool",
            "user_outputs": []
        },
        "6": {
            "annotation": "Given a GTF file and the reference genome, this tool constructs a synthetic transcriptome that will be used for isoform quantification during \"-calculate expression\".",
            "id": 6,
            "input_connections": {
                "reference|gtf": {
                    "id": 2,
                    "output_name": "output"
                },
                "reference|reference_fasta_file": {
                    "id": 1,
                    "output_name": "output"
                }
            },
            "inputs": [],
            "name": "RSEM prepare reference",
            "outputs": [
                {
                    "name": "reference_file",
                    "type": "rsem_ref"
                }
            ],
            "position": {
                "left": 419,
                "top": 388
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_prepare_reference/1.1.17",
            "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"ref_type\\\": \\\"genomic\\\", \\\"gtf\\\": null, \\\"reference_fasta_file\\\": null, \\\"__current_case__\\\": 1}\", \"reference_name\": \"\\\"primaryEnsemblGtfRef\\\"\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"polya\": \"{\\\"polya_use\\\": \\\"add\\\", \\\"polya_length\\\": \\\"125\\\", \\\"__current_case__\\\": 0}\", \"transcript_to_gene_map\": \"null\", \"ntog\": \"\\\"False\\\"\"}",
            "tool_version": "1.1.17",
            "type": "tool",
            "user_outputs": []
        },
        "7": {
            "annotation": "",
            "id": 7,
            "input_connections": {
                "infile": {
                    "id": 5,
                    "output_name": "output"
                }
            },
            "inputs": [],
            "name": "Regex Replace",
            "outputs": [
                {
                    "name": "outfile",
                    "type": "txt"
                }
            ],
            "position": {
                "left": 802,
                "top": 281
            },
            "post_job_actions": {
                "ChangeDatatypeActionoutfile": {
                    "action_arguments": {
                        "newtype": "tabular"
                    },
                    "action_type": "ChangeDatatypeAction",
                    "output_name": "outfile"
                }
            },
            "tool_errors": null,
            "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0",
            "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.* transcript:)(ENST\\\\\\\\d+)(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1\\\\\\\\2\\\\\\\\3\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}",
            "tool_version": "1.0.0",
            "type": "tool",
            "user_outputs": []
        },
        "8": {
            "annotation": "Given the RNA-Seq reads (fastq) and the synthetic transcriptome (from \"-prepare reference\"), this tool quantifies the abundances of each mRNA transcript within the GTF file.",
            "id": 8,
            "input_connections": {
                "input|fastq|fastq1": {
                    "id": 3,
                    "output_name": "output"
                },
                "input|fastq|fastq2": {
                    "id": 4,
                    "output_name": "output"
                },
                "reference|rsem_ref": {
                    "id": 6,
                    "output_name": "reference_file"
                }
            },
            "inputs": [],
            "name": "RSEM calculate expression",
            "outputs": [
                {
                    "name": "gene_abundances",
                    "type": "tabular"
                },
                {
                    "name": "isoform_abundances",
                    "type": "tabular"
                },
                {
                    "name": "transcript_bam",
                    "type": "bam"
                },
                {
                    "name": "transcript_sorted_bam",
                    "type": "bam"
                },
                {
                    "name": "genome_bam",
                    "type": "bam"
                },
                {
                    "name": "genome_sorted_bam",
                    "type": "bam"
                },
                {
                    "name": "log",
                    "type": "txt"
                }
            ],
            "position": {
                "left": 719,
                "top": 523
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/rsem/rsem_calculate_expression/1.1.17",
            "tool_state": "{\"__page__\": 0, \"reference\": \"{\\\"rsem_ref\\\": null, \\\"refSrc\\\": \\\"history\\\", \\\"__current_case__\\\": 1}\", \"rsem_options\": \"{\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}\", \"rsem_outputs\": \"{\\\"result_bams\\\": \\\"none\\\", \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"seedlength\": \"\\\"25\\\"\", \"sample\": \"\\\"rsem_sample\\\"\", \"forward_prob\": \"\\\"0.5\\\"\", \"input\": \"{\\\"fastq\\\": {\\\"fastq2\\\": null, \\\"fastq1\\\": null, \\\"matepair\\\": \\\"paired\\\", \\\"__current_case__\\\": 1}, \\\"bowtie_options\\\": {\\\"fullparams\\\": \\\"default\\\", \\\"__current_case__\\\": 0}, \\\"fastq_select\\\": \\\"--phred33-quals\\\", \\\"__current_case__\\\": 0, \\\"format\\\": \\\"fastq\\\"}\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
            "tool_version": "1.1.17",
            "type": "tool",
            "user_outputs": []
        },
        "9": {
            "annotation": "Selection of the lower threshold of transcriptional abundance in TPM required for inclusion of the corresponding protein in the reduced database.",
            "id": 9,
            "input_connections": {
                "input": {
                    "id": 8,
                    "output_name": "isoform_abundances"
                }
            },
            "inputs": [],
            "name": "Filter",
            "outputs": [
                {
                    "name": "out_file1",
                    "type": "input"
                }
            ],
            "position": {
                "left": 991,
                "top": 591
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "Filter1",
            "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3>0.000001\\\"\", \"input\": \"null\", \"header_lines\": \"\\\"0\\\"\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
            "tool_version": "1.1.0",
            "type": "tool",
            "user_outputs": []
        },
        "10": {
            "annotation": "Add a column with the RSEM TPM times a million.",
            "id": 10,
            "input_connections": {
                "input": {
                    "id": 9,
                    "output_name": "out_file1"
                }
            },
            "inputs": [],
            "name": "Compute",
            "outputs": [
                {
                    "name": "out_file1",
                    "type": "input"
                }
            ],
            "position": {
                "left": 1199,
                "top": 574
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "Add_a_column1",
            "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"cond\": \"\\\"c3*1000000\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"round\": \"\\\"no\\\"\"}",
            "tool_version": "1.1.0",
            "type": "tool",
            "user_outputs": []
        },
        "11": {
            "annotation": "",
            "id": 11,
            "input_connections": {
                "input1": {
                    "id": 7,
                    "output_name": "outfile"
                },
                "input2": {
                    "id": 10,
                    "output_name": "out_file1"
                }
            },
            "inputs": [],
            "name": "Join two Datasets",
            "outputs": [
                {
                    "name": "out_file1",
                    "type": "input"
                }
            ],
            "position": {
                "left": 1350,
                "top": 419
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "join1",
            "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"field1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"3\\\"}\", \"partial\": \"\\\"\\\"\", \"field2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"__rerun_remap_job_id__\": null, \"fill_empty_columns\": \"{\\\"fill_empty_columns_switch\\\": \\\"no_fill\\\", \\\"__current_case__\\\": 0}\", \"unmatched\": \"\\\"\\\"\", \"input1\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
            "tool_version": "2.0.2",
            "type": "tool",
            "user_outputs": []
        },
        "12": {
            "annotation": "",
            "id": 12,
            "input_connections": {
                "infile": {
                    "id": 11,
                    "output_name": "out_file1"
                }
            },
            "inputs": [],
            "name": "Regex Replace",
            "outputs": [
                {
                    "name": "outfile",
                    "type": "txt"
                }
            ],
            "position": {
                "left": 1545,
                "top": 546
            },
            "post_job_actions": {
                "ChangeDatatypeActionoutfile": {
                    "action_arguments": {
                        "newtype": "tabular"
                    },
                    "action_type": "ChangeDatatypeAction",
                    "output_name": "outfile"
                }
            },
            "tool_errors": null,
            "tool_id": "toolshed.g2.bx.psu.edu/repos/kellrott/regex_replace/regex_replace/1.0.0",
            "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"search_str\": \"\\\"^(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)\\\\\\\\t(.*)$\\\"\", \"__rerun_remap_job_id__\": null, \"replace_str\": \"\\\"\\\\\\\\1 tmp:\\\\\\\\8\\\\\\\\t\\\\\\\\2\\\"\", \"replace_count\": \"\\\"0\\\"\", \"multiline\": \"\\\"False\\\"\", \"infile\": \"null\", \"dot_all\": \"\\\"False\\\"\"}",
            "tool_version": "1.0.0",
            "type": "tool",
            "user_outputs": []
        },
        "13": {
            "annotation": "Final reduced database after application of a TPM cut-off.",
            "id": 13,
            "input_connections": {
                "input": {
                    "id": 12,
                    "output_name": "outfile"
                }
            },
            "inputs": [],
            "name": "Tabular-to-FASTA",
            "outputs": [
                {
                    "name": "output",
                    "type": "fasta"
                }
            ],
            "position": {
                "left": 1743,
                "top": 484
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "tab2fasta",
            "tool_state": "{\"title_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\", \"__page__\": 0, \"seq_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"__rerun_remap_job_id__\": null, \"input\": \"null\", \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\"}",
            "tool_version": "1.1.0",
            "type": "tool",
            "user_outputs": []
        },
        "14": {
            "annotation": "Format FASTA to desired width.",
            "id": 14,
            "input_connections": {
                "input": {
                    "id": 13,
                    "output_name": "output"
                }
            },
            "inputs": [],
            "name": "FASTA Width",
            "outputs": [
                {
                    "name": "output",
                    "type": "input"
                }
            ],
            "position": {
                "left": 1939,
                "top": 569
            },
            "post_job_actions": {},
            "tool_errors": null,
            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fasta_formatter/cshl_fasta_formatter/1.0.0",
            "tool_state": "{\"__page__\": 0, \"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/website/galaxy.msi.umn.edu/PRODUCTION/tool-data/shared/ucsc/chrom/GRCm38_canon.len\\\"\", \"width\": \"\\\"80\\\"\"}",
            "tool_version": "1.0.0",
            "type": "tool",
            "user_outputs": []
        }
    }
}
author	Jim Johnson <jj@umn.edu>
date	Thu, 20 Mar 2014 21:50:05 -0500
parents
children