Previous changeset 5:06470b2e491f (2018-06-25) Next changeset 7:e732a731f778 (2022-09-01) |
Commit message:
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/workflows/GeneSeqToFamily commit 3dbeddc06c9d15aadcc66a7eb7376c29da9233a3" |
modified:
GeneSeqToFamily.ga GeneSeqToFamily.png |
added:
tutorial.md |
b |
diff -r 06470b2e491f -r 266800d51605 GeneSeqToFamily.ga --- a/GeneSeqToFamily.ga Mon Jun 25 18:50:55 2018 -0400 +++ b/GeneSeqToFamily.ga Thu Jun 10 16:18:59 2021 +0000 |
[ |
b'@@ -20,8 +20,8 @@\n "name": "Input dataset", \n "outputs": [], \n "position": {\n- "left": 200, \n- "top": 266\n+ "left": 225.984375, \n+ "top": 532.5\n }, \n "tool_id": null, \n "tool_state": "{\\"name\\": \\"CDS\\"}", \n@@ -45,26 +45,26 @@\n "inputs": [\n {\n "description": "", \n- "name": "Gene feature information"\n+ "name": "Species tree"\n }\n ], \n- "label": "Gene feature information", \n+ "label": "Species tree", \n "name": "Input dataset", \n "outputs": [], \n "position": {\n- "left": 202.8125, \n- "top": 469.0625\n+ "left": 244.875, \n+ "top": 947.578125\n }, \n "tool_id": null, \n- "tool_state": "{\\"name\\": \\"Gene feature information\\"}", \n+ "tool_state": "{\\"name\\": \\"Species tree\\"}", \n "tool_version": null, \n "type": "data_input", \n- "uuid": "3fbf4a09-9207-4a18-a133-0d865f9614f6", \n+ "uuid": "1f2ca308-faad-44fa-ba48-fff5aeecbbd3", \n "workflow_outputs": [\n {\n "label": null, \n "output_name": "output", \n- "uuid": "39787c6d-ecd8-4147-b718-83d739202b3b"\n+ "uuid": "50734600-41fa-437a-9b2b-6572fd8763a2"\n }\n ]\n }, \n@@ -77,26 +77,26 @@\n "inputs": [\n {\n "description": "", \n- "name": "Species tree"\n+ "name": "Gene feature information"\n }\n ], \n- "label": "Species tree", \n+ "label": "Gene feature information", \n "name": "Input dataset", \n "outputs": [], \n "position": {\n- "left": 201.921875, \n- "top": 903.09375\n+ "left": 247.796875, \n+ "top": 1200.5625\n }, \n "tool_id": null, \n- "tool_state": "{\\"name\\": \\"Species tree\\"}", \n+ "tool_state": "{\\"name\\": \\"Gene feature information\\"}", \n "tool_version": null, \n "type": "data_input", \n- "uuid": "1f2ca308-faad-44fa-ba48-fff5aeecbbd3", \n+ "uuid": "3fbf4a09-9207-4a18-a133-0d865f9614f6", \n "workflow_outputs": [\n {\n "label": null, \n "output_name": "output", \n- "uuid": "50734600-41fa-437a-9b2b-6572fd8763a2"\n+ "uuid": "39787c6d-ecd8-4147-b718-83d739202b3b"\n }\n ]\n }, \n@@ -111,12 +111,7 @@\n "output_name": "output"\n }\n }, \n- "inputs": [\n- {\n- "description": 
"runtime parameter for tool transeq", \n- "name": "input1"\n- }\n- ], \n+ "inputs": [], \n "label": null, \n "name": "transeq", \n "outputs": [\n@@ -126,8 +121,8 @@\n }\n ], \n "position": {\n- "left": 460.0625, \n- "top": 200\n+ "left": 550.046875, \n+ "top": 260.484375\n }, \n "post_job_actions": {\n "HideDatasetActionout_file1": {\n@@ -138,12 +133,12 @@\n }, \n "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/emboss_5/EMBOSS: transeq101/5.0.0", \n "tool_shed_repository": {\n- "changeset_revision": "1b6538ec8b56", \n+ "changeset_revision": "dc492eb6a4fc", \n "name": "emboss_5", \n "owner": "devteam", \n "tool_shed": "too'..b'{\\"__page__\\": null, \\"treeFile\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"ConnectedValue\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"alignmentFile\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"ConnectedValue\\\\\\"}\\", \\"genesFile\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"ConnectedValue\\\\\\"}\\"}", \n "tool_version": "0.3.0", \n "type": "tool", \n "uuid": "049fb5e1-4f3b-4cf5-80a6-2fa609674fc5", \n@@ -840,8 +864,91 @@\n "uuid": "a3dbb7a3-356a-4019-a10d-08896e990c8e"\n }\n ]\n+ }, \n+ "19": {\n+ "annotation": "", \n+ "content_id": "__FLATTEN__", \n+ "errors": null, \n+ "id": 19, \n+ "input_connections": {\n+ "input": {\n+ "id": 17, \n+ "output_name": "genetrees_lists"\n+ }\n+ }, \n+ "inputs": [], \n+ "label": null, \n+ "name": "Flatten Collection", \n+ "outputs": [\n+ {\n+ "name": "output", \n+ "type": "input"\n+ }\n+ ], \n+ "position": {\n+ "left": 2206.984375, \n+ "top": 1024.96875\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output"\n+ }\n+ }, \n+ "tool_id": "__FLATTEN__", \n+ "tool_state": "{\\"input\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"ConnectedValue\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"join_identifier\\": \\"\\\\\\"_\\\\\\"\\", \\"__page__\\": null}", \n+ "tool_version": "1.0.0", \n+ "type": "tool", \n+ "uuid": 
"5f6a95f6-1036-4a33-b610-a080b971a324", \n+ "workflow_outputs": []\n+ }, \n+ "20": {\n+ "annotation": "", \n+ "content_id": "toolshed.g2.bx.psu.edu/repos/earlhaminst/ete/ete_homology_classifier/3.1.1", \n+ "errors": null, \n+ "id": 20, \n+ "input_connections": {\n+ "genetreeFile": {\n+ "id": 19, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "label": null, \n+ "name": "Homology Classifier and Filter", \n+ "outputs": [\n+ {\n+ "name": "homology", \n+ "type": "tabular"\n+ }\n+ ], \n+ "position": {\n+ "left": 2341.984375, \n+ "top": 1160.96875\n+ }, \n+ "post_job_actions": {}, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/earlhaminst/ete/ete_homology_classifier/3.1.1", \n+ "tool_shed_repository": {\n+ "changeset_revision": "6a5282f71f82", \n+ "name": "ete", \n+ "owner": "earlhaminst", \n+ "tool_shed": "toolshed.g2.bx.psu.edu"\n+ }, \n+ "tool_state": "{\\"__page__\\": null, \\"__rerun_remap_job_id__\\": null, \\"genetreeFile\\": \\"{\\\\\\"__class__\\\\\\": \\\\\\"ConnectedValue\\\\\\"}\\", \\"format_type\\": \\"{\\\\\\"__current_case__\\\\\\": 0, \\\\\\"homologies\\\\\\": [\\\\\\"one-to-one\\\\\\", \\\\\\"one-to-many\\\\\\", \\\\\\"many-to-one\\\\\\", \\\\\\"many-to-many\\\\\\", \\\\\\"paralogs\\\\\\"], \\\\\\"out_format\\\\\\": \\\\\\"tabular\\\\\\"}\\"}", \n+ "tool_version": "3.1.1", \n+ "type": "tool", \n+ "uuid": "427c5e21-0ed7-41b9-8e2b-447ea99e5c29", \n+ "workflow_outputs": [\n+ {\n+ "label": null, \n+ "output_name": "homology", \n+ "uuid": "d20dba82-b0c5-41d7-9f2e-f67240548967"\n+ }\n+ ]\n }\n }, \n "tags": [], \n- "uuid": "bbf45200-b853-41bf-a853-abb1dec8ca9d"\n+ "uuid": "45eedb69-98b5-4ea2-9f17-791fa54ef823", \n+ "version": 20\n }\n\\ No newline at end of file\n' |
b |
diff -r 06470b2e491f -r 266800d51605 GeneSeqToFamily.png |
b |
Binary file GeneSeqToFamily.png has changed |
b |
diff -r 06470b2e491f -r 266800d51605 tutorial.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tutorial.md Thu Jun 10 16:18:59 2021 +0000 |
[ |
@@ -0,0 +1,89 @@ +# Introduction + +This tutorial explains how to use the GeneSeqToFamily Galaxy workflow proficiently. The workflow was published in the paper by Thanki et al. (2018), "GeneSeqToFamily: a Galaxy workflow to find gene families based on the Ensembl Compara GeneTrees pipeline", https://doi.org/10.1093/gigascience/giy005 + +## Galaxy +If you are new to Galaxy, familiarise yourself with it using the introductory [slides](https://training.galaxyproject.org/training-material/topics/introduction/slides/introduction.html#1) and [hands-on tutorial](https://training.galaxyproject.org/training-material/topics/introduction/tutorials/galaxy-intro-short/tutorial.html). + +## GeneSeqToFamily workflow + +The GeneSeqToFamily workflow can either be installed from the Galaxy ToolShed or downloaded from https://github.com/TGAC/earlham-galaxytools/tree/master/workflows/GeneSeqToFamily and then imported into a local Galaxy instance or a public instance where the necessary tools are installed, e.g. [Galaxy Europe](https://usegalaxy.eu). + + +# Importing input data + +### Hands-on: Data upload +1. Make sure you have an empty analysis history. Give it a name. +### Tip: Starting a new history +* Click the gear icon at the top of the history panel +* Select the option Create New from the menu + + +2. 
Import Sample Data +* FASTA file: [`CDS.fasta`](https://doi.org/10.5281/zenodo.1256760) +* JSON file: [`gene.json`](https://doi.org/10.5281/zenodo.1256762) +* Species tree: [`species.nhx`](https://doi.org/10.5281/zenodo.1256753) +### Tip: Importing data via links +* Copy the link locations +* Open the Galaxy Upload Manager +* Select Paste/Fetch Data +* Paste the link into the text field +* Press Start +### Tip: Changing the file type to nhx once the data file is in your history +* Click on the pencil button displayed on your data file in the history +* Choose Datatype at the top +* Select nhx +* Press Save + +### Hands-on: Renaming a dataset + Rename the dataset to “First dataset” + +By default, when data is imported via its link, Galaxy names it with its URL. + +# Data Preparation + +To convert the uploaded data into the format accepted by the GeneSeqToFamily workflow: + +## GeneSeqToFamily preparation +GeneSeqToFamily preparation is a Galaxy tool that converts genomic information from GFF/JSON format to SQLite format for easy access during the workflow. It can also add species information to the header line of the FASTA sequences. + +### Hands-on: GeneSeqToFamily preparation: Run GeneSeqToFamily preparation on the imported GFF/JSON and FASTA files +1. GeneSeqToFamily preparation +* Select the JSON and/or GFF files +* Add the specific species name (in the case of GFF files) +* Corresponding CDS datasets in FASTA format: select all FASTA datasets +* Which transcripts to keep: Only canonical transcripts (or longest CDS per gene) +* Change the header line of the FASTA sequences to the following format: TranscriptId_species +* Comma-separated list of region IDs (e.g. chromosomes or scaffolds) for which FASTA sequences should be filtered: +* Run the tool + + + +# Running the workflow + +1. 
GeneSeqToFamily workflow +* Select the CDS dataset generated by the GeneSeqToFamily preparation tool +* Select the Gene feature information, i.e. the SQLite database generated by the GeneSeqToFamily preparation tool +* Select the Species tree + * The species tree can be generated using the `ete_species_tree` generator tool +* Run the workflow + + +# Visualisation + +## Aequatus visualisation plugin + +The SQLite database generated by the GAFA tool can be rendered using a new visualisation plugin, Aequatus.js. The Aequatus.js library, developed as part of the Aequatus project, has been configured to be used within Galaxy to visualise homologous gene structure and gene family relationships. + +### Hands-on: Aequatus visualisation plugin +1. Aequatus visualisation plugin +* In the history panel, expand the dataset generated by the previous step +* Choose GeneTree from the side panel +* Visualise different GeneTrees + + + +# Conclusion + + +Here we covered the various steps of the GeneSeqToFamily workflow. In this tutorial we used the default parameters for the workflow steps. They might need to be changed for different sources of data. |