annotate test-data/annotemp/pivot_wider_jupytool_notebook.ipynb @ 0:ad96b20423cf draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
author ecology
date Fri, 27 Sep 2024 13:01:04 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
1 {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
2 "cells": [
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
3 {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
4 "cell_type": "markdown",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
5 "metadata": {},
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
6 "source": [
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
7 "# Pivot wider Jupytool "
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
8 ]
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
9 },
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
10 {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
11 "cell_type": "markdown",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
12 "metadata": {},
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
13 "source": [
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
14 "This Jupyter notebook is dedicated to the pivot_wider function from the tidyr R package. \n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
15 "This script is the final part of the data preparation for the ecoregionalization Galaxy workflow. "
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
16 ]
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
17 },
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
18 {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
19 "cell_type": "code",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
20 "execution_count": 62,
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
21 "metadata": {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
22 "tags": []
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
23 },
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
24 "outputs": [],
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
25 "source": [
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
26 "#Date : 22/05/2024\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
27 "#Author : Seguineau Pauline & Yvan Le Bras \n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
28 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
29 "#Load libraries\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
30 "library(tidyr)\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
31 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
32 "#load file \n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
33 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
34 "input_path = \"galaxy_inputs\"\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
35 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
36 "for (dir in list.dirs(input_path)){\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
37 " for (file in list.files(dir)) {\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
38 " file_path = file.path(dir, file)}\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
39 "}\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
40 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
41 "file = read.table(file_path,header=T, sep = \"\\t\")\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
42 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
43 "#Run pivot_wider function\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
44 "pivot_file = pivot_wider(data = file,\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
45 " names_from = phylum_class_order_family_genus_specificEpithet,\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
46 " values_from = individualCount,\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
47 " values_fill = 0,\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
48 " values_fn = sum)\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
49 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
50 "#Replace all occurrences >= 1 by 1 to have only presence (1) or absence (0) data\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
51 "for(c in 3:length(pivot_file)){\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
52 " pivot_file[c][pivot_file[c]>=1] <- 1}\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
53 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
54 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
55 "write.table(pivot_file, \"outputs/pivot_file.tabular\", sep = \"\\t\", quote = F, row.names = F)"
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
56 ]
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
57 },
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
58 {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
59 "cell_type": "markdown",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
60 "metadata": {},
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
61 "source": [
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
62 "In this Jupyter notebook, we used the pivot_wider function of the tidyr package to transform our data into a wider format adapted to subsequent analyses as part of the Galaxy workflow for ecoregionalization. This transformation allowed us to convert our data to a format where each taxon becomes a separate column. We also took care to fill in the missing values with zeros and to sum the individual counts in case of duplications. Then all values >= 1 are replaced by 1 to have only presence (1) or absence (0) data.\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
63 "\n",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
64 "Thus, this notebook is an essential building block of our analysis pipeline, ensuring that the data is properly formatted and ready to be explored and interpreted for ecoregionalization studies."
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
65 ]
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
66 }
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
67 ],
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
68 "metadata": {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
69 "kernelspec": {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
70 "display_name": "R",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
71 "language": "R",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
72 "name": "ir"
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
73 },
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
74 "language_info": {
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
75 "codemirror_mode": "r",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
76 "file_extension": ".r",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
77 "mimetype": "text/x-r-source",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
78 "name": "R",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
79 "pygments_lexer": "r",
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
80 "version": "4.0.3"
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
81 }
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
82 },
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
83 "nbformat": 4,
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
84 "nbformat_minor": 4
ad96b20423cf planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
ecology
parents:
diff changeset
85 }