annotate test-data/annotemp/pivot_wider_jupytool_notebook.ipynb @ 1:dbba9bedd4bd draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 4b040fe7867d965fb88ce70cc08081367b62b063
author ecology
date Fri, 27 Sep 2024 13:00:18 +0000
parents fab3ca90cb26
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
1 {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
2 "cells": [
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
3 {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
4 "cell_type": "markdown",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
5 "metadata": {},
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
6 "source": [
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
7 "# Pivot wider Jupytool "
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
8 ]
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
9 },
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
10 {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
11 "cell_type": "markdown",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
12 "metadata": {},
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
13 "source": [
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
14 "This Jupyter notebook is dedicated to the pivot_wider function from the tidyr R package. \n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
15 "This script is the final part of the data preparation for the ecoregionalization Galaxy workflow. "
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
16 ]
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
17 },
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
18 {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
19 "cell_type": "code",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
20 "execution_count": 62,
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
21 "metadata": {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
22 "tags": []
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
23 },
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
24 "outputs": [],
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
25 "source": [
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
26 "#Date : 22/05/2024\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
27 "#Author : Seguineau Pauline & Yvan Le Bras \n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
28 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
29 "#Load libraries\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
30 "library(tidyr)\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
31 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
32 "#load file \n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
33 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
34 "input_path = \"galaxy_inputs\"\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
35 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
36 "for (dir in list.dirs(input_path)){\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
37 " for (file in list.files(dir)) {\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
38 " file_path = file.path(dir, file)}\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
39 "}\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
40 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
41 "file = read.table(file_path,header=T, sep = \"\\t\")\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
42 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
43 "#Run pivot_wider function\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
44 "pivot_file = pivot_wider(data = file,\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
45 " names_from = phylum_class_order_family_genus_specificEpithet,\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
46 " values_from = individualCount,\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
47 " values_fill = 0,\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
48 " values_fn = sum)\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
49 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
50 "#Replace all occurrences >= 1 by 1 to have only presence (1) or absence (0) data\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
51 "for(c in 3:length(pivot_file)){\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
52 " pivot_file[c][pivot_file[c]>=1] <- 1}\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
53 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
54 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
55 "write.table(pivot_file, \"outputs/pivot_file.tabular\", sep = \"\\t\", quote = F, row.names = F)"
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
56 ]
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
57 },
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
58 {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
59 "cell_type": "markdown",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
60 "metadata": {},
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
61 "source": [
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
62 "In this Jupyter notebook, we used the pivot_wider function of the tidyr package to transform our data into a wider format suited to subsequent analyses as part of the Galaxy workflow for ecoregionalization. This transformation allowed us to convert our data to a format where each taxon becomes a separate column. We also took care to fill in the missing values with zeros and to sum the individual counts in case of duplications. Then all values >= 1 are replaced by 1 to have only presence (1) or absence (0) data.\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
63 "\n",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
64 "Thus, this notebook is an essential building block of our analysis pipeline, ensuring that the data is properly formatted and ready to be explored and interpreted for ecoregionalization studies."
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
65 ]
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
66 }
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
67 ],
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
68 "metadata": {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
69 "kernelspec": {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
70 "display_name": "R",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
71 "language": "R",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
72 "name": "ir"
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
73 },
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
74 "language_info": {
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
75 "codemirror_mode": "r",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
76 "file_extension": ".r",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
77 "mimetype": "text/x-r-source",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
78 "name": "R",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
79 "pygments_lexer": "r",
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
80 "version": "4.0.3"
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
81 }
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
82 },
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
83 "nbformat": 4,
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
84 "nbformat_minor": 4
fab3ca90cb26 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
85 }