annotate test-data/annotemp/pivot_wider_jupytool_notebook.ipynb @ 3:67b2cd9c2954 draft

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
author ecology
date Tue, 10 Sep 2024 12:52:24 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
1 {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
2 "cells": [
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
3 {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
4 "cell_type": "markdown",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
5 "metadata": {},
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
6 "source": [
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
7 "# Pivot wider Jupytool "
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
8 ]
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
9 },
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
10 {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
11 "cell_type": "markdown",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
12 "metadata": {},
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
13 "source": [
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
14 "This Jupyter notebook is dedicated to the pivot_wider function from the tidyr R package. \n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
15 "This script is the final part of the data preparation for the ecoregionalization Galaxy workflow. "
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
16 ]
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
17 },
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
18 {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
19 "cell_type": "code",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
20 "execution_count": 62,
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
21 "metadata": {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
22 "tags": []
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
23 },
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
24 "outputs": [],
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
25 "source": [
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
26 "#Date : 22/05/2024\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
27 "#Author : Seguineau Pauline & Yvan Le Bras \n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
28 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
29 "#Load libraries\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
30 "library(tidyr)\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
31 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
32 "#load file \n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
33 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
34 "input_path = \"galaxy_inputs\"\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
35 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
36 "for (dir in list.dirs(input_path)){\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
37 " for (file in list.files(dir)) {\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
38 " file_path = file.path(dir, file)}\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
39 "}\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
40 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
41 "file = read.table(file_path,header=T, sep = \"\\t\")\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
42 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
43 "#Run pivot_wider function\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
44 "pivot_file = pivot_wider(data = file,\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
45 " names_from = phylum_class_order_family_genus_specificEpithet,\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
46 " values_from = individualCount,\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
47 " values_fill = 0,\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
48 " values_fn = sum)\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
49 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
50 "#Replace all occurrences >= 1 by 1 to have only presence (1) or absence (0) data\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
51 "for(c in 3:length(pivot_file)){\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
52 " pivot_file[c][pivot_file[c]>=1] <- 1}\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
53 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
54 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
55 "write.table(pivot_file, \"outputs/pivot_file.tabular\", sep = \"\\t\", quote = F, row.names = F)"
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
56 ]
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
57 },
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
58 {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
59 "cell_type": "markdown",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
60 "metadata": {},
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
61 "source": [
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
62 "In this Jupyter notebook, we used the pivot_wider function of the tidyr package to transform our data into a wider format adapted to subsequent analyses as part of the Galaxy workflow for ecoregionalization. This transformation allowed us to convert our data to a format where each taxon becomes a separate column. We also took care to fill in the missing values with zeros and to sum the individual counts in case of duplications. Then all values >= 1 are replaced by 1 to have only presence (1) or absence (0) data.\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
63 "\n",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
64 "Thus, this notebook is an essential building block of our analysis pipeline, ensuring that the data is properly formatted and ready to be explored and interpreted for ecoregionalization studies."
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
65 ]
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
66 }
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
67 ],
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
68 "metadata": {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
69 "kernelspec": {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
70 "display_name": "R",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
71 "language": "R",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
72 "name": "ir"
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
73 },
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
74 "language_info": {
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
75 "codemirror_mode": "r",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
76 "file_extension": ".r",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
77 "mimetype": "text/x-r-source",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
78 "name": "R",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
79 "pygments_lexer": "r",
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
80 "version": "4.0.3"
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
81 }
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
82 },
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
83 "nbformat": 4,
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
84 "nbformat_minor": 4
67b2cd9c2954 planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/EMLassemblyline commit 2d36dc964f548b5acbc43ffd78e51e6fc7dc80bb
ecology
parents:
diff changeset
85 }