Mercurial > repos > iuc > dada2_learnerrors
annotate test-data.sh @ 7:75403243703a draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit c2f6071f729b74540354f4a9e7084c9ac468a135
author | iuc |
---|---|
date | Mon, 07 Aug 2023 01:33:05 +0000 |
parents | c48d42d65d2b |
children |
rev | line source |
---|---|
2
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env bash |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
2 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
3 # install conda (Miniconda into a temporary directory) if it is not already available |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
4 if type conda > /dev/null; then |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
5 true |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
6 else |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
7 tmp=$(mktemp -d) |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
8 wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
9 bash Miniconda3-latest-Linux-x86_64.sh -b -p "$tmp/miniconda" |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
10 source "$tmp/miniconda/bin/activate" |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
11 fi |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
12 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
13 eval "$(conda shell.bash hook)" |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
14 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
15 # create the conda env __bioconductor-dada2@1.14 unless it already exists |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
16 if grep -Fq __bioconductor-dada2@1.14 <<< $(conda env list); then |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
17 true |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
18 else |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
19 conda create -y --quiet --override-channels --channel conda-forge --channel bioconda --channel defaults --name __bioconductor-dada2@1.14 bioconductor-dada2=1.14 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
20 fi |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
21 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
22 conda activate __bioconductor-dada2@1.14 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
23 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
24 # create test data |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
25 cd test-data/ |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
26 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
27 # download Mothur SOP data from zenodo (GTN), same as |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
28 # http://www.mothur.org/w/images/d/d6/MiSeqSOPData.zip, but with stable links |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
29 # (the file names need to be fixed, hence the explicit wget -O targets) |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
30 wget -nc -O F3D0_S188_L001_R1_001.fastq https://zenodo.org/record/800651/files/F3D0_R1.fastq?download=1 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
31 wget -nc -O F3D0_S188_L001_R2_001.fastq https://zenodo.org/record/800651/files/F3D0_R2.fastq?download=1 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
32 wget -nc -O F3D141_S207_L001_R1_001.fastq https://zenodo.org/record/800651/files/F3D141_R1.fastq?download=1 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
33 wget -nc -O F3D141_S207_L001_R2_001.fastq https://zenodo.org/record/800651/files/F3D141_R2.fastq?download=1 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
34 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
35 # reduce each file to its first 3000 lines (~10% of the data, for speed) and gzip it |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
36 for i in *fastq |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
37 do |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
38 head -n 3000 "$i" | gzip -c > "$i.gz" |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
39 done |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
40 rm *fastq |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
41 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
42 # download reference databases from https://zenodo.org/record/158955 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
43 # as mentioned in https://benjjneb.github.io/dada2/training.html |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
44 wget -nc -O reference.fa.gz https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
45 wget -nc -O reference_species.fa.gz https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
46 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
47 # keep only the first 1000 lines of each reference (~5%, for speed) |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
48 zcat reference.fa.gz | head -n 1000 | gzip -c > t && mv t reference.fa.gz |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
49 zcat reference_species.fa.gz | head -n 1000 | gzip -c > t && mv t reference_species.fa.gz |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
50 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
51 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
52 # generate outputs |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
53 Rscript gentest.R |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
54 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
55 conda deactivate |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
56 |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
57 # # remove files only needed for test generation |
c48d42d65d2b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff
changeset
|
58 # rm learnErrors_F3D0_R2.pdf dada_F3D0_R2.Rdata |