annotate test-data/generate_test_data.sh @ 0:2595c27071c2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
author iuc
date Sat, 15 Feb 2020 15:32:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2595c27071c2 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff changeset
#!/usr/bin/env bash
#
# Regenerates the reduced test data set used by the KMA tool tests.
#
# Input (must already be in the current directory):
#   b0842.fasta.gz - E. coli locus b0842, downloaded from the Enterobase
#                    E. coli cgMLST scheme
#
# Outputs (all written relative to the current directory):
#   b0842.fasta                                  decompressed locus
#   ecoli_cgMLST/ecoli_b0842_1to5.fasta          first 10 lines of the locus
#   ecoli_cgMLST/ecoli_b0842_1to5*               kma and bwa index files
#   ERR884056_1.fastq.gz                         downloaded reads
#   ERR884056_1_ecoli_b0842_1to5{.sam,.bam,.mapped.bam,.mapped.sort.bam}
#   ERR884056_ecoli_b0842.mapped_R1.fastq        reads that mapped to the locus
#
# requires: wget, kma, bwa, samtools, bedtools (plus gunzip and head)

# Abort on the first failing step so later steps never run on missing or
# stale intermediate files.
set -euo pipefail

# Print an error message to stderr and exit with a non-zero status.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

main() {
  local -r locus_gz='b0842.fasta.gz'
  local -r locus_fasta='b0842.fasta'
  local -r db_dir='ecoli_cgMLST'
  local -r db_prefix="${db_dir}/ecoli_b0842_1to5"
  local -r db_fasta="${db_prefix}.fasta"
  local -r reads_url='ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR884/ERR884056/ERR884056_1.fastq.gz'
  local -r reads_gz='ERR884056_1.fastq.gz'
  local -r aln_prefix='ERR884056_1_ecoli_b0842_1to5'
  local -r mapped_fastq='ERR884056_ecoli_b0842.mapped_R1.fastq'

  # Fail early, with a clear message, if a required program or the input
  # archive is missing.
  local tool
  for tool in gunzip head wget kma bwa samtools bedtools; do
    command -v "$tool" >/dev/null 2>&1 || die "required tool not found: ${tool}"
  done
  [[ -f "$locus_gz" ]] || die "input file not found: ${locus_gz}"

  # Decompress to stdout so the downloaded archive is kept and the script
  # can be re-run without fetching it again.
  gunzip -c "$locus_gz" > "$locus_fasta"

  # Take first 5 alleles to reduce size of test data.
  # NOTE(review): 10 lines == 5 alleles only if every record is exactly one
  # header line plus one sequence line - confirm for the Enterobase download.
  mkdir -p "$db_dir"
  head -n 10 "$locus_fasta" > "$db_fasta"

  kma index -k 8 -i "$db_fasta" -o "$db_prefix"

  # -O pins the output name; without it a repeated run would save the
  # download as "<name>.1" and the steps below would keep using the old file.
  wget -O "$reads_gz" "$reads_url"

  # Use bwa to map reads to reduced E. coli locus b0842
  # and extract only mapped reads (to reduce size of test dataset).
  # Options are placed before the positional arguments so the commands also
  # work where getopt does not reorder argv.
  bwa index "$db_fasta"
  bwa mem -o "${aln_prefix}.sam" "$db_fasta" "$reads_gz"

  samtools view -b -o "${aln_prefix}.bam" "${aln_prefix}.sam"

  # Select mapped reads: -F 4 drops records that have the "unmapped" flag set.
  samtools view -b -F 4 -o "${aln_prefix}.mapped.bam" "${aln_prefix}.bam"

  # Sort by read name (-n) before converting the alignments back to FASTQ.
  samtools sort -n -o "${aln_prefix}.mapped.sort.bam" "${aln_prefix}.mapped.bam"

  bedtools bamtofastq -i "${aln_prefix}.mapped.sort.bam" -fq "$mapped_fastq"
}

main "$@"