Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/serotype.sh @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
rev | line source |
---|---|
0
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# serotype.sh - collect O- and H-type BLAST hits from reads and from contigs.
#
# Usage: serotype.sh TOOLDIR PAIRED FASTQ1 FASTQ2 FASTA
#   TOOLDIR  directory holding scripts/ (duk, fastq_pair, spades.pl) and
#            data/ (O_type.fsa, H_type.fsa and the O_type / H_type BLAST dbs)
#   PAIRED   "y" selects the paired-end branch; any other value selects the
#            single-library branch
#   FASTQ1   first (or only) FASTQ read file
#   FASTQ2   second FASTQ read file (only read in the paired-end branch)
#   FASTA    FASTA file that is BLASTed directly
#
# Outputs (current directory):
#   serogroup_O  duk_O_seqs followed by fasta_O_seqs
#   serogroup_H  duk_H_seqs followed by fasta_H_seqs
# plus the intermediate filtered*.fq, duk_spades* and *_seqs files.
#
# NOTE(review): no `set -e` on purpose - the original script keeps going after
# a failing step, and that behaviour is preserved here.
tooldir="$1"
paired="$2"
fastqfile1="$3"
fastqfile2="$4"
fastafile="$5"

# Run duk against the O and H reference sets for one read file.
#   $1 - FASTQ file to filter
#   $2 - suffix used in the output names (1 or 2)
# Writes filteredO$2.fq, filteredH$2.fq and their concatenation filteredOH$2.fq.
# NOTE(review): -m is presumably duk's "matched reads" output - confirm in duk's help.
filter_reads() {
  "$tooldir/scripts/duk" -m "filteredO$2.fq" -k 23 "$tooldir/data/O_type.fsa" "$1"
  "$tooldir/scripts/duk" -m "filteredH$2.fq" -k 23 "$tooldir/data/H_type.fsa" "$1"
  cat "filteredO$2.fq" "filteredH$2.fq" > "filteredOH$2.fq"
}

# BLAST one query file against the O_type and H_type databases.
#   $1 - query FASTA
#   $2 - output prefix; results go to ${2}_O_seqs and ${2}_H_seqs
blast_antigens() {
  for antigen in O H; do
    blastn -query "$1" -db "$tooldir/data/${antigen}_type" -task blastn \
      -evalue 0.001 -out "${2}_${antigen}_seqs" \
      -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' \
      -num_threads 8 -strand both -dust yes -max_target_seqs 10 \
      -perc_identity 95.0
  done
}

# Assemble the filtered reads through spades.pl, then BLAST the assembly.
#   "$@" - read-library options appended to the spades.py command line
# The backslash keeps ${GALAXY_SLOTS:-16} literal, so spades.pl receives the
# unexpanded text; presumably spades.pl runs the command through a shell that
# expands it - confirm in spades.pl before changing this.
assemble_and_blast() {
  perl "$tooldir/scripts/spades.pl" duk_spades.fasta duk_spades_contig_stats \
    duk_spades_scaffolds duk_spades_scaffold_stats duk_spades_log NODE \
    spades.py --disable-gzip-output --isolate -t \${GALAXY_SLOTS:-16} "$@"
  # output_dir is presumably left behind by spades.pl (confirm); -f keeps the
  # script quiet when it was never created.
  rm -rf -- output_dir
  blast_antigens duk_spades.fasta duk
}

# Stable local names for the read files; fastq_pair derives its output names
# (fastqfile1.paired.fq, fastqfile2.paired.fq) from them.
ln -s -- "$fastqfile1" fastqfile1
if [ -n "$fastqfile2" ]; then
  # Skipped when no second read file was supplied, so no self-referential
  # symlink is created.
  ln -s -- "$fastqfile2" fastqfile2
fi

# FILTER + ASSEMBLE + BLAST FASTQ
chmod u+x "$tooldir/scripts/duk"
if [ "$paired" = "y" ]; then
  filter_reads "$fastqfile1" 1
  filter_reads "$fastqfile2" 2

  # Pair the filtered reads with each other, then pair each side's leftover
  # singles against the full opposite read file.
  "$tooldir/scripts/fastq_pair" filteredOH1.fq filteredOH2.fq
  "$tooldir/scripts/fastq_pair" filteredOH1.fq.single.fq fastqfile2
  "$tooldir/scripts/fastq_pair" filteredOH2.fq.single.fq fastqfile1

  # The concatenation order must correspond between the two files so that
  # mates stay aligned: pairing 1, pairing 2, pairing 3 on both sides.
  cat filteredOH1.fq.paired.fq filteredOH1.fq.single.fq.paired.fq \
    fastqfile1.paired.fq > filteredOH1_paired.fq
  cat filteredOH2.fq.paired.fq fastqfile2.paired.fq \
    filteredOH2.fq.single.fq.paired.fq > filteredOH2_paired.fq

  if [ -s filteredOH1_paired.fq ] && [ -s filteredOH2_paired.fq ]; then
    assemble_and_blast --pe1-ff --pe1-1 fastq:filteredOH1_paired.fq \
      --pe1-2 fastq:filteredOH2_paired.fq
  else
    # Nothing to assemble: leave empty hit files so the COMBINE step still works.
    touch duk_O_seqs
    touch duk_H_seqs
  fi
else
  filter_reads "$fastqfile1" 1

  if [ -s filteredOH1.fq ]; then
    assemble_and_blast --iontorrent -s fastq:filteredOH1.fq
  else
    # Nothing to assemble: leave empty hit files so the COMBINE step still works.
    touch duk_O_seqs
    touch duk_H_seqs
  fi
fi

# BLAST FASTA
blast_antigens "$fastafile" fasta

# COMBINE
cat duk_O_seqs fasta_O_seqs > serogroup_O
cat duk_H_seqs fasta_H_seqs > serogroup_H