Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/stx_subtype_pe.sh @ 6:20ff3dca457f draft default tip
planemo upload commit 6857c749c21f580c828aba3543e294b69d32b662
author | iss |
---|---|
date | Mon, 23 Oct 2023 11:45:36 +0000 |
parents | c6bab5103a14 |
children |
rev | line source |
---|---|
0
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# Positional arguments:
#   $1  tool directory; its scripts/ and data/ subdirectories are used below
#   $2  first FASTQ file of the read pair
#   $3  second FASTQ file of the read pair
#   $4  FASTA file that is copied to stxdir/spades.fasta
tooldir=$1
fastqfile1=$2
fastqfile2=$3
fastafile=$4
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
5 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# ASSEMBLY
# Assemble the input reads with SKESA into stxdir/skesa.fasta and stage the
# caller-supplied FASTA as stxdir/spades.fasta.
# NOTE(review): SKESA's help text describes paired reads as a comma-separated
# value for --fastq; the two files are passed as separate values exactly as
# before - confirm this is intended.
# -p: do not fail when stxdir is already present.
mkdir -p stxdir
skesa --fastq "$fastqfile1" "$fastqfile2" --contigs_out stxdir/skesa.fasta
cp -- "$fastafile" stxdir/spades.fasta
# Drop a leftover output_dir (presumably from an earlier SPAdes step -
# verify); -f keeps this silent when the directory does not exist.
rm -rf output_dir
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
11 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# FILTER + ASSEMBLY
# Filter both read files with duk against data/stx.fa, rebuild proper read
# pairs with fastq_pair, and assemble the filtered pairs with SKESA and SPAdes.
chmod u+x "$tooldir/scripts/duk"
# duk writes to the -m file (presumably the reads matching data/stx.fa at
# k-mer size 23 - confirm against duk's usage text).
"$tooldir/scripts/duk" -m stxdir/filtered1STX.fq -k 23 "$tooldir/data/stx.fa" "$fastqfile1"
"$tooldir/scripts/duk" -m stxdir/filtered2STX.fq -k 23 "$tooldir/data/stx.fa" "$fastqfile2"

# The file names used below rely on fastq_pair writing <input>.paired.fq and
# <input>.single.fq next to each of its two inputs.
"$tooldir/scripts/fastq_pair" stxdir/filtered1STX.fq stxdir/filtered2STX.fq
# For reads left single by the first pairing, look up the mate in the other
# original FASTQ file.
"$tooldir/scripts/fastq_pair" stxdir/filtered1STX.fq.single.fq "$fastqfile2"
"$tooldir/scripts/fastq_pair" stxdir/filtered2STX.fq.single.fq "$fastqfile1"

# The three segments are concatenated in corresponding order in both files:
#   1. pairs found directly among the filtered reads
#   2. filtered1 singles           / their mates taken from fastqfile2
#   3. mates taken from fastqfile1 / filtered2 singles
cat stxdir/filtered1STX.fq.paired.fq \
  stxdir/filtered1STX.fq.single.fq.paired.fq \
  "$fastqfile1.paired.fq" > stxdir/filtered1STX_paired.fq
cat stxdir/filtered2STX.fq.paired.fq \
  "$fastqfile2.paired.fq" \
  stxdir/filtered2STX.fq.single.fq.paired.fq > stxdir/filtered2STX_paired.fq

# Assemble and search only when both filtered files contain data; -s is true
# for an existing file of size greater than zero.
if [ -s stxdir/filtered1STX_paired.fq ] && [ -s stxdir/filtered2STX_paired.fq ]; then
  skesa --fastq stxdir/filtered1STX_paired.fq stxdir/filtered2STX_paired.fq --contigs_out stxdir/duk_skesa.fasta
  # spades.pl receives five output names, the token NODE and then the
  # spades.py command line; only duk_spades_contigs is used afterwards.
  perl "$tooldir/scripts/spades.pl" \
    duk_spades_contigs duk_spades_contig_stats \
    duk_spades_scaffolds duk_spades_scaffold_stats \
    duk_spades_log NODE \
    spades.py --disable-gzip-output --isolate -t 8 --pe1-ff \
    --pe1-1 fastq:stxdir/filtered1STX_paired.fq \
    --pe1-2 fastq:stxdir/filtered2STX_paired.fq
  mv duk_spades_contigs stxdir/duk_spades.fasta
  # -f: stay silent if no output_dir was left behind.
  rm -rf output_dir
  # Same BLAST search for both filtered assemblies.
  for asm in duk_skesa duk_spades; do
    blastn -query "stxdir/$asm.fasta" -db "$tooldir/data/stx" -task blastn \
      -evalue 0.001 -out "stxdir/${asm}_seqs" \
      -outfmt '6 qseqid sseqid sframe qseq' -num_threads 8 -strand both \
      -dust yes -max_target_seqs 1 -perc_identity 95.0
  done
else
  # Nothing to assemble: provide empty hit tables so later steps still find
  # their input files.
  touch stxdir/duk_skesa_seqs stxdir/duk_spades_seqs
fi
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
39 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# SEQUENCE SEARCH
# BLAST stxdir/skesa.fasta and stxdir/spades.fasta against the stx database.
# Output columns: query id, subject id, subject frame, aligned query sequence.
for asm in skesa spades; do
  blastn -query "stxdir/$asm.fasta" -db "$tooldir/data/stx" -task blastn \
    -evalue 0.001 -out "stxdir/${asm}_seqs" \
    -outfmt '6 qseqid sseqid sframe qseq' -num_threads 8 -strand both \
    -dust yes -max_target_seqs 1 -perc_identity 95.0
done
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# DIVIDE STX1 FROM STX2

# split_stx_hits TYPE IN OUT
#   TYPE  subject-id prefix to keep (stx1 or stx2)
#   IN    BLAST table with the columns qseqid sseqid sframe qseq
#   OUT   file receiving "qseqid<TAB>sframe<TAB>qseq" rows
# Only the first row of every query id is considered; it is written to OUT
# when its subject id begins with TYPE. Later rows of the same query id are
# ignored even if they would match.
split_stx_hits() {
  awk -v type="$1" '
    BEGIN { OFS = "\t" }
    !seen[$1]++ && substr($2, 1, length(type)) == type { print $1, $3, $4 }
  ' "$2" > "$3"
}

# Every hit table (skesa, spades and their duk-filtered counterparts) is split
# once per stx type.
for stx in stx1 stx2; do
  for asm in skesa spades; do
    split_stx_hits "$stx" "stxdir/${asm}_seqs" "stxdir/${stx}_${asm}_seqs"
    split_stx_hits "$stx" "stxdir/duk_${asm}_seqs" "stxdir/duk${stx}_${asm}_seqs"
  done
done
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# CREATE COMBINED MULTIFASTA FROM SEQUENCES
# For each stx type, hand the four per-assembly hit tables (comma-separated)
# to MultifastaFromBlast.pl, which writes stxdir/multiassembly_<type>.fasta.
for stx in stx1 stx2; do
  perl "$tooldir/scripts/MultifastaFromBlast.pl" \
    "stxdir/${stx}_skesa_seqs,stxdir/duk${stx}_skesa_seqs,stxdir/${stx}_spades_seqs,stxdir/duk${stx}_spades_seqs" \
    "stxdir/multiassembly_${stx}.fasta"
done
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
55 |
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# ALIGN AND GET CONSENSUS

# strip_reference_records PREFIX FASTA
# Print FASTA to stdout without the records whose header starts with PREFIX.
# With RS=">" the text before the first ">" forms an empty record; NF > 0
# skips it so that no stray ">" is emitted in front of the first header.
strip_reference_records() {
  awk -v ref="$1" '
    BEGIN { RS = ">"; ORS = "" }
    NF > 0 && substr($1, 1, length(ref)) != ref { print ">" $0 }
  ' "$2"
}

# linearize_fasta FASTA
# Print FASTA to stdout with every record's sequence joined onto one line.
linearize_fasta() {
  awk '
    /^>/ { printf("%s%s\n", (n > 0 ? "\n" : ""), $0); n++; next }
    { printf("%s", $0) }
    END { printf("\n") }
  ' "$1"
}

# build_consensus TYPE      (TYPE is stx1 or stx2)
# Reads  stxdir/multiassembly_TYPE.fasta
# Writes stxdir/multiassembly_TYPE_consensus.fasta
# When the multi-FASTA is missing or empty, an empty consensus file is
# created. Otherwise the references from data/TYPE.fa are appended, everything
# is aligned with muscle, the reference records are removed from the
# alignment again, and GetConsensus.py is run on the linearized remainder.
build_consensus() {
  if [ ! -s "stxdir/multiassembly_${1}.fasta" ]; then
    touch "stxdir/multiassembly_${1}_consensus.fasta"
    return
  fi
  cat "$tooldir/data/${1}.fa" >> "stxdir/multiassembly_${1}.fasta"
  muscle -in "stxdir/multiassembly_${1}.fasta" \
    -out "stxdir/multiassembly_${1}_aligned.fasta"
  strip_reference_records "$1" "stxdir/multiassembly_${1}_aligned.fasta" \
    > "stxdir/multiassembly_${1}_aligned_clean.fasta"
  linearize_fasta "stxdir/multiassembly_${1}_aligned_clean.fasta" \
    > "stxdir/multiassembly_${1}_aligned_linear.fasta"
  python "$tooldir/scripts/GetConsensus.py" \
    -i "stxdir/multiassembly_${1}_aligned_linear.fasta" \
    -o "stxdir/multiassembly_${1}_consensus.fasta"
}

build_consensus stx1

build_consensus stx2
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
iss
parents:
diff
changeset
|
# Final result: the stx1 consensus followed by the stx2 consensus, written to
# stx.fasta in the current directory.
{
  cat stxdir/multiassembly_stx1_consensus.fasta
  cat stxdir/multiassembly_stx2_consensus.fasta
} > stx.fasta