Mercurial > repos > iss > eurl_vtec_wgs_pt
view scripts/serotype.sh @ 0:c6bab5103a14 draft
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
author | iss |
---|---|
date | Mon, 21 Mar 2022 15:23:09 +0000 |
parents | |
children |
line wrap: on
line source
tooldir="$1"; paired="$2"; fastqfile1="$3"; fastqfile2="$4"; fastafile="$5"; ln -s $fastqfile1 fastqfile1; ln -s $fastqfile2 fastqfile2; # FILTER + ASSEMBLE + BLAST FASTQ chmod u+x $tooldir/scripts/duk if [ $paired = "y" ] then $tooldir/scripts/duk -m filteredO1.fq -k 23 $tooldir/data/O_type.fsa $fastqfile1; $tooldir/scripts/duk -m filteredH1.fq -k 23 $tooldir/data/H_type.fsa $fastqfile1; cat filteredO1.fq > filteredOH1.fq; cat filteredH1.fq >> filteredOH1.fq; $tooldir/scripts/duk -m filteredO2.fq -k 23 $tooldir/data/O_type.fsa $fastqfile2; $tooldir/scripts/duk -m filteredH2.fq -k 23 $tooldir/data/H_type.fsa $fastqfile2; cat filteredO2.fq > filteredOH2.fq; cat filteredH2.fq >> filteredOH2.fq; $tooldir/scripts/fastq_pair filteredOH1.fq filteredOH2.fq; $tooldir/scripts/fastq_pair filteredOH1.fq.single.fq fastqfile2; $tooldir/scripts/fastq_pair filteredOH2.fq.single.fq fastqfile1; cat filteredOH1.fq.paired.fq > filteredOH1_paired.fq; cat filteredOH1.fq.single.fq.paired.fq >> filteredOH1_paired.fq; cat fastqfile1.paired.fq >> filteredOH1_paired.fq; cat filteredOH2.fq.paired.fq > filteredOH2_paired.fq; cat fastqfile2.paired.fq >> filteredOH2_paired.fq; cat filteredOH2.fq.single.fq.paired.fq >> filteredOH2_paired.fq; dukst1filesize=$(wc -c "filteredOH1_paired.fq" | awk '{print $1}'); dukst2filesize=$(wc -c "filteredOH2_paired.fq" | awk '{print $1}'); if [ $dukst1filesize -gt 0 ] && [ $dukst2filesize -gt 0 ] then perl $tooldir/scripts/spades.pl duk_spades.fasta duk_spades_contig_stats duk_spades_scaffolds duk_spades_scaffold_stats duk_spades_log NODE spades.py --disable-gzip-output --isolate -t \${GALAXY_SLOTS:-16} --pe1-ff --pe1-1 fastq:filteredOH1_paired.fq --pe1-2 fastq:filteredOH2_paired.fq rm -r output_dir; blastn -query duk_spades.fasta -db $tooldir/data/O_type -task blastn -evalue 0.001 -out duk_O_seqs -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads 8 -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0; blastn -query duk_spades.fasta -db $tooldir/data/H_type -task blastn -evalue 0.001 -out duk_H_seqs -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads 8 -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0; else touch duk_O_seqs; touch duk_H_seqs; fi else $tooldir/scripts/duk -m filteredO1.fq -k 23 $tooldir/data/O_type.fsa $fastqfile1; $tooldir/scripts/duk -m filteredH1.fq -k 23 $tooldir/data/H_type.fsa $fastqfile1; cat filteredO1.fq > filteredOH1.fq; cat filteredH1.fq >> filteredOH1.fq; dukstx1filesize=$(wc -c "filteredOH1.fq" | awk '{print $1}'); if [ $dukstx1filesize -gt 0 ] then perl $tooldir/scripts/spades.pl duk_spades.fasta duk_spades_contig_stats duk_spades_scaffolds duk_spades_scaffold_stats duk_spades_log NODE spades.py --disable-gzip-output --isolate -t \${GALAXY_SLOTS:-16} --iontorrent -s fastq:filteredOH1.fq; rm -r output_dir; blastn -query duk_spades.fasta -db $tooldir/data/O_type -task blastn -evalue 0.001 -out duk_O_seqs -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads 8 -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0; blastn -query duk_spades.fasta -db $tooldir/data/H_type -task blastn -evalue 0.001 -out duk_H_seqs -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads 8 -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0; else touch duk_O_seqs; touch duk_H_seqs; fi fi # BLAST FASTA blastn -query $fastafile -db $tooldir/data/O_type -task blastn -evalue 0.001 -out fasta_O_seqs -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads 8 -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0; blastn -query $fastafile -db $tooldir/data/H_type -task blastn -evalue 0.001 -out fasta_H_seqs -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles' -num_threads 8 -strand both -dust yes -max_target_seqs 10 -perc_identity 95.0; # COMBINE cat duk_O_seqs > serogroup_O; cat fasta_O_seqs >> serogroup_O; cat duk_H_seqs > serogroup_H; cat fasta_H_seqs >> serogroup_H;