annotate PanExplorer_workflow/Perl/Naegleria/assignFastqByITS.pl @ 1:032f6b3806a3 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:16:08 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
032f6b3806a3 Uploaded
dereeper
parents:
diff changeset
#!/usr/bin/perl

# For every *.fastq.gz file found directly inside the directory given as the
# first argument, report how many lines of the file contain each of the
# reference sequences listed in %sequences.
#
# Usage:  perl assignFastqByITS.pl my_fastq_dir
#
# Output: one line per (file, genotype) pair on STDOUT, formatted as
#         "<file> <genotype> <count>".
#
# Notes:
#   * The count comes from `zgrep -c`, i.e. it is the number of matching
#     LINES, not the number of occurrences.
#   * Only the sequences exactly as written below are searched; no reverse
#     complement is generated.
#   * Some reference sequences are contained in longer ones (e.g. sequence 1
#     is the tail of sequence 4), so a single read can be counted under more
#     than one genotype.

use strict;
use warnings;

my $directory = $ARGV[0];

# Test for "defined and non-empty" rather than plain truth, so that a
# directory literally named "0" is still accepted.
unless (defined $directory && length $directory) {
    # Usage errors belong on STDERR and must yield a non-zero exit status so
    # that calling workflows can detect the failure.
    print STDERR "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n";
    exit 1;
}

# Reference sequences, keyed by genotype number.
my %sequences = (
    "1" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "2" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "3" => "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "4" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "5" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "6" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "7" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "8" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
);

# Return the sorted list of "<dir>/<name>" paths for the regular files in
# $dir whose name ends in ".fastq.gz".
#
# The directory is read with opendir/readdir instead of piping through
# `ls`, so that directory names containing spaces or shell metacharacters
# are handled safely. Names starting with a dot are skipped, matching what
# the shell pattern "*.fastq.gz" would have selected.
sub list_fastq_files {
    my ($dir) = @_;

    opendir(my $dh, $dir) or die "Cannot open directory '$dir': $!\n";
    my @names = sort grep { !/^\./ && /\.fastq\.gz$/ } readdir($dh);
    closedir($dh);

    return grep { -f $_ } map { "$dir/$_" } @names;
}

# Return the output of `zgrep -c` for $seq in $file (the number of matching
# lines) with line terminators removed. Returns an empty string when zgrep
# produced no output.
#
# zgrep is started through the list form of a pipe open, so neither the
# sequence nor the file name is ever interpreted by a shell.
sub count_matching_lines {
    my ($seq, $file) = @_;

    # -F: the pattern is a fixed string (the sequences contain only letters,
    #     so the result is the same as a regex match).
    # --: everything after it is a file name, even if it starts with '-'.
    open(my $zgrep, '-|', 'zgrep', '-c', '-F', '-e', $seq, '--', $file)
        or die "Cannot run zgrep: $!\n";
    my $count = join('', <$zgrep>);
    close($zgrep);

    # grep exits with status 1 when nothing matched, which is a normal
    # outcome here; anything else that is non-zero is a real failure.
    if ($? != 0 && ($? >> 8) != 1) {
        warn "zgrep failed on '$file' (wait status $?)\n";
    }

    $count =~ s/[\r\n]//g;
    return $count;
}

my @fastq_files = list_fastq_files($directory);
warn "No *.fastq.gz file found in '$directory'\n" unless @fastq_files;

foreach my $file (@fastq_files) {
    # Genotype keys are numeric strings: sort them numerically.
    foreach my $genotype (sort { $a <=> $b } keys(%sequences)) {
        my $count = count_matching_lines($sequences{$genotype}, $file);
        print "$file $genotype $count\n";
    }
}