Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer_workflow/Perl/Naegleria/assignFastqByITS.pl @ 1:032f6b3806a3 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:16:08 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:3cbb01081cde | 1:032f6b3806a3 |
---|---|
#!/usr/bin/perl

# assignFastqByITS.pl
#
# For every *.fastq.gz file found in the directory given as first argument,
# count the lines that contain each reference ITS sequence listed in
# %sequences, and print one line per (file, genotype) pair on STDOUT:
#
#     <path to fastq.gz> <genotype> <number of matching lines>
#
# Usage: perl assignFastqByITS.pl my_fastq_dir
#
# Exit status: 0 on success, non-zero when the argument is missing or is not
# a readable directory.

use strict;
use warnings;

my $directory = $ARGV[0];

# Test definedness/length rather than truth so that a directory literally
# named "0" is accepted.
if (!defined $directory || $directory eq '') {
    # Usage errors go to STDERR with a non-zero status so that a calling
    # workflow can detect the failure and STDOUT stays free of noise.
    print STDERR "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n";
    exit 1;
}

-d $directory
    or die "'$directory' is not a directory\n";

# Reference sequences, keyed by genotype number.
# NOTE: some references contain others (e.g. sequence 1 is a suffix of
# sequence 4), so a single read line can be counted for several genotypes.
my %sequences = (
    "1" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "2" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "3" => "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "4" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "5" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "6" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "7" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "8" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
);

# Genotypes are reported in ascending numeric order.
my @genotypes = sort { $a <=> $b } keys %sequences;

# List the directory from Perl instead of piping through `ls`: no shell is
# involved, so spaces or metacharacters in the path cannot break (or hijack)
# the command. Hidden files are skipped, as a shell `*.fastq.gz` glob would.
opendir(my $dir_handle, $directory)
    or die "Cannot open directory '$directory': $!\n";
my @fastq_names = sort grep { !/^\./ && /\.fastq\.gz\z/ } readdir $dir_handle;
closedir $dir_handle;

warn "No *.fastq.gz file found in '$directory'\n" if !@fastq_names;

foreach my $name (@fastq_names) {
    my $file = "$directory/$name";
    my %count_for = map { $_ => 0 } @genotypes;

    # Decompress each file once and test every read line against all
    # references, instead of decompressing once per genotype. The list form
    # of open() runs gzip directly, without a shell. The -cdfq flags are
    # meant to mirror how zgrep invokes gzip.
    my $pid = open(my $reads, '-|', 'gzip', '-cdfq', '--', $file);
    if (!$pid) {
        warn "Cannot run gzip on '$file': $!\n";
        next;
    }

    while (my $line = <$reads>) {
        foreach my $genotype (@genotypes) {
            # The references are plain ACGT strings, so a substring test is
            # equivalent to the regex match grep would perform.
            $count_for{$genotype}++
                if index($line, $sequences{$genotype}) >= 0;
        }
    }

    # For a piped handle, close() returns false when the child exited with a
    # non-zero status (e.g. a truncated or corrupt archive).
    close($reads)
        or warn "gzip reported a problem while reading '$file' (exit status "
              . ($? >> 8) . ")\n";

    foreach my $genotype (@genotypes) {
        print "$file $genotype $count_for{$genotype}\n";
    }
}