comparison removeChar.pl @ 0:587281a1acec draft

Uploaded
author dcouvin
date Fri, 17 Sep 2021 19:29:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:587281a1acec
1 #!/usr/bin/perl -w
2 use strict;
3
4 ############################################################################
5 # script to remove position or column from a multi-Fasta file
6 # in function of a given character
7 ############################################################################
8
9
10 my $inFile = $ARGV[0]; #'example_seq.fasta';
11 my $char = $ARGV[1]; #'N';
12 my @headers = ();
13 my @sequences = ();
14 my $index = 0;
15 my $outFile = 'results.fna';
16 open(IN,'<',$inFile) or die "Unable to read file $inFile: $!\n";
17 while( defined( my $line = <IN> ) ){
18 chomp($line);
19 if( $line =~ m/^>/ ){
20 $headers[$index] = $line;
21 $index++;
22 }
23 else{
24 $sequences[$index-1] .= $line;
25 }
26 }
27 close(IN);
28 my %lookup = ();
29 for(my $i=0;$i<=$#sequences;$i++){
30 my $seq = $sequences[$i];
31 my $len = length($seq);
32 for(my $j=0;$j<$len;$j++){
33 my $residue = substr($seq,$j,1);
34 if( $residue eq $char ){
35 $lookup{$j} = 1;
36 }
37 }
38 }
39 #print "# Skipped the following positions (zero indexed):\n";
40 #print "# ",join(", ", sort {$a <=> $b} keys (%lookup)), "\n";
41 #print "# Cleaned sequences:\n";
42 #open(OUT,'>',$outFile) or die "Unable to write file $outFile: $!\n";
43 for(my $i=0;$i<=$#headers;$i++){
44 my $head = $headers[$i];
45 my $seq = $sequences[$i];
46 my $len = length($seq);
47 my $out = '';
48 for(my $j=0;$j<$len;$j++){
49 my $residue = substr($seq,$j,1);
50 $out .= $residue unless exists $lookup{$j};
51 }
52 print $head, "\n", $out, "\n";
53 #print OUT $head, "\n", $out, "\n";
54 }
55 #close(OUT);
56 #print "\n";
57 #print "End of program! Your result is written in file $outFile\n";