Mercurial > repos > dcouvin > removechar
comparison removeChar.pl @ 0:587281a1acec draft
Uploaded
| author | dcouvin |
|---|---|
| date | Fri, 17 Sep 2021 19:29:45 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:587281a1acec |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 use strict; | |
| 3 | |
| 4 ############################################################################ | |
| 5 # script to remove position or column from a multi-Fasta file | |
| 6 # in function of a given character | |
| 7 ############################################################################ | |
| 8 | |
| 9 | |
| 10 my $inFile = $ARGV[0]; #'example_seq.fasta'; | |
| 11 my $char = $ARGV[1]; #'N'; | |
| 12 my @headers = (); | |
| 13 my @sequences = (); | |
| 14 my $index = 0; | |
| 15 my $outFile = 'results.fna'; | |
| 16 open(IN,'<',$inFile) or die "Unable to read file $inFile: $!\n"; | |
| 17 while( defined( my $line = <IN> ) ){ | |
| 18 chomp($line); | |
| 19 if( $line =~ m/^>/ ){ | |
| 20 $headers[$index] = $line; | |
| 21 $index++; | |
| 22 } | |
| 23 else{ | |
| 24 $sequences[$index-1] .= $line; | |
| 25 } | |
| 26 } | |
| 27 close(IN); | |
| 28 my %lookup = (); | |
| 29 for(my $i=0;$i<=$#sequences;$i++){ | |
| 30 my $seq = $sequences[$i]; | |
| 31 my $len = length($seq); | |
| 32 for(my $j=0;$j<$len;$j++){ | |
| 33 my $residue = substr($seq,$j,1); | |
| 34 if( $residue eq $char ){ | |
| 35 $lookup{$j} = 1; | |
| 36 } | |
| 37 } | |
| 38 } | |
| 39 #print "# Skipped the following positions (zero indexed):\n"; | |
| 40 #print "# ",join(", ", sort {$a <=> $b} keys (%lookup)), "\n"; | |
| 41 #print "# Cleaned sequences:\n"; | |
| 42 #open(OUT,'>',$outFile) or die "Unable to write file $outFile: $!\n"; | |
| 43 for(my $i=0;$i<=$#headers;$i++){ | |
| 44 my $head = $headers[$i]; | |
| 45 my $seq = $sequences[$i]; | |
| 46 my $len = length($seq); | |
| 47 my $out = ''; | |
| 48 for(my $j=0;$j<$len;$j++){ | |
| 49 my $residue = substr($seq,$j,1); | |
| 50 $out .= $residue unless exists $lookup{$j}; | |
| 51 } | |
| 52 print $head, "\n", $out, "\n"; | |
| 53 #print OUT $head, "\n", $out, "\n"; | |
| 54 } | |
| 55 #close(OUT); | |
| 56 #print "\n"; | |
| 57 #print "End of program! Your result is written in file $outFile\n"; |
