Mercurial > repos > petr-novak > repeatrxplorer
comparison bin/align_parsing.pl @ 0:1d1b9e1b2e2f draft
Uploaded
| author | petr-novak |
|---|---|
| date | Thu, 19 Dec 2019 10:24:45 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1d1b9e1b2e2f |
|---|---|
| 1 #!/usr/bin/env perl | |
| 2 | |
| 3 # Parses align file, calculates read depth (RD) and genome representation (GR) | |
| 4 # of individual contigs; outputs contig sequences in fasta format with this | |
| 5 # information in the fasta header: | |
| 6 # >CLXConitgY (length-average_RD-GR) | |
| 7 # | |
| 8 # RD profiles along the contig sequences can also be calculated and saved | |
| 9 # in a separate file. | |
| 10 # | |
| 11 # (renamed from align_parsing_2.pl, 2010-05-10) | |
| 12 | |
| 13 | |
| 14 use Getopt::Std; | |
| 15 use warnings; | |
| 16 | |
| 17 getopt('iop'); | |
| 18 if ($opt_i) { | |
| 19 $align_file = $opt_i; # | |
| 20 } else { | |
| 21 die "Missing parameter: -i align_file\n"; | |
| 22 } | |
| 23 if ($opt_o) { | |
| 24 $out_file = $opt_o; # | |
| 25 } else { | |
| 26 $out_file = $align_file.".info"; | |
| 27 print "Output file not set, using \"$out_file\"\n"; | |
| 28 } | |
| 29 if ($opt_p) { | |
| 30 $profile_file = $opt_p; # RD profiles will be recorded | |
| 31 } else { | |
| 32 $profile_file = 0; # will be used below to switch profile output off | |
| 33 print "Parameter -p not set, RD profiles will not be saved\n"; | |
| 34 } | |
| 35 | |
| 36 open (ALIGN,$align_file) or die "Could not read from $align_file\n"; | |
| 37 open (OUT,">$out_file") or die "Could not write to $out_file\n"; | |
| 38 if ($profile_file) { | |
| 39 open (PROF,">$profile_file") or die "Could not write to $profile_file\n"; | |
| 40 } | |
| 41 | |
| 42 while ($radek = <ALIGN>) { | |
| 43 if ($radek =~/^DETAILED DISPLAY OF CONTIGS/) { # start of alignment section | |
| 44 # print $radek; | |
| 45 $radek = <ALIGN>; | |
| 46 while ($radek =~/\*\*\* (CL\d+) {0,1}Contig (\d+) \*\*\*/) { # parsing individual contigs | |
| 47 # print $radek; | |
| 48 $contig_id = $1."Contig".$2; | |
| 49 @pokryti = (); # array ve ktere bude pro kazdou pozici v sekvenci pokryti | |
| 50 $seq = ""; # consensus sekvence contigu | |
| 51 # print "$contig_id\n"; | |
| 52 # print $radek; | |
| 53 $radek = <ALIGN>; | |
| 54 while ($radek =~/\: \. \:/) { # -> nasleduje dalsi blok | |
| 55 @blok = (); # radky z aktualniho bloku alignmentu | |
| 56 do { | |
| 57 $radek = <ALIGN>; | |
| 58 push(@blok,$radek); | |
| 59 } while (not $radek =~/^consensus/); | |
| 60 $cons_line = $radek; | |
| 61 $radek = <ALIGN>; | |
| 62 $radek = <ALIGN>; # toto posledni nacteni radku slouzi v podmince tohoto i nadrazeneho cyklu while | |
| 63 if (not $radek) { $radek = " "; }; # aby to nehlasilo chyby na konci souboru kde ty radky muzou chybet | |
| 64 | |
| 65 pop(@blok); pop(@blok); # odstrani posledni dva radky (mezera a consensus) | |
| 66 | |
| 67 for ($f=10;$f<=length($cons_line);$f++) { | |
| 68 $suma_pozice = 0; | |
| 69 if (substr($cons_line,$f,1) =~/([A,T,G,C,N])/) { | |
| 70 $seq .= $1; | |
| 71 foreach $cteni (@blok) { | |
| 72 if (substr($cteni,$f,1) =~/[A,T,G,C,N]/) { | |
| 73 $suma_pozice++; | |
| 74 } | |
| 75 } | |
| 76 push(@pokryti,$suma_pozice); | |
| 77 } | |
| 78 } | |
| 79 | |
| 80 } | |
| 81 | |
| 82 $delka_cons = @pokryti; | |
| 83 $soucet = 0; | |
| 84 $prumer = 0; | |
| 85 foreach $suma_pozice (@pokryti) { | |
| 86 $soucet += $suma_pozice; | |
| 87 } | |
| 88 $prumer = sprintf("%0.1f",$soucet/$delka_cons); | |
| 89 | |
| 90 print OUT ">$contig_id ($delka_cons-$prumer-$soucet)\n"; | |
| 91 while ($seq_line = substr($seq,0,60,"")) { | |
| 92 print OUT "$seq_line\n"; | |
| 93 } | |
| 94 if ($profile_file) { | |
| 95 print PROF ">$contig_id ($delka_cons-$prumer-$soucet)\n"; | |
| 96 foreach $suma_pozice (@pokryti) { | |
| 97 print PROF "$suma_pozice "; | |
| 98 } | |
| 99 print PROF "\n"; | |
| 100 } | |
| 101 | |
| 102 } | |
| 103 } | |
| 104 } | |
| 105 | |
| 106 close ALIGN; | |
| 107 close OUT; | |
| 108 if ($profile_file) { | |
| 109 close PROF; | |
| 110 } | |
| 111 |
