Mercurial > repos > iuc > mapseq
view mapseq2biom.pl @ 1:f1d1dafc836d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mapseq commit 4b7ab83d0fcaa1a08f0ea20b6947505575d1c585
author | iuc |
---|---|
date | Tue, 12 Mar 2024 21:22:41 +0000 |
parents | a09139ba9c06 |
children |
line wrap: on
line source
#!/usr/bin/env perl # use strict; use warnings; use Getopt::Long; my($otuFile, $mFile, $outputFile, $notaxidFile, $label, $krona, $help, @fds, @new_tax); my $taxid = 0; GetOptions( "otuTable=s" => \$otuFile, "query=s" => \$mFile, "outfile=s" => \$outputFile, "krona=s" => \$krona, "label=s" => \$label, "h|help" => \$help, "taxid!" => \$taxid, "notaxidfile:s" => \$notaxidFile, ) or die "Unknown option.\n"; if($help){ help(); } #No help has been requested. Sanity checks.... if(!$label){ $label = "Unspecified"; } #Check that the OTU table is defined and present. if(!defined($otuFile)){ die "The OTU table is not defined\n"; }elsif(!-e $otuFile){ die "The OUT table, $otuFile, does not exist\n"; } #Check that the query file is defined and present. if(!defined($mFile)){ die "The mapseq output file (query) file is not defined\n"; }elsif(!-e $mFile){ die "The mapseq file, $mFile, does not exist\n"; } #Check that the output file is defined. if(!defined($outputFile)){ die "The output file file is not defined\n"; } #This gives us an idea about what the mapseq file format. # mapseq v1.0 (Nov 13 2016) #querydbhitbitscoreidentitymatchesmismatchesgapsquery_startquery_enddbhit_startdbhit_endSILVA #contig--1213616/899-1048AY664005.1.12231410.98666668148110150170319D_0__Bacteria;D_1__Cyanobacteria;D_2__Cyanobacteria;D_3__SubsectionI;D_4__FamilyI;D_5__Synechococcus;D_6__uncultured Synechococcus sp. #contig--1126892/723-858EU805193.1.12931340.99264705135100136821957D_0__Bacteria #contig--243158/4-340JQ611080.1.9103250.99107143332113370335D_0__Archaea;D_1__Euryarchaeota;D_2__Thermoplasmata;D_3__Thermoplasmatales;D_4__Marine Group II;D_5__uncultured archaeon;D_6__uncultured archaeon #contig--1205795/1003-7EU802400.1.14959810.99395779876049970993D_0__Bacteria;D_1__Proteobacteria;D_2__Gammaproteobacteria;D_3__Oceanospirillales;D_4__SAR86 clade #contig--1213616/623-456EU802790.1.12661410.94674557160631168146314D_0__Bacteria;D_1__Cyanobacteria;D_2__Cyanobacteria;D_3__SubsectionI;D_4__FamilyI #contig--2489731/672-501LURT01000153.4866.63211410.9506173115471016212951456D_0__Archaea;D_1__Euryarchaeota;D_2__Thermoplasmata;D_3__Thermoplasmatales;D_4__Marine Group II #contig--151250/871-2AACY020187844.576.19958430.9850402585613018700869D_0__Archaea;D_1__Euryarchaeota;D_2__Thermoplasmata;D_3__Thermoplasmatales;D_4__Marine Group II #contig--1206060/3-639EF574940.1.14506080.998360636091006108401450D_0__Bacteria;D_1__Cyanobacteria;D_2__Cyanobacteria;D_3__SubsectionI;D_4__FamilyI #Need to count up when we see a taxonomy string multiple times. open(M, "<", $mFile) or die "Could not open mapseq results file $mFile:[$!]\n"; my $taxCount; my $tax=""; while(<M>){ if(/^#/){ #Skip counts next; }else{ #Pull out the fields that we need chomp; my @line = split(/\t/); if(!$line[13]){ $tax = "Unclassified"; } else { $tax= $line[13]; until ($tax !~/\_\_$/) { @fds=split (/\;/, $tax); splice @fds, -1; $tax= join(";",@fds); } } $taxCount->{$tax}->{count}++; } } close(M) or die "Could not close filehandle on mapseq file\n."; #Now we do not need to read the whole OTU table, just those that we have found #in mapseq results. #my $otuFile = "consensus_taxonomy_7_levels.otu"; open(O, "<", $otuFile) or die "Could not open OTU file $otuFile:[$!]\n"; while(<O>){ chomp; #Simple two column table, OTU code and taxonomy string. my ($otu, $tax, $taxid) = split(/\t/, $_, 3); #print "$otu | $tax"; #Have we seen this tax string? Store the OTU if (defined($taxCount->{$tax})){ $taxCount->{$tax}->{otu} = $otu; if ($taxid){ $taxCount->{$tax}->{taxid} = $taxid; } } } close(O) or die "Failed to close filehande on OTU table.\n"; #now check all taxonomy strings have an otu. If this fails, #something is very wrong. foreach my $tax (keys %{$taxCount}){ if(!defined($taxCount->{$tax}->{otu})){ die "Fatal, |$tax| has not got an OTU code assigned\n"; } } if ($taxid){ if ($label =~ /^UNITE/) { open(T, ">", $outputFile) or die "Could not open 'taxid_file':[$!]\n"; print T "# Constructed from biom file\n# OTU ID\t".$label."\ttaxonomy\n"; } else{ open(T, ">", $outputFile) or die "Could not open 'taxid_file':[$!]\n"; print T "# Constructed from biom file\n# OTU ID\t".$label."\ttaxonomy\ttaxid\n"; } open(R, ">", $notaxidFile ) or die "Could not open $outputFile:[$!]\n"; print R "# Constructed from biom file\n# OTU ID\t".$label."\ttaxonomy\n"; }else{ open(R, ">", $outputFile ) or die "Could not open $outputFile:[$!]\n"; print R "# Constructed from biom file\n# OTU ID\t".$label."\ttaxonomy\n"; } #Print the file out foreach my $tax (sort{$a cmp $b} keys %{$taxCount}){ if ($taxid){ print T $taxCount->{$tax}->{otu}."\t".sprintf("%.1f", $taxCount->{$tax}->{count})."\t".$tax."\t".$taxCount->{$tax}->{taxid}."\n"; print R $taxCount->{$tax}->{otu}."\t".sprintf("%.1f", $taxCount->{$tax}->{count})."\t".$tax."\n"; }else{ print R $taxCount->{$tax}->{otu}."\t".sprintf("%.1f", $taxCount->{$tax}->{count})."\t".$tax."\n"; } } close(R) or die "Failed to close open filehandle on outputfile\n"; close(T) or die "Failed to close open filehandle on notaxidfile\n"; if($krona){ open(K, ">", $krona ) or die "Could not open $krona:[$!]\n"; foreach my $tax (sort{$a cmp $b} keys %{$taxCount}){ my $taxMod = $tax; $taxMod =~ s/\D\_\d{1}\_\_//g; my @tax = split(/\;/, $taxMod); $taxMod = join("\t", @tax); print K $taxCount->{$tax}->{count}."\t".$taxMod."\n"; } close(K) or die "Failed to close open filehandle on outputfile\n"; } sub help { print<<EOF; $0 --otuTable onsensus_taxonomy_7_levels.otu --query ERR1234567.outfile --outfile ERR1234567.tsv Options ------- otuTable : <string>, the OTU table produced for the taxonomies found in the reference databases that was used with MAPseq. query : <string>, the output from the MAPseq that assigns a taxonomy to a sequence. outfile : <string>, the file storing the tsv file. krona : <string>, output file name for the Krona text. (Optional). label : <string>, lable to add to the top of the outfile OTU table. h|help : prints this message. EOF exit 1; }