Mercurial > repos > dereeper > pangenome_explorer
view COG/bac-genomics-scripts/seq_format-converter/seq_format-converter.pl @ 4:53df1177ff97 draft
Uploaded
author | dereeper |
---|---|
date | Thu, 30 May 2024 11:54:27 +0000 |
parents | e42d30da7a74 |
children |
line wrap: on
line source
#!/usr/bin/perl

# Converts a (multi-)sequence file from one format to another via BioPerl's
# Bio::SeqIO. The output file name is the input file name with its extension
# replaced by the requested output format (or with the format appended when
# the input file name has no extension).

use warnings;
use strict;
use autodie;
use Getopt::Long;
use Bio::SeqIO; # bioperl module to handle sequence input/output

my $usage = <<"USAGE";
##################################################################
# $0 -i seq_file -f in_format -o out_format                      #
#                                                                #
# Converts a (multi-)sequence file of a specific format to a     #
# differently formatted output file, with the help of BioPerl    #
# (www.bioperl.org).                                             #
# Formats are e.g. embl, fasta, gbk.                             #
#                                                                #
# Mandatory options:                                             #
#   -i, -input       input sequence file                         #
#   -f, -format      input format                                #
#   -o, -out_format  output format                               #
# Optional options:                                              #
#   -h, -help        print usage                                 #
#   -v, -version     print version number                        #
#                                                                #
# Adjust unix loop to run the script with all files in the       #
# current working directory, e.g.:                               #
# for i in *.gbk; do perl seq_format-converter.pl -i \$i -f gbk \\ #
#   -o embl; done                                                #
#                                                                #
# version 0.2, update: 03-02-2014                     A Leimbach #
#                      10-11-2011          aleimba[at]gmx[dot]de #
##################################################################
USAGE

### Get options with Getopt::Long
my $infile;     # input sequence file
my $in_format;  # input sequence file format
my $out_format; # desired output file format
my $version = 0.2;
my ($opt_version, $opt_help);

# GetOptions returns false when it meets an unknown or malformed option;
# stop there instead of running with a partially parsed command line.
GetOptions(
    'input=s'      => \$infile,
    'format=s'     => \$in_format,
    'out_format=s' => \$out_format,
    'version'      => \$opt_version,
    'help|?'       => \$opt_help,
) or die $usage, "### Fatal error: Invalid command line option(s)!\n\n";

### Print usage
# 'die' is kept for help/version so the output stream (STDERR) and the
# non-zero exit status stay the same for existing callers.
if ($opt_help) {
    die $usage;
}
elsif ($opt_version) {
    die "$0 $version\n";
}
elsif (!$infile || !$in_format || !$out_format) {
    die $usage, "### Fatal error: Option(s) or argument(s) for \'-i\', \'-f\', \'-o\' are missing!\n\n";
}

### Allow shorter format string for 'genbank'
$in_format = 'genbank' if ($in_format =~ /gbk/i);

### Derive the output file name from the input file name
# The extension uses the format string exactly as given by the user (e.g.
# 'gbk'), which is why this happens before $out_format is expanded below.
my $outfile = $infile;
unless ($outfile =~ s/\.\w+$/.$out_format/) {
    # No extension to replace: append one, otherwise $outfile would stay
    # identical to $infile.
    $outfile .= ".$out_format";
}

# Opening the output for writing would clobber the input if both names are
# the same (e.g. '-i seq.gbk -o gbk'), so refuse before any file is opened.
if ($outfile eq $infile) {
    die "### Fatal error: Output file \'$outfile\' would overwrite the input file! "
        . "Choose a different output format or rename the input file.\n\n";
}

$out_format = 'genbank' if ($out_format =~ /gbk/i);

### SeqIO objects for input and output
my $seq_in  = Bio::SeqIO->new(-file => "<$infile",  -format => $in_format);  # a Bio::SeqIO object
my $seq_out = Bio::SeqIO->new(-file => ">$outfile", -format => $out_format); # a Bio::SeqIO object

### Write sequence to different format
my $seq_count = 0;
while (my $seqobj = $seq_in->next_seq) { # a Bio::Seq object
    $seq_out->write_seq($seqobj);
    $seq_count++;
}

# A wrong '-f' typically shows up as zero parsed records; make that visible
# instead of reporting success silently.
if ($seq_count == 0) {
    warn "### Warning: No sequences were read from \'$infile\' with format \'$in_format\', "
        . "nothing was written to \'$outfile\'!\n";
}

print "\n\tCreated new file $outfile!\n\n";

exit;