diff phyloconversion/uniprotfasta2phytab.pl @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloconversion/uniprotfasta2phytab.pl	Tue Mar 11 12:19:13 2014 -0700
@@ -0,0 +1,38 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use Bio::DB::Fasta;
+use Bio::SeqIO;
+use Bio::Seq;
+
+# Print one tab-separated row (species, partition, id, sequence) per FASTA record.
+# Usage: uniprotfasta2phytab.pl <uniprot.fasta> <partition>
+die "Usage: $0 <uniprot.fasta> <partition>\n" unless @ARGV >= 2;
+my $infile    = shift(@ARGV);
+my $partition = shift(@ARGV);
+
+# open infile fasta file
+my $in_obj = Bio::SeqIO->new(-file => $infile, '-format' => 'fasta');
+
+while (my $seq = $in_obj->next_seq()) {
+        my $sequence = $seq->seq;
+        $sequence = '' unless defined $sequence;   # empty record: avoid undef warnings
+        $sequence =~ s/\n//g;
+
+        # UniProt-style ids are db|accession|entry: keep the accession, or fall
+        # back to the whole id when it contains no pipes.
+        my @rawid = split(/\|/, $seq->id);
+        my $seqid = defined $rawid[1] ? $rawid[1] : $seq->id;
+
+        # Species name follows OS= and ends at the next field; newer UniProt
+        # headers put OX= (taxon id) before GN=/PE=, so strip from any of them.
+        my $species = $seq->desc;
+        $species = '' unless defined $species;
+        $species =~ s/.*OS=//;
+        $species =~ s/ (?:OX|GN|PE)=.*//;
+        $species =~ s/ /_/g;
+        print $species."\t".$partition."\t".$seqid."\t".$sequence."\n";
+}