diff CountUniqueIDs.pl @ 2:2f48cf393d25 draft

Add Perl scripts missing from previous upload.
author pjbriggs
date Mon, 09 Apr 2018 04:56:28 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CountUniqueIDs.pl	Mon Apr 09 04:56:28 2018 -0400
@@ -0,0 +1,42 @@
+#! /usr/bin/perl -w
+
+use strict;
+
+#### Read thru a GFF file of motifs and write out the number of unique ids
+#### Ian Donaldson Sept 2008
+
+#### Usage
+unless(@ARGV == 2) {
+   die("USAGE: $0 | GFF file | Output file\n\n");
+}
+my ($gff_file, $output_file) = @ARGV;
+
+#### Ready files; 3-arg open so a file name is never read as an open mode
+open(my $gff_fh, '<', $gff_file)
+   or die("Could not open GFF file '$gff_file': $!\n\n");
+open(my $output_fh, '>', $output_file)
+   or die("Could not open output file '$output_file': $!\n\n");
+
+#### Hash to hold ids (only the keys matter)
+my %id_hash = ();
+
+#### Work thru GFF file
+while(defined(my $gff_line = <$gff_fh>)) {
+   #### Skip comment lines and lines starting with whitespace (e.g. blank)
+   if($gff_line =~ /(^#|^\s)/) { next }
+
+   #### Drop the line ending so a bare id matches with or without a newline
+   $gff_line =~ s/\r?\n\z//;
+   #### The id is the first tab-delimited field
+   my ($id) = split(/\t/, $gff_line, 2);
+   $id_hash{$id}=1;
+}
+
+my $elements = scalar(keys(%id_hash));
+print {$output_fh} "$elements non-redundant sequences\n";
+
+#### Close files; buffered write errors only show up when output is closed
+close($gff_fh);
+close($output_fh) or die("Could not write output file '$output_file': $!\n\n");
+
+exit;